forked from apache/polaris
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Check to verify that "copied files" are mentioned in
LICENSE
The Gradle plugin introduced in this change verifies that files that are known to be mentioned in `LICENSE` are really mentioned in that file. The "magic word" `CODE_COPIED_TO_POLARIS` must be present in such files. The presence of the "magic word" triggers a validation that the path of the containing file, relative to the project root directory, is mentioned in `LICENSE`, prefixed with `* `. The plugin checks all source directories in projects that have any Java plugin applied. For other projects, the plugin's extension provides a mechanism to add directory sets similar to how `SourceDirectorySet` works, which is used for the root project. The plugin must be applied on the root project; it then adds itself to all other projects. The introduced `checkForCopiedCode` task is added to the `check` task as a dependency. Files that contain "copied code" need to contain the word `CODE_COPIED_TO_POLARIS` anywhere in their content. Related to apache#903
- Loading branch information
Showing
5 changed files
with
405 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerExtension.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package copiedcode | ||
|
||
import org.gradle.api.Project | ||
import org.gradle.api.file.RegularFileProperty | ||
import org.gradle.api.file.SourceDirectorySet | ||
import org.gradle.api.provider.Property | ||
import org.gradle.api.provider.SetProperty | ||
|
||
/**
 * Configuration of the copied-code check for a single Gradle project.
 *
 * Content-type patterns are regular expressions. The check task applies them with
 * `Matcher.matches()`, so a pattern has to match the _entire_ probed content type (for example
 * `image/.*`, not just `image/`).
 */
abstract class CopiedCodeCheckerExtension(private val project: Project) {
  /**
   * Per-project set of additional directories to scan.
   *
   * This property is _not_ propagated to subprojects.
   */
  val scanDirectories =
    project.objects.domainObjectContainer(
      SourceDirectorySet::class.java,
      { name -> project.objects.sourceDirectorySet(name, name) },
    )

  /**
   * By default, this plugin scans all files. The content types that match one of the regular
   * expressions of this property are excluded, unless a content-type matches one of the regular
   * expressions in [CopiedCodeCheckerExtension.includedContentTypePatterns].
   *
   * See [CopiedCodeCheckerExtension.addDefaultContentTypes],
   * [CopiedCodeCheckerExtension.includedContentTypePatterns],
   * [CopiedCodeCheckerExtension.includeUnrecognizedContentType].
   */
  abstract val excludedContentTypePatterns: SetProperty<String>

  /**
   * By default, this plugin scans all files. The content types that match one of the regular
   * expressions of the [CopiedCodeCheckerExtension.excludedContentTypePatterns] property are
   * excluded, unless a content-type matches one of the regular expressions in this property.
   *
   * See [CopiedCodeCheckerExtension.addDefaultContentTypes],
   * [CopiedCodeCheckerExtension.excludedContentTypePatterns],
   * [CopiedCodeCheckerExtension.includeUnrecognizedContentType].
   */
  abstract val includedContentTypePatterns: SetProperty<String>

  /**
   * If a content-type could not be detected, this property, which defaults to `true`, is consulted.
   *
   * See [CopiedCodeCheckerPlugin] for details.
   */
  abstract val includeUnrecognizedContentType: Property<Boolean>

  /**
   * The magic "word", if present in a file, meaning "this file has been copied".
   *
   * A file that is considered as "copied" must contain this magic word. "Word" means that the value
   * must be surrounded by regular expression word boundaries (`\b`).
   */
  abstract val magicWord: Property<String>

  /**
   * License file to check, configured on the root project. See [CopiedCodeCheckerPlugin] for
   * details.
   */
  abstract val licenseFile: RegularFileProperty

  init {
    includeUnrecognizedContentType.convention(true)
    magicWord.convention(DEFAULT_MAGIC_WORD)
  }

  /**
   * Recommended to use, adds known and used binary content types.
   *
   * @return this extension, to allow call chaining
   */
  fun addDefaultContentTypes(): CopiedCodeCheckerExtension {
    // Exclude all images
    excludedContentTypePatterns.add("image/.*")
    // But include images built in XML (e.g. image/svg+xml). The leading `.*` is required because
    // patterns are matched against the whole content type, not searched within it.
    includedContentTypePatterns.add(".*\\+xml")

    return this
  }

  companion object {
    // String manipulation is intentional - otherwise this source file would be considered as
    // "copied".
    val DEFAULT_MAGIC_WORD = "_CODE_COPIED_TO_POLARIS".substring(1)
  }
}
263 changes: 263 additions & 0 deletions
263
build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerPlugin.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,263 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package copiedcode | ||
|
||
import java.nio.file.Files | ||
import java.util.regex.Pattern | ||
import javax.inject.Inject | ||
import org.gradle.api.DefaultTask | ||
import org.gradle.api.GradleException | ||
import org.gradle.api.Plugin | ||
import org.gradle.api.Project | ||
import org.gradle.api.component.SoftwareComponentFactory | ||
import org.gradle.api.file.SourceDirectorySet | ||
import org.gradle.api.tasks.SourceSetContainer | ||
import org.gradle.api.tasks.TaskAction | ||
import org.gradle.kotlin.dsl.provideDelegate | ||
import org.gradle.work.DisableCachingByDefault | ||
|
||
/**
 * This plugin identifies files that have been originally copied from another project.
 *
 * Configuration is done using the [CopiedCodeCheckerExtension], available under the name
 * `copiedCodeChecks`.
 *
 * Such files need to contain a magic word, see [CopiedCodeCheckerExtension.magicWord].
 *
 * This plugin scans all sources of the project's source sets, if the project has any, plus the
 * directories configured via [CopiedCodeCheckerExtension.scanDirectories]. Files in the project's
 * build directory are always excluded.
 *
 * By default, this plugin scans all files. There is a convenience function to exclude known binary
 * types, see [CopiedCodeCheckerExtension.addDefaultContentTypes]. The
 * [CopiedCodeCheckerExtension.excludedContentTypePatterns] is checked first against a detected
 * content type. If a content-type is excluded, the
 * [CopiedCodeCheckerExtension.includedContentTypePatterns] is consulted. If a content-type could
 * not be detected, the property [CopiedCodeCheckerExtension.includeUnrecognizedContentType], which
 * defaults to `true`, is consulted.
 *
 * Each Gradle project has its own instance of the [CopiedCodeCheckerExtension], the extension of
 * the root project serves default values, except for [CopiedCodeCheckerExtension.scanDirectories].
 *
 * The license file to check is configured via [CopiedCodeCheckerExtension.licenseFile]. Files must
 * be mentioned using the relative path from the root directory, prefixed with `* ` (star + space).
 *
 * The plugin has to be applied on the root project, which then applies it to all projects of the
 * build. The `checkForCopiedCode` task is registered in every project and added as a dependency of
 * the project's `check` task.
 */
@Suppress("unused")
class CopiedCodeCheckerPlugin
@Inject
constructor(private val softwareComponentFactory: SoftwareComponentFactory) : Plugin<Project> {
  override fun apply(project: Project) {
    project.run {
      val extension =
        extensions.create("copiedCodeChecks", CopiedCodeCheckerExtension::class.java, project)

      if (rootProject == this) {
        // `allprojects` includes the root project itself; applying the plugin a second time there
        // is a no-op.
        rootProject.afterEvaluate {
          allprojects { plugins.apply(CopiedCodeCheckerPlugin::class.java) }
        }
      } else {
        // The root extension is looked up lazily, only when one of the conventions below is
        // actually queried.
        val rootExtension = {
          rootProject.extensions.getByType(CopiedCodeCheckerExtension::class.java)
        }

        extension.excludedContentTypePatterns.convention(
          provider { rootExtension().excludedContentTypePatterns.get() }
        )
        extension.includedContentTypePatterns.convention(
          provider { rootExtension().includedContentTypePatterns.get() }
        )
        extension.includeUnrecognizedContentType.convention(
          provider { rootExtension().includeUnrecognizedContentType.get() }
        )
        // Replaces the static default set by the extension itself, so that a magic word configured
        // on the root project is also the default for all other projects.
        extension.magicWord.convention(provider { rootExtension().magicWord.get() })
        extension.licenseFile.convention(provider { rootExtension().licenseFile.get() })
      }

      tasks.register(CHECK_FOR_COPIED_CODE_TASK_NAME, CheckForCopiedCodeTask::class.java)

      afterEvaluate {
        tasks.named("check").configure { dependsOn(CHECK_FOR_COPIED_CODE_TASK_NAME) }
      }
    }
  }

  companion object {
    private const val CHECK_FOR_COPIED_CODE_TASK_NAME = "checkForCopiedCode"
  }
}
|
||
/**
 * Verifies that every scanned file containing the configured magic word is mentioned in the
 * configured license file.
 *
 * A file counts as "mentioned" if the license file has a line consisting of `* ` followed by the
 * file's path relative to the root project directory, using `/` as the path separator.
 */
@DisableCachingByDefault
abstract class CheckForCopiedCodeTask : DefaultTask() {
  /**
   * Collects the directory sets to scan, each paired with a human-readable name used for logging:
   * the extension's scan directories followed by all sources of the project's source sets, if the
   * project has a `sourceSets` container.
   */
  private fun namedDirectorySets(): List<Pair<String, SourceDirectorySet>> {
    val extension = project.extensions.getByType(CopiedCodeCheckerExtension::class.java)
    val scanDirectorySets: List<Pair<String, SourceDirectorySet>> =
      extension.scanDirectories.map { scanDirectory ->
        Pair("scan directory ${scanDirectory.name}", scanDirectory)
      }

    // Resolves to `null` for projects that do not have a `sourceSets` container.
    val sourceSets: SourceSetContainer? by project
    val sourceSetDirectorySets: List<Pair<String, SourceDirectorySet>> =
      sourceSets?.map { sourceSet -> Pair("source set ${sourceSet.name}", sourceSet.allSource) }
        ?: emptyList()

    return scanDirectorySets + sourceSetDirectorySets
  }

  /**
   * Decides whether a file with the given probed content type is scanned for the magic word.
   *
   * An unknown content type (`null`) is scanned only if [includeUnknown] is `true`. A known content
   * type is scanned unless it fully matches one of [excludedPatterns] without also fully matching
   * one of [includedPatterns].
   */
  private fun isContentTypeChecked(
    fileType: String?,
    excludedPatterns: List<Pattern>,
    includedPatterns: List<Pattern>,
    includeUnknown: Boolean,
  ): Boolean {
    if (fileType == null) {
      if (!includeUnknown) {
        logger.info(" ... unknown content type, skipping")
      }
      return includeUnknown
    }

    val excluded = excludedPatterns.any { pattern -> pattern.matcher(fileType).matches() }
    if (!excluded) {
      return true
    }

    val included = includedPatterns.any { pattern -> pattern.matcher(fileType).matches() }
    if (!included) {
      logger.info(" ... excluded and not included content type, skipping")
    }
    return included
  }

  /**
   * Runs the check.
   *
   * @throws GradleException if at least one file contains the magic word but is not mentioned in
   *   the license file
   */
  @TaskAction
  fun checkForCopiedCode() {
    logger.info("Running copied code check against root project's LICENSE file")

    val namedDirectorySets = namedDirectorySets()

    val extension = project.extensions.getByType(CopiedCodeCheckerExtension::class.java)

    val includedPatterns = extension.includedContentTypePatterns.get().map { Pattern.compile(it) }
    val excludedPatterns = extension.excludedContentTypePatterns.get().map { Pattern.compile(it) }
    val includeUnknown = extension.includeUnrecognizedContentType.get()

    val magicWord = extension.magicWord.get()
    // The magic word is a literal, not a regular expression, hence quoted. It only counts when
    // surrounded by word boundaries.
    val magicWordPattern = Pattern.compile("\\b${Pattern.quote(magicWord)}\\b")

    val rootDir = project.rootDir
    val licenseFile = extension.licenseFile.get().asFile
    val licenseFileRelative = licenseFile.relativeTo(rootDir).toString()
    val mentionedFilesInLicense =
      licenseFile
        .readLines()
        .filter { line -> line.startsWith("* ") && line.length > 2 }
        .map { line -> line.removePrefix("* ") }
        .toSet()

    val buildDir = project.layout.buildDirectory.asFile.get()

    val unmentionedFiles =
      namedDirectorySets
        .flatMap { (name, sourceDirectorySet) ->
          logger.info(
            "Checking {} for files containing {} not mentioned in {}",
            name,
            magicWord,
            licenseFileRelative,
          )

          sourceDirectorySet.asFileTree
            .filter { file -> !file.startsWith(buildDir) }
            .mapNotNull { file ->
              val fileType = Files.probeContentType(file.toPath())
              logger.info(
                "Checking file '{}' (probed content type: {})",
                file.relativeTo(project.projectDir),
                fileType,
              )

              if (
                !isContentTypeChecked(fileType, excludedPatterns, includedPatterns, includeUnknown)
              ) {
                return@mapNotNull null
              }

              // Stream the file and stop at the first line that contains the magic word.
              val hasMagicWord =
                file.useLines { lines -> lines.any { s -> magicWordPattern.matcher(s).find() } }
              if (!hasMagicWord) {
                logger.info(
                  " ... no magic word, not expecting an entry in {}",
                  licenseFileRelative,
                )
                return@mapNotNull null
              }

              // Paths in the license file are relative to the root project directory and use `/`
              // as the separator, independent of the platform running the build.
              val relativeFilePath = file.relativeTo(rootDir).invariantSeparatorsPath
              if (mentionedFilesInLicense.contains(relativeFilePath)) {
                logger.info(" ... has magic word & mentioned in {}", licenseFileRelative)
                null
              } else {
                // error (summary) logged below
                logger.info(
                  "The file '{}' has the {} marker, but is not mentioned in {}",
                  relativeFilePath,
                  magicWord,
                  licenseFileRelative,
                )
                relativeFilePath
              }
            }
        }
        .sorted()

    if (unmentionedFiles.isNotEmpty()) {
      logger.error(
        """
        The following {} files have the {} marker but are not mentioned in {}, add those in an appropriate section.
        {}
        """
          .trimIndent(),
        unmentionedFiles.size,
        magicWord,
        licenseFileRelative,
        unmentionedFiles.joinToString("\n* ", "* "),
      )

      throw GradleException(
        "${unmentionedFiles.size} files with the $magicWord marker need to be mentioned in $licenseFileRelative. See the messages above."
      )
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.