From f78db87e5d12514f3ed97fa3b49e9d9a3fe1bc5b Mon Sep 17 00:00:00 2001
From: Robert Stupp
Date: Tue, 28 Jan 2025 15:29:41 +0100
Subject: [PATCH] Check to verify that "copied files" are mentioned in `LICENSE`

The Gradle plugin introduced in this change verifies that files that are known to require a mention in `LICENSE` are really mentioned in that file.

The "magic word" `CODE_COPIED_TO_POLARIS` must be present in such files. The presence of the "magic word" triggers a validation that the path of the containing file, relative to the project root directory, is mentioned in `LICENSE`, prefixed with `* `.

The plugin checks all source directories in projects that have any Java plugin applied. For other projects, the plugin's extension provides a mechanism to add directory sets, similar to how `SourceDirectorySet` works; this mechanism is used for the root project.

The plugin must be applied on the root project; it then applies itself to all other projects. The introduced `checkForCopiedCode` task is added to the `check` task as a dependency.

Files that contain "copied code" need to contain the word `CODE_COPIED_TO_POLARIS` somewhere in their content.
Related to #903 --- aggregated-license-report/build.gradle.kts | 2 + .../copiedcode/CopiedCodeCheckerExtension.kt | 104 +++++++ .../copiedcode/CopiedCodeCheckerPlugin.kt | 263 ++++++++++++++++++ .../src/main/kotlin/polaris-root.gradle.kts | 3 + build.gradle.kts | 33 +++ 5 files changed, 405 insertions(+) create mode 100644 build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerExtension.kt create mode 100644 build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerPlugin.kt diff --git a/aggregated-license-report/build.gradle.kts b/aggregated-license-report/build.gradle.kts index 1e70825fb..3966a740a 100644 --- a/aggregated-license-report/build.gradle.kts +++ b/aggregated-license-report/build.gradle.kts @@ -60,3 +60,5 @@ val aggregatedLicenseReportsZip by destinationDirectory.set(layout.buildDirectory.dir("distributions")) archiveExtension.set("zip") } + +tasks.register("check") diff --git a/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerExtension.kt b/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerExtension.kt new file mode 100644 index 000000000..411d4ed53 --- /dev/null +++ b/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerExtension.kt @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package copiedcode
+
+import org.gradle.api.Project
+import org.gradle.api.file.RegularFileProperty
+import org.gradle.api.file.SourceDirectorySet
+import org.gradle.api.provider.Property
+import org.gradle.api.provider.SetProperty
+
+/**
+ * Configuration of the [CopiedCodeCheckerPlugin], available on each project as `copiedCodeChecks`.
+ */
+abstract class CopiedCodeCheckerExtension(private val project: Project) {
+  /**
+   * Per-project set of additional directories to scan.
+   *
+   * This property is _not_ propagated to subprojects.
+   */
+  val scanDirectories =
+    project.objects.domainObjectContainer(
+      SourceDirectorySet::class.java,
+      { name -> project.objects.sourceDirectorySet(name, name) },
+    )
+
+  /**
+   * By default, this plugin scans all files. Content types that fully match one of the regular
+   * expressions of this property are excluded, unless the content type also fully matches one of
+   * the regular expressions in [includedContentTypePatterns].
+   *
+   * See [addDefaultContentTypes], [includedContentTypePatterns] and
+   * [includeUnrecognizedContentType].
+   */
+  abstract val excludedContentTypePatterns: SetProperty<String>
+
+  /**
+   * By default, this plugin scans all files. Content types that fully match one of the regular
+   * expressions of the [excludedContentTypePatterns] property are excluded, unless the content
+   * type also fully matches one of the regular expressions in this property.
+   *
+   * See [addDefaultContentTypes], [excludedContentTypePatterns] and
+   * [includeUnrecognizedContentType].
+   */
+  abstract val includedContentTypePatterns: SetProperty<String>
+
+  /**
+   * If a content-type could not be detected, this property, which defaults to `true`, is consulted.
+   *
+   * See [CopiedCodeCheckerPlugin] for details.
+   */
+  abstract val includeUnrecognizedContentType: Property<Boolean>
+
+  /**
+   * The magic "word" that, if present in a file, means "this file has been copied".
+   *
+   * A file that is considered "copied" must contain this magic word. "Word" means that the value
+   * must be surrounded by regular expression word boundaries (`\b`).
+   */
+  abstract val magicWord: Property<String>
+
+  /**
+   * License file to check, configured on the root project. See [CopiedCodeCheckerPlugin] for
+   * details.
+   */
+  abstract val licenseFile: RegularFileProperty
+
+  /** Recommended to use, adds known and used binary content types. */
+  fun addDefaultContentTypes(): CopiedCodeCheckerExtension {
+    // Exclude all images
+    excludedContentTypePatterns.add("image/.*")
+    // But include images built in XML (e.g. image/svg+xml); patterns must match the whole type
+    includedContentTypePatterns.add(".*\\+xml")
+
+    return this
+  }
+
+  init {
+    includeUnrecognizedContentType.convention(true)
+    magicWord.convention(DEFAULT_MAGIC_WORD)
+  }
+
+  companion object {
+    // String manipulation is intentional - otherwise this source file would be considered as
+    // "copied".
+    val DEFAULT_MAGIC_WORD = "_CODE_COPIED_TO_POLARIS".substring(1)
+  }
+}
diff --git a/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerPlugin.kt b/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerPlugin.kt
new file mode 100644
index 000000000..1d078fa35
--- /dev/null
+++ b/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerPlugin.kt
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package copiedcode + +import java.nio.file.Files +import java.util.regex.Pattern +import javax.inject.Inject +import org.gradle.api.DefaultTask +import org.gradle.api.GradleException +import org.gradle.api.Plugin +import org.gradle.api.Project +import org.gradle.api.component.SoftwareComponentFactory +import org.gradle.api.file.SourceDirectorySet +import org.gradle.api.tasks.SourceSetContainer +import org.gradle.api.tasks.TaskAction +import org.gradle.kotlin.dsl.provideDelegate +import org.gradle.work.DisableCachingByDefault + +/** + * This plugin identifies files that have been originally copied from another project. + * + * Configuration is done using the [CopiedCodeCheckerExtension], available under the name + * `copiedCodeChecks`. + * + * Such files need to contain a magic word, see [CopiedCodeCheckerExtension.magicWord]. + * + * This plugin scans all source directories configured in the project's [SourceDirectorySet]. Files + * in the project's build directory are always excluded. + * + * By default, this plugin scans all files. There is a convenience function to exclude known binary + * types, see [CopiedCodeCheckerExtension.addDefaultContentTypes]. The + * [CopiedCodeCheckerExtension.excludedContentTypePatterns] is checked first against a detected + * content type. 
If a content type is excluded, the
+ * [CopiedCodeCheckerExtension.includedContentTypePatterns] is consulted. If a content-type could
+ * not be detected, the property [CopiedCodeCheckerExtension.includeUnrecognizedContentType], which
+ * defaults to `true`, is consulted.
+ *
+ * Each Gradle project has its own instance of the [CopiedCodeCheckerExtension], the extension of
+ * the root project serves default values, except for [CopiedCodeCheckerExtension.scanDirectories].
+ *
+ * The license file to check is configured via [CopiedCodeCheckerExtension.licenseFile]. Files must
+ * be mentioned using the relative path from the root directory, prefixed with `* ` (star +
+ * space).
+ */
+@Suppress("unused")
+class CopiedCodeCheckerPlugin
+@Inject
+constructor(private val softwareComponentFactory: SoftwareComponentFactory) : Plugin<Project> {
+  override fun apply(project: Project): Unit =
+    project.run {
+      val extension =
+        extensions.create("copiedCodeChecks", CopiedCodeCheckerExtension::class.java, project)
+
+      if (rootProject == this) {
+        rootProject.afterEvaluate {
+          allprojects { plugins.apply(CopiedCodeCheckerPlugin::class.java) }
+        }
+      } else {
+        // Subprojects use the root project's settings as their defaults, resolved lazily.
+        val rootExtension = provider {
+          rootProject.extensions.getByType(CopiedCodeCheckerExtension::class.java)
+        }
+        extension.excludedContentTypePatterns.convention(
+          rootExtension.flatMap { it.excludedContentTypePatterns }
+        )
+        extension.includedContentTypePatterns.convention(
+          rootExtension.flatMap { it.includedContentTypePatterns }
+        )
+        extension.includeUnrecognizedContentType.convention(
+          rootExtension.flatMap { it.includeUnrecognizedContentType }
+        )
+        extension.magicWord.convention(rootExtension.flatMap { it.magicWord })
+        extension.licenseFile.convention(rootExtension.flatMap { it.licenseFile })
+      }
+
+      tasks.register(CHECK_FOR_COPIED_CODE_TASK_NAME, CheckForCopiedCodeTask::class.java)
+
+      afterEvaluate {
+        tasks.named("check").configure { dependsOn(CHECK_FOR_COPIED_CODE_TASK_NAME) }
+      }
+    }
+
+  companion object {
+    private const val CHECK_FOR_COPIED_CODE_TASK_NAME = "checkForCopiedCode"
+  }
+}
+
+/**
+ * Verifies that all scanned files containing the magic word are mentioned in the license file.
+ *
+ * Scans [CopiedCodeCheckerExtension.scanDirectories] and, if available, all source sets of the
+ * project. The task fails if at least one file containing the magic word is not mentioned in the
+ * license file. See [CopiedCodeCheckerPlugin] for details.
+ */
+@DisableCachingByDefault
+abstract class CheckForCopiedCodeTask : DefaultTask() {
+  /** Collects the directory sets to scan, each paired with a description used for logging. */
+  private fun namedDirectorySets(): List<Pair<String, SourceDirectorySet>> {
+    val namedDirectorySets = mutableListOf<Pair<String, SourceDirectorySet>>()
+
+    val extension = project.extensions.getByType(CopiedCodeCheckerExtension::class.java)
+    extension.scanDirectories.forEach { scanDirectory ->
+      namedDirectorySets.add(Pair("scan directory ${scanDirectory.name}", scanDirectory))
+    }
+
+    // Resolves to `null` for projects that do not have a `sourceSets` property.
+    val sourceSets: SourceSetContainer? by project
+    sourceSets?.forEach { sourceSet ->
+      namedDirectorySets.add(Pair("source set ${sourceSet.name}", sourceSet.allSource))
+    }
+
+    return namedDirectorySets
+  }
+
+  /**
+   * Scans all directory sets for files that contain the magic word and reports those that are not
+   * mentioned in the license file.
+   *
+   * @throws GradleException if files with the magic word are not mentioned in the license file
+   */
+  @TaskAction
+  fun checkForCopiedCode() {
+    logger.info("Running copied code check against root project's LICENSE file")
+
+    val namedDirectorySets = namedDirectorySets()
+
+    val extension = project.extensions.getByType(CopiedCodeCheckerExtension::class.java)
+
+    val includedPatterns = extension.includedContentTypePatterns.get().map { Pattern.compile(it) }
+    val excludedPatterns = extension.excludedContentTypePatterns.get().map { Pattern.compile(it) }
+    val includeUnknown = extension.includeUnrecognizedContentType.get()
+
+    val magicWord = extension.magicWord.get()
+    // Quote the magic word, it is a literal word and not a regular expression.
+    val magicWordPattern = Pattern.compile(".*\\b${Pattern.quote(magicWord)}\\b.*")
+
+    val licenseFile = extension.licenseFile.get().asFile
+    val licenseFileRelative = licenseFile.relativeTo(project.rootDir).toString()
+    // Copied files are listed as `* <path relative to the root directory>` in the license file.
+    val mentionedFilesInLicense =
+      licenseFile
+        .readLines()
+        .filter { line -> line.startsWith("* ") && line.length > 2 }
+        .map { line -> line.substring(2).trim() }
+        .toSet()
+
+    val buildDir = project.layout.buildDirectory.asFile.get()
+
+    val unmentionedFiles =
+      namedDirectorySets
+        .flatMap { pair ->
+          val name = pair.first
+          val sourceDirectorySet = pair.second
+
+          logger.info(
+            "Checking {} for files containing {} not mentioned in {}",
+            name,
+            magicWord,
+            licenseFileRelative,
+          )
+
+          sourceDirectorySet.asFileTree
+            .filter { file -> !file.startsWith(buildDir) }
+            .mapNotNull { file ->
+              val projectRelativeFile = file.relativeTo(project.projectDir)
+              val fileType = Files.probeContentType(file.toPath())
+              logger.info(
+                "Checking file '{}' (probed content type: {})",
+                projectRelativeFile,
+                fileType,
+              )
+
+              // Path to report, stays `null` if the file is skipped or properly mentioned.
+              var r: String? = null
+
+              var check = true
+              if (fileType == null) {
+                if (!includeUnknown) {
+                  logger.info(" ... unknown content type, skipping")
+                  check = false
+                }
+              } else {
+                val excluded =
+                  excludedPatterns.any { pattern -> pattern.matcher(fileType).matches() }
+                if (excluded) {
+                  val included =
+                    includedPatterns.any { pattern -> pattern.matcher(fileType).matches() }
+                  if (!included) {
+                    logger.info(" ... excluded and not included content type, skipping")
+                    check = false
+                  }
+                }
+              }
+
+              if (check) {
+                val hasMagicWord =
+                  file.useLines { lines -> lines.any { magicWordPattern.matcher(it).matches() } }
+                if (!hasMagicWord) {
+                  logger.info(
+                    " ... no magic word, not expecting an entry in {}",
+                    licenseFileRelative,
+                  )
+                } else {
+                  // Files are mentioned relative to the root directory, normalized to `/`.
+                  val relativeFilePath = file.relativeTo(project.rootDir).invariantSeparatorsPath
+                  if (mentionedFilesInLicense.contains(relativeFilePath)) {
+                    logger.info(" ... has magic word & mentioned in {}", licenseFileRelative)
+                  } else {
+                    // error (summary) logged below
+                    logger.info(
+                      "The file '{}' has the {} marker, but is not mentioned in {}",
+                      relativeFilePath,
+                      magicWord,
+                      licenseFileRelative,
+                    )
+                    r = relativeFilePath
+                  }
+                }
+              }
+
+              r
+            }
+        }
+        .distinct()
+        .sorted()
+
+    if (unmentionedFiles.isNotEmpty()) {
+      logger.error(
+        """
+          The following {} files have the {} marker but are not mentioned in {}, add those in an appropriate section.
+
+          {}
+        """
+          .trimIndent(),
+        unmentionedFiles.size,
+        magicWord,
+        licenseFileRelative,
+        unmentionedFiles.joinToString("\n* ", "* "),
+      )
+
+      throw GradleException(
+        "${unmentionedFiles.size} files with the $magicWord marker need to be mentioned in $licenseFileRelative. See the messages above."
+      )
+    }
+  }
+}
diff --git a/build-logic/src/main/kotlin/polaris-root.gradle.kts b/build-logic/src/main/kotlin/polaris-root.gradle.kts index 12d322e4e..96faa07b8 100644 --- a/build-logic/src/main/kotlin/polaris-root.gradle.kts +++ b/build-logic/src/main/kotlin/polaris-root.gradle.kts @@ -17,6 +17,7 @@ * under the License. */ +import copiedcode.CopiedCodeCheckerPlugin import org.jetbrains.gradle.ext.copyright import org.jetbrains.gradle.ext.encodings import org.jetbrains.gradle.ext.settings @@ -30,6 +31,8 @@ plugins { apply() +apply() + spotless { kotlinGradle { ktfmt().googleStyle() diff --git a/build.gradle.kts b/build.gradle.kts index 04895afd0..f076984fb 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -159,3 +159,36 @@ nexusPublishing { } } } + +copiedCodeChecks { + addDefaultContentTypes() + + licenseFile = project.layout.projectDirectory.file("LICENSE") + + scanDirectories { + register("build-logic") { srcDir("build-logic/src") } + register("misc") { + srcDir(".github") + srcDir("codestyle") + srcDir("getting-started") + srcDir("k8") + srcDir("regtests") + srcDir("server-templates") + srcDir("spec") + } + register("gradle") { + srcDir("gradle") + exclude("wrapper/*.jar") + exclude("wrapper/*.sha256") + } + register("site") { + srcDir("site") + exclude("build/**") + exclude(".hugo_build.lock") + } + register("root") { + srcDir(".") + include("*") + } + } +}