This is an automated email from the ASF dual-hosted git repository.

snazy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git


The following commit(s) were added to refs/heads/main by this push:
     new 9f06888e Check to verify that "copied files" are mentioned in 
`LICENSE` (#904)
9f06888e is described below

commit 9f06888ebbc64741cfb2713f45c87a474b449ba8
Author: Robert Stupp <[email protected]>
AuthorDate: Fri Jan 31 09:28:58 2025 +0100

    Check to verify that "copied files" are mentioned in `LICENSE` (#904)
    
    To ensure that files that are known to be mentioned in `LICENSE` are really 
mentioned in that file, the Gradle plugin introduced in this change verifies 
this.
    
    The "magic word" `CODE_COPIED_TO_POLARIS` must be present in such files. 
The presence of the "magic word" triggers a validation that the path of the 
containing file, relative to the project root directory, must be mentioned in 
`LICENSE`, prefixed with `* `.
    
    The plugin checks all source directories in projects that have any Java 
plugin applied. For other projects, the plugin's extension provides a mechanism 
to add directory sets similar to how `SourceDirectorySet` works, which is used 
for the root project.
    
    The plugin must be applied on the root project; it then applies itself to all other 
projects. The introduced `checkForCopiedCode` task is added to the `check` task 
as a dependency.
    
    Files that contain "copied code" need to have the word 
`CODE_COPIED_TO_POLARIS` anywhere.
    
    Related to #903
---
 LICENSE                                            |   3 +-
 aggregated-license-report/build.gradle.kts         |   2 +
 .../copiedcode/CopiedCodeCheckerExtension.kt       | 104 +++++++
 .../kotlin/copiedcode/CopiedCodeCheckerPlugin.kt   | 316 +++++++++++++++++++++
 .../src/main/kotlin/polaris-root.gradle.kts        |   3 +
 build.gradle.kts                                   |  33 +++
 6 files changed, 460 insertions(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index 410a4dd4..baeaed06 100644
--- a/LICENSE
+++ b/LICENSE
@@ -204,7 +204,8 @@
 
 This product includes a gradle wrapper.
 
-* gradlew and gradle/wrapper/gradle-wrapper.properties
+* gradlew
+* gradle/wrapper/gradle-wrapper.properties
 
 Copyright: 2010-2019 Gradle Authors.
 Home page: https://github.com/gradle/gradle
diff --git a/aggregated-license-report/build.gradle.kts 
b/aggregated-license-report/build.gradle.kts
index 1e70825f..3966a740 100644
--- a/aggregated-license-report/build.gradle.kts
+++ b/aggregated-license-report/build.gradle.kts
@@ -60,3 +60,5 @@ val aggregatedLicenseReportsZip by
     destinationDirectory.set(layout.buildDirectory.dir("distributions"))
     archiveExtension.set("zip")
   }
+
+tasks.register("check")
diff --git 
a/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerExtension.kt 
b/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerExtension.kt
new file mode 100644
index 00000000..411d4ed5
--- /dev/null
+++ b/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerExtension.kt
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package copiedcode
+
+import org.gradle.api.Project
+import org.gradle.api.file.RegularFileProperty
+import org.gradle.api.file.SourceDirectorySet
+import org.gradle.api.provider.Property
+import org.gradle.api.provider.SetProperty
+
+/**
+ * Configuration for the copied-code checks, created per project by [CopiedCodeCheckerPlugin].
+ *
+ * Content-type patterns are regular expressions that must match the _entire_ probed content type
+ * (the check uses `Matcher.matches()`), for example `image/.*`.
+ */
+abstract class CopiedCodeCheckerExtension(private val project: Project) {
+  /**
+   * Per-project set of additional directories to scan.
+   *
+   * This property is _not_ propagated to subprojects.
+   */
+  val scanDirectories =
+    project.objects.domainObjectContainer(
+      SourceDirectorySet::class.java,
+      { name -> project.objects.sourceDirectorySet(name, name) },
+    )
+
+  /**
+   * By default, this plugin scans all files. Content types that match one of the regular
+   * expressions of this property are excluded, unless the content type also matches one of the
+   * regular expressions in [CopiedCodeCheckerExtension.includedContentTypePatterns].
+   *
+   * See [CopiedCodeCheckerExtension.addDefaultContentTypes],
+   * [CopiedCodeCheckerExtension.includedContentTypePatterns],
+   * [CopiedCodeCheckerExtension.includeUnrecognizedContentType].
+   */
+  abstract val excludedContentTypePatterns: SetProperty<String>
+
+  /**
+   * By default, this plugin scans all files. Content types that match one of the regular
+   * expressions of the [CopiedCodeCheckerExtension.excludedContentTypePatterns] property are
+   * excluded, unless the content type matches one of the regular expressions in this property.
+   *
+   * See [CopiedCodeCheckerExtension.addDefaultContentTypes],
+   * [CopiedCodeCheckerExtension.excludedContentTypePatterns],
+   * [CopiedCodeCheckerExtension.includeUnrecognizedContentType].
+   */
+  abstract val includedContentTypePatterns: SetProperty<String>
+
+  /**
+   * If a content-type could not be detected, this property, which defaults to `true`, is consulted.
+   *
+   * See [CopiedCodeCheckerPlugin] for details.
+   */
+  abstract val includeUnrecognizedContentType: Property<Boolean>
+
+  /**
+   * The magic "word" that, if present in a file, means "this file has been copied".
+   *
+   * A file that is considered "copied" must contain this magic word. "Word" means that the value
+   * must be surrounded by regular expression word boundaries (`\b`).
+   */
+  abstract val magicWord: Property<String>
+
+  /**
+   * License file to check, configured on the root project. See [CopiedCodeCheckerPlugin] for
+   * details.
+   */
+  abstract val licenseFile: RegularFileProperty
+
+  /**
+   * Recommended to use, adds known and used binary content types.
+   *
+   * @return this extension, to allow call chaining
+   */
+  fun addDefaultContentTypes(): CopiedCodeCheckerExtension {
+    // Exclude all images
+    excludedContentTypePatterns.add("image/.*")
+    // But include images built in XML (e.g. image/svg+xml). The leading `.*` is required because
+    // patterns have to match the whole content type, not just a part of it.
+    includedContentTypePatterns.add(".*\\+xml")
+
+    return this
+  }
+
+  init {
+    includeUnrecognizedContentType.convention(true)
+    magicWord.convention(DEFAULT_MAGIC_WORD)
+  }
+
+  companion object {
+    // String manipulation is intentional - otherwise this source file would be considered as
+    // "copied".
+    val DEFAULT_MAGIC_WORD = "_CODE_COPIED_TO_POLARIS".substring(1)
+  }
+}
diff --git a/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerPlugin.kt 
b/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerPlugin.kt
new file mode 100644
index 00000000..fb14568c
--- /dev/null
+++ b/build-logic/src/main/kotlin/copiedcode/CopiedCodeCheckerPlugin.kt
@@ -0,0 +1,316 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package copiedcode
+
+import java.nio.file.Files
+import java.util.regex.Pattern
+import javax.inject.Inject
+import kotlin.collections.joinToString
+import org.gradle.api.DefaultTask
+import org.gradle.api.GradleException
+import org.gradle.api.Plugin
+import org.gradle.api.Project
+import org.gradle.api.component.SoftwareComponentFactory
+import org.gradle.api.file.SourceDirectorySet
+import org.gradle.api.tasks.SourceSetContainer
+import org.gradle.api.tasks.TaskAction
+import org.gradle.kotlin.dsl.provideDelegate
+import org.gradle.work.DisableCachingByDefault
+
+/**
+ * This plugin identifies files that have been originally copied from another project.
+ *
+ * Configuration is done using the [CopiedCodeCheckerExtension], available under the name
+ * `copiedCodeChecks`.
+ *
+ * Such files need to contain a magic word, see [CopiedCodeCheckerExtension.magicWord].
+ *
+ * This plugin scans all source directories of the project's source sets (if any) and the
+ * directories configured via [CopiedCodeCheckerExtension.scanDirectories]. Files in the project's
+ * build directory are always excluded.
+ *
+ * By default, this plugin scans all files. There is a convenience function to exclude known binary
+ * types, see [CopiedCodeCheckerExtension.addDefaultContentTypes]. The
+ * [CopiedCodeCheckerExtension.excludedContentTypePatterns] is checked first against a detected
+ * content type. If a content type is excluded, the
+ * [CopiedCodeCheckerExtension.includedContentTypePatterns] is consulted. If a content type could
+ * not be detected, the property [CopiedCodeCheckerExtension.includeUnrecognizedContentType], which
+ * defaults to `true`, is consulted.
+ *
+ * Each Gradle project has its own instance of the [CopiedCodeCheckerExtension], the extension of
+ * the root project serves default values, except for [CopiedCodeCheckerExtension.scanDirectories]
+ * and [CopiedCodeCheckerExtension.magicWord].
+ *
+ * The license file to check is configured via [CopiedCodeCheckerExtension.licenseFile]. Files must
+ * be mentioned using the relative path from the root directory, with a leading `* ` (star +
+ * space).
+ */
+@Suppress("unused")
+class CopiedCodeCheckerPlugin
+@Inject
+constructor(private val softwareComponentFactory: SoftwareComponentFactory) : Plugin<Project> {
+  /**
+   * Creates the `copiedCodeChecks` extension, registers the check task(s) and hooks them into the
+   * project's `check` task.
+   *
+   * On the root project this additionally applies the plugin to all subprojects and registers the
+   * task that verifies that files mentioned in the license file exist.
+   */
+  override fun apply(project: Project): Unit =
+    project.run {
+      val extension =
+        extensions.create("copiedCodeChecks", CopiedCodeCheckerExtension::class.java, project)
+
+      if (rootProject == this) {
+        // Apply this plugin to all projects
+        afterEvaluate { subprojects { plugins.apply(CopiedCodeCheckerPlugin::class.java) } }
+
+        tasks.register(
+          CHECK_COPIED_CODE_MENTIONS_EXIST_TASK_NAME,
+          CheckCopiedCodeMentionsExistTask::class.java,
+        )
+
+        // Wired after evaluation; `tasks.named("check")` requires that a `check` task exists in
+        // this project by then.
+        afterEvaluate {
+          tasks.named("check").configure { dependsOn(CHECK_COPIED_CODE_MENTIONS_EXIST_TASK_NAME) }
+        }
+      } else {
+        // Non-root projects: default each setting to the root project's value. The providers look
+        // up the root extension lazily, i.e. when the value is actually queried.
+        extension.excludedContentTypePatterns.convention(
+          provider {
+            rootProject.extensions
+              .getByType(CopiedCodeCheckerExtension::class.java)
+              .excludedContentTypePatterns
+              .get()
+          }
+        )
+        extension.includedContentTypePatterns.convention(
+          provider {
+            rootProject.extensions
+              .getByType(CopiedCodeCheckerExtension::class.java)
+              .includedContentTypePatterns
+              .get()
+          }
+        )
+        extension.includeUnrecognizedContentType.convention(
+          provider {
+            rootProject.extensions
+              .getByType(CopiedCodeCheckerExtension::class.java)
+              .includeUnrecognizedContentType
+              .get()
+          }
+        )
+        extension.licenseFile.convention(
+          provider {
+            rootProject.extensions
+              .getByType(CopiedCodeCheckerExtension::class.java)
+              .licenseFile
+              .get()
+          }
+        )
+      }
+
+      // Registered for every project, including the root project.
+      tasks.register(CHECK_FOR_COPIED_CODE_TASK_NAME, CheckForCopiedCodeTask::class.java)
+
+      // Same as above: a `check` task must exist in the project once it has been evaluated.
+      afterEvaluate {
+        tasks.named("check").configure { dependsOn(CHECK_FOR_COPIED_CODE_TASK_NAME) }
+      }
+    }
+
+  companion object {
+    private const val CHECK_FOR_COPIED_CODE_TASK_NAME = "checkForCopiedCode"
+    private const val CHECK_COPIED_CODE_MENTIONS_EXIST_TASK_NAME = "checkCopiedCodeMentionsExist"
+  }
+}
+
+/**
+ * Verifies that every file mentioned in the configured license file actually exists.
+ *
+ * A "mention" is a line starting with `* ` followed by a path relative to the root project.
+ */
+@DisableCachingByDefault
+abstract class CheckCopiedCodeMentionsExistTask : DefaultTask() {
+  /**
+   * Reads the license file, resolves each mentioned path against the root project and fails the
+   * build with a [GradleException] if at least one of them does not exist.
+   */
+  @TaskAction
+  fun checkMentions() {
+    val checkerExtension = project.extensions.getByType(CopiedCodeCheckerExtension::class.java)
+
+    val license = checkerExtension.licenseFile.get().asFile
+    val licenseName = license.relativeTo(project.rootDir).toString()
+
+    logger.info("Checking whether files mentioned in the {} file exist", licenseName)
+
+    val root = project.rootProject
+    val missing =
+      license
+        .readLines()
+        .filter { it.length > 2 && it.startsWith("* ") }
+        .map { it.drop(2) }
+        .filterNot { mentioned -> root.file(mentioned).exists() }
+        .sorted()
+
+    // Nothing to report, all mentioned files exist.
+    if (missing.isEmpty()) {
+      return
+    }
+
+    logger.error(
+      """
+      The following {} files mentioned in {} do not exist, fix the {} file.
+
+      {}
+      """
+        .trimIndent(),
+      missing.size,
+      licenseName,
+      licenseName,
+      missing.joinToString("\n* ", "* "),
+    )
+
+    throw GradleException(
+      "${missing.size} files mentioned in $licenseName do not exist, fix the $licenseName file."
+    )
+  }
+}
+
+/**
+ * Scans the project's files for the configured magic word and verifies that every file containing
+ * it is mentioned in the license file (as a line `* <path relative to the root project>`).
+ */
+@DisableCachingByDefault
+abstract class CheckForCopiedCodeTask : DefaultTask() {
+  /**
+   * Collects the directory sets to scan: all [CopiedCodeCheckerExtension.scanDirectories] of this
+   * project plus `allSource` of every source set, if the project has a `sourceSets` container.
+   *
+   * @return pairs of a human-readable name (used for logging) and the directory set
+   */
+  private fun namedDirectorySets(): List<Pair<String, SourceDirectorySet>> {
+    val namedDirectorySets = mutableListOf<Pair<String, SourceDirectorySet>>()
+
+    val extension = project.extensions.getByType(CopiedCodeCheckerExtension::class.java)
+    extension.scanDirectories.forEach { scanDirectory ->
+      namedDirectorySets.add(Pair("scan directory ${scanDirectory.name}", scanDirectory))
+    }
+
+    // Looked up as a project property; expected to be `null` for projects without source sets.
+    val sourceSets: SourceSetContainer? by project
+    sourceSets?.forEach { sourceSet ->
+      namedDirectorySets.add(Pair("source set ${sourceSet.name}", sourceSet.allSource))
+    }
+
+    return namedDirectorySets
+  }
+
+  /**
+   * Checks all files of [namedDirectorySets] that are not located in the build directory.
+   *
+   * Files with an excluded (and not re-included) content type, or with an unrecognized content
+   * type when those are not included, are skipped. Every remaining file that contains the magic
+   * word must be mentioned in the license file.
+   *
+   * @throws GradleException if at least one file with the magic word is not mentioned
+   */
+  @TaskAction
+  fun checkForCopiedCode() {
+    val extension = project.extensions.getByType(CopiedCodeCheckerExtension::class.java)
+
+    val licenseFile = extension.licenseFile.get().asFile
+    val licenseFileRelative = licenseFile.relativeTo(project.rootDir).toString()
+
+    logger.info("Running copied code check against root project's {} file", licenseFileRelative)
+
+    val namedDirectorySets = namedDirectorySets()
+
+    val includedPatterns = extension.includedContentTypePatterns.get().map { Pattern.compile(it) }
+    // Must be built from the *excluded* patterns - using the included patterns here would mean
+    // that the configured exclusions are never applied.
+    val excludedPatterns = extension.excludedContentTypePatterns.get().map { Pattern.compile(it) }
+    val includeUnknown = extension.includeUnrecognizedContentType.get()
+
+    val magicWord = extension.magicWord.get()
+    val magicWordPattern = Pattern.compile(".*\\b${magicWord}\\b.*")
+
+    val mentionedFilesInLicense =
+      licenseFile
+        .readLines()
+        .filter { line -> line.startsWith("* ") && line.length > 2 }
+        .map { line -> line.removePrefix("* ") }
+        .toSet()
+
+    val buildDir = project.layout.buildDirectory.asFile.get()
+
+    val unmentionedFiles =
+      namedDirectorySets
+        .flatMap { (name, sourceDirectorySet) ->
+          logger.info(
+            "Checking {} for files containing {} not mentioned in {}",
+            name,
+            magicWord,
+            licenseFileRelative,
+          )
+
+          sourceDirectorySet.asFileTree
+            .filter { file -> !file.startsWith(buildDir) }
+            .map { file ->
+              val projectRelativeFile = file.relativeTo(project.projectDir)
+              val fileType = Files.probeContentType(file.toPath())
+              logger.info(
+                "Checking file '{}' (probed content type: {})",
+                projectRelativeFile,
+                fileType,
+              )
+
+              // Decide whether the file's content needs to be inspected at all.
+              val check =
+                if (fileType == null) {
+                  if (!includeUnknown) {
+                    logger.info("   ... unknown content type, skipping")
+                  }
+                  includeUnknown
+                } else {
+                  val skip =
+                    excludedPatterns.any { pattern -> pattern.matcher(fileType).matches() } &&
+                      includedPatterns.none { pattern -> pattern.matcher(fileType).matches() }
+                  if (skip) {
+                    logger.info("   ... excluded and not included content type, skipping")
+                  }
+                  !skip
+                }
+
+              // Yields the root-relative path of an offending file, `null` otherwise.
+              if (!check) {
+                null
+              } else if (file.readLines().none { s -> magicWordPattern.matcher(s).matches() }) {
+                logger.info("   ... no magic word, not expecting an entry in {}", licenseFileRelative)
+                null
+              } else {
+                // Use `/` as the separator on all platforms, because that is what the license
+                // file uses for its entries.
+                val relativeFilePath =
+                  file.relativeTo(project.rootProject.projectDir).invariantSeparatorsPath
+                if (mentionedFilesInLicense.contains(relativeFilePath)) {
+                  logger.info("   ... has magic word & mentioned in {}", licenseFileRelative)
+                  null
+                } else {
+                  // error (summary) logged below
+                  logger.info(
+                    "The file '{}' has the {} marker, but is not mentioned in {}",
+                    relativeFilePath,
+                    magicWord,
+                    licenseFileRelative,
+                  )
+                  relativeFilePath
+                }
+              }
+            }
+            .filterNotNull()
+        }
+        .sorted()
+
+    if (unmentionedFiles.isNotEmpty()) {
+      logger.error(
+        """
+        The following {} files have the {} marker but are not mentioned in {}, add those in an appropriate section.
+
+        {}
+        """
+          .trimIndent(),
+        unmentionedFiles.size,
+        magicWord,
+        licenseFileRelative,
+        unmentionedFiles.joinToString("\n* ", "* "),
+      )
+
+      throw GradleException(
+        "${unmentionedFiles.size} files with the $magicWord marker need to be mentioned in $licenseFileRelative. See the messages above."
+      )
+    }
+  }
+}
diff --git a/build-logic/src/main/kotlin/polaris-root.gradle.kts 
b/build-logic/src/main/kotlin/polaris-root.gradle.kts
index 12d322e4..96faa07b 100644
--- a/build-logic/src/main/kotlin/polaris-root.gradle.kts
+++ b/build-logic/src/main/kotlin/polaris-root.gradle.kts
@@ -17,6 +17,7 @@
  * under the License.
  */
 
+import copiedcode.CopiedCodeCheckerPlugin
 import org.jetbrains.gradle.ext.copyright
 import org.jetbrains.gradle.ext.encodings
 import org.jetbrains.gradle.ext.settings
@@ -30,6 +31,8 @@ plugins {
 
 apply<PublishingHelperPlugin>()
 
+apply<CopiedCodeCheckerPlugin>()
+
 spotless {
   kotlinGradle {
     ktfmt().googleStyle()
diff --git a/build.gradle.kts b/build.gradle.kts
index 04895afd..f076984f 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -159,3 +159,36 @@ nexusPublishing {
     }
   }
 }
+
+copiedCodeChecks {
+  addDefaultContentTypes()
+
+  licenseFile = project.layout.projectDirectory.file("LICENSE")
+
+  scanDirectories {
+    register("build-logic") { srcDir("build-logic/src") }
+    register("misc") {
+      srcDir(".github")
+      srcDir("codestyle")
+      srcDir("getting-started")
+      srcDir("k8")
+      srcDir("regtests")
+      srcDir("server-templates")
+      srcDir("spec")
+    }
+    register("gradle") {
+      srcDir("gradle")
+      exclude("wrapper/*.jar")
+      exclude("wrapper/*.sha256")
+    }
+    register("site") {
+      srcDir("site")
+      exclude("build/**")
+      exclude(".hugo_build.lock")
+    }
+    register("root") {
+      srcDir(".")
+      include("*")
+    }
+  }
+}

Reply via email to