This is an automated email from the ASF dual-hosted git repository.

janhoy pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_10x by this push:
     new 8300816fa0f Run RAT check only on files tracked by git (#4499)
8300816fa0f is described below

commit 8300816fa0ff62dbe5d9f8d9ddade3bd2dca68f2
Author: Jan Høydahl <[email protected]>
AuthorDate: Wed Jun 10 08:41:47 2026 +0200

    Run RAT check only on files tracked by git (#4499)
    
    (cherry picked from commit 6a7d7664968b9461183eb7d726795f612a79e59b)
---
 gradle/validation/rat-sources.gradle | 124 ++++++++++++++++++++++++++---------
 1 file changed, 94 insertions(+), 30 deletions(-)

diff --git a/gradle/validation/rat-sources.gradle b/gradle/validation/rat-sources.gradle
index f8556d2ffa4..4e01a0e93ab 100644
--- a/gradle/validation/rat-sources.gradle
+++ b/gradle/validation/rat-sources.gradle
@@ -15,7 +15,18 @@
  * limitations under the License.
  */
 
-import groovy.xml.NamespaceBuilder
+import groovy.xml.XmlSlurper
+import org.eclipse.jgit.storage.file.FileRepositoryBuilder
+
+buildscript {
+  repositories {
+    mavenCentral()
+  }
+
+  dependencies {
+    classpath libs.eclipse.jgit.jgit
+  }
+}
 
 // Configure rat dependencies for use in the custom task.
 configure(rootProject) {
@@ -28,39 +39,102 @@ configure(rootProject) {
   }
 }
 
+/**
+ * Reads all file paths from the git index (DirCache) once and caches the result.
+ * Returns null if this is not a git repository or the index cannot be read.
+ * Paths use forward slashes and are relative to the repository root.
+ */
+Set<String> loadGitTrackedFiles() {
+  if (rootProject.ext.has('ratGitIndexLoaded')) return rootProject.ext.ratGitTrackedFiles
+  rootProject.ext.ratGitIndexLoaded = true
+  rootProject.ext.ratGitTrackedFiles = null
+
+  def dotGit = new File(rootProject.projectDir, ".git")
+  if (dotGit.isFile()) {
+    // git worktrees use a .git file — jgit does not fully support them
+    logger.warn("WARNING: git worktrees are not supported by jgit — RAT git-tracking filter disabled.")
+    return null
+  }
+  if (!dotGit.isDirectory()) {
+    return null // not a git repository
+  }
+
+  def repository = null
+  try {
+    repository = new FileRepositoryBuilder()
+        .setWorkTree(rootProject.projectDir)
+        .setMustExist(true)
+        .build()
+    def tracked = new HashSet<String>()
+    def dirCache = repository.readDirCache()
+    for (int i = 0; i < dirCache.getEntryCount(); i++) {
+      tracked.add(dirCache.getEntry(i).getPathString())
+    }
+    rootProject.ext.ratGitTrackedFiles = tracked
+    return tracked
+  } catch (Exception e) {
+    logger.warn("WARNING: Could not read git index for RAT check: ${e.message}")
+    return null
+  } finally {
+    if (repository != null) repository.close()
+  }
+}
+
+/**
+ * Returns the set of git-tracked file paths relative to the given project directory,
+ * or null if git tracking is unavailable.
+ */
+Set<String> gitTrackedFiles(File dir) {
+  def allFiles = loadGitTrackedFiles()
+  if (allFiles == null) return null
+
+  def prefix = rootProject.projectDir.toPath().relativize(dir.toPath())
+      .toString().replace('\\', '/')
+  if (prefix.isEmpty()) return allFiles
+
+  prefix += "/"
+  def result = new HashSet<String>()
+  for (String path : allFiles) {
+    if (path.startsWith(prefix)) {
+      result.add(path.substring(prefix.length()))
+    }
+  }
+  return result
+}
+
 // Configure the rat validation task and all scanned directories.
 allprojects {
-  task("rat", type: RatTask) {
+  tasks.register("rat", RatTask) {
     group = 'Verification'
     description = 'Runs Apache Rat checks.'
 
     def defaultScanFileTree = project.fileTree(projectDir, {
-      // Don't check under the project's build folder.
-      exclude project.buildDir.name
-
-      // Exclude any generated stuff.
-      exclude "src/generated"
+      // Only check files tracked by git — skip untracked/gitignored files
+      // (IDE artifacts, AI assistant configs, etc.)
+      def trackedFiles = gitTrackedFiles(projectDir)
+      if (trackedFiles != null) {
+        exclude { element ->
+          // Allow directories through (they are just containers), exclude untracked files
+          !element.isDirectory() && !trackedFiles.contains(element.relativePath.pathString)
+        }
+      }
 
-      // Don't recurse into local Lucene dev repo.
-      exclude "/lucene"
+      // Exclude the build directory — even though it's not git-tracked, Gradle
+      // validates input/output overlaps at configuration time before the git
+      // filter closure runs, so this must be excluded explicitly.
+      exclude project.layout.buildDirectory.get().asFile.name
 
       // Don't check any of the subprojects - they have their own rat tasks.
       exclude subprojects.collect {it.projectDir.name}
 
-      // At the module scope we only check selected file patterns as folks have various .gitignore-d resources
-      // generated by IDEs, etc.
+      // The git index filter above excludes untracked files. These include
+      // patterns select the file types that should carry license headers.
       include "**/*.xml"
       include "**/*.md"
       include "**/*.py"
       include "**/*.sh"
       include "**/*.bat"
-      // include build.gradle but exclude .gradle directories
       include "**/*.gradle"
-      exclude ".gradle/**"
-
-      // Exclude Eclipse
-      exclude ".metadata"
-      exclude ".settings"
 
       // Include selected patterns from any source folders. We could make this
       // relative to source sets but it seems to be of little value - all our source sets
@@ -85,10 +159,6 @@ allprojects {
         case ":":
           include "gradlew"
           include "gradlew.bat"
-          exclude ".idea"
-          exclude ".muse"
-          exclude ".git"
-
           // Exclude github stuff (templates, workflows).
           exclude ".github"
 
@@ -132,11 +202,7 @@ allprojects {
           break
 
         case ":solr:core":
-          exclude "**/htmlStripReaderTest.html"
-          exclude "src/resources/*.json"
           exclude "src/resources/*.xml"
-          exclude "src/test-files/**/*.csv"
-          exclude "src/test-files/**/*.json"
           exclude "src/test-files/**/*.aff"
           exclude "src/test-files/**/*.dic"
           exclude "src/test-files/**/*.conf"
@@ -159,7 +225,6 @@ allprojects {
           include "*.yml"
           include "**/*.adoc"
           exclude "ui-src/**"
-          exclude "node_modules/**"
           break
 
         case ":solr:docker":
@@ -172,7 +237,6 @@ allprojects {
           break
 
         case ":solr:solrj":
-          exclude "src/**/*.json"
           exclude "src/test-files/**/*.cfg"
           exclude "src/test-files/**/*.xml"
           break
@@ -269,11 +333,11 @@ class RatTask extends DefaultTask {
   }
 
   def printUnknownFiles(File reportFile) {
-    def ratXml = new XmlParser().parse(reportFile)
+    def ratXml = new XmlSlurper().parse(reportFile)
     def errors = []
     ratXml.resource.each {resource ->
-      if (resource.'license-approval'.@name[0] == "false") {
-        errors << "Unknown license: ${resource.@name}"
+      if (resource.'license-approval'.@name.text() == "false") {
+        errors << "Unknown license: ${resource.@name.text()}"
       }
     }
     def checkProp = "validation.rat.failOnError"

Reply via email to