dsmiley commented on code in PR #4499:
URL: https://github.com/apache/solr/pull/4499#discussion_r3362614365
##########
gradle/validation/rat-sources.gradle:
##########
@@ -28,15 +39,88 @@ configure(rootProject) {
}
}
+/**
+ * Reads all file paths from the git index (DirCache) once and caches the
result.
+ * Returns null if this is not a git repository or the index cannot be read.
+ * Paths use forward slashes and are relative to the repository root.
+ */
+Set<String> loadGitTrackedFiles() {
+ if (rootProject.ext.has('ratGitIndexLoaded')) return
rootProject.ext.ratGitTrackedFiles
+ rootProject.ext.ratGitIndexLoaded = true
+ rootProject.ext.ratGitTrackedFiles = null
+
+ def dotGit = new File(rootProject.projectDir, ".git")
+ if (dotGit.isFile()) {
+ // git worktrees use a .git file — jgit does not fully support them
+ logger.warn("WARNING: git worktrees are not supported by jgit — RAT
git-tracking filter disabled.")
+ return null
+ }
+ if (!dotGit.isDirectory()) {
+ return null // not a git repository
+ }
+
+ def repository = null
+ try {
+ repository = new FileRepositoryBuilder()
+ .setWorkTree(rootProject.projectDir)
+ .setMustExist(true)
+ .build()
+ def tracked = new HashSet<String>()
+ def dirCache = repository.readDirCache()
+ for (int i = 0; i < dirCache.getEntryCount(); i++) {
+ tracked.add(dirCache.getEntry(i).getPathString())
+ }
+ rootProject.ext.ratGitTrackedFiles = tracked
+ return tracked
+ } catch (Exception e) {
+ logger.warn("WARNING: Could not read git index for RAT check:
${e.message}")
+ return null
+ } finally {
+ if (repository != null) repository.close()
+ }
+}
+
+/**
+ * Returns the set of git-tracked file paths relative to the given project
directory,
+ * or null if git tracking is unavailable.
+ */
+Set<String> gitTrackedFiles(File dir) {
+ def allFiles = loadGitTrackedFiles()
+ if (allFiles == null) return null
+
+ def prefix = rootProject.projectDir.toPath().relativize(dir.toPath())
+ .toString().replace('\\', '/')
+ if (prefix.isEmpty()) return allFiles
+
+ prefix += "/"
+ def result = new HashSet<String>()
+ for (String path : allFiles) {
+ if (path.startsWith(prefix)) {
+ result.add(path.substring(prefix.length()))
+ }
+ }
+ return result
+}
+
// Configure the rat validation task and all scanned directories.
allprojects {
- task("rat", type: RatTask) {
+ tasks.register("rat", RatTask) {
group = 'Verification'
description = 'Runs Apache Rat checks.'
def defaultScanFileTree = project.fileTree(projectDir, {
+ // Only check files tracked by git — skip untracked/gitignored files
+ // (IDE artifacts, AI assistant configs, etc.)
+ def trackedFiles = gitTrackedFiles(projectDir)
+ if (trackedFiles != null) {
+ exclude { element ->
+ // Allow directories through (they are just containers), exclude
untracked files
+ !element.isDirectory() &&
!trackedFiles.contains(element.relativePath.pathString)
+ }
+ }
+
// Don't check under the project's build folder.
- exclude project.buildDir.name
+ exclude project.layout.buildDirectory.get().asFile.name
Review Comment:
This should be a redundant check now.
##########
gradle/validation/rat-sources.gradle:
##########
Review Comment:
.gradle isn't git-tracked
##########
gradle/validation/rat-sources.gradle:
##########
@@ -47,8 +131,8 @@ allprojects {
// Don't check any of the subprojects - they have their own rat tasks.
exclude subprojects.collect {it.projectDir.name}
- // At the module scope we only check selected file patterns as folks
have various .gitignore-d resources
- // generated by IDEs, etc.
+ // The git index filter above excludes untracked files. These patterns
+ // further narrow the scan to file types that should carry license
headers.
Review Comment:
How does "include" accomplish narrowing? ;-)
##########
gradle/validation/rat-sources.gradle:
##########
Review Comment:
We should no longer have to exclude .idea, .muse, or .git.
##########
gradle/validation/rat-sources.gradle:
##########
Review Comment:
I assume we can now omit the Eclipse config files (they are not git-tracked).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]