(creadur-rat) branch master updated: RAT-473: Update documentation (#472)

claude Mon, 02 Jun 2025 00:19:25 -0700

This is an automated email from the ASF dual-hosted git repository.

claude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/creadur-rat.git



The following commit(s) were added to refs/heads/master by this push:
     new 6263fd82 RAT-473: Update documentation (#472)
6263fd82 is described below

commit 6263fd8234493ff9cd8210fb797ef95abb31c278
Author: Claude Warren <[email protected]>
AuthorDate: Mon Jun 2 08:19:14 2025 +0100

    RAT-473: Update documentation (#472)
    
    Updated documentation to reflect file processor changes and enhancements.
    
    
    Co-authored-by: P. Ottlinger <[email protected]>
---
 .../AbstractFileProcessorBuilder.java              |   4 +-
 .../apache/rat/document/DocumentNameMatcher.java   |  35 ++++---
 apache-rat-core/src/site/apt/FileNamePattern.apt   |  29 ------
 .../markdown/development/write_file_processor.md   | 113 ++++++++++++++++++---
 4 files changed, 124 insertions(+), 57 deletions(-)

diff --git 
a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/fileProcessors/AbstractFileProcessorBuilder.java
 
b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/fileProcessors/AbstractFileProcessorBuilder.java
index 4f526261..4639983b 100644
--- 
a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/fileProcessors/AbstractFileProcessorBuilder.java
+++ 
b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/fileProcessors/AbstractFileProcessorBuilder.java
@@ -46,11 +46,11 @@ import org.apache.rat.document.DocumentNameMatcher;
 /**
  * Creates a List of {@link MatcherSet}s that represent the inclusions and 
exclusions of this file processor.
  * <p>
- *     By default this processor:
+ *     By default, this processor:
  * </p>
  * <ul>
  *     <li>Creates a list of levels that correspond to the depth of the 
directories where the specific include/exclude file is located.
- *     Directory depth is relative to the initially discovered include/exclude 
file.</li>
+ *     Directory depth is normally relative to the initially discovered 
include/exclude file.</li>
  *     <li>A MatcherSet is created for each include/exclude file located, and 
the MatcherSet is added to the proper level.</li>
  *     <li>During the build:
  *     <ul>
diff --git 
a/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java
 
b/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java
index 7aee5f9a..61412127 100644
--- 
a/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java
+++ 
b/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java
@@ -65,7 +65,7 @@ public final class DocumentNameMatcher {
     public DocumentNameMatcher(final String name, final 
Predicate<DocumentName> predicate) {
         this.name = name;
         this.predicate = predicate;
-        this.isCollection = predicate instanceof CollectionPredicate;
+        this.isCollection = predicate instanceof CompoundPredicate;
     }
 
     /**
@@ -103,17 +103,24 @@ public final class DocumentNameMatcher {
     }
 
     /**
-     * Constructs a DocumentNameMatcher from a name and a DocumentName 
predicate.
+     * Constructs a DocumentNameMatcher from a name and a MatchPatterns 
object.
      * @param name the name of the matcher.
      * @param matchers fully specified matchers.
      */
     public DocumentNameMatcher(final String name, final MatchPatterns 
matchers) {
-        this(name, new CollectionPredicate() {
+        this(name, new CompoundPredicate() {
             @Override
             public Iterable<DocumentNameMatcher> getMatchers() {
                 final List<DocumentNameMatcher> result = new ArrayList<>();
                 matchers.patterns().forEach(p -> result.add(new 
DocumentNameMatcher(p.source(),
-                        (Predicate<DocumentName>) x -> MatchPatterns.from("/", 
p.source()).matches(x.getName(), x.isCaseSensitive()))));
+                        new Predicate<DocumentName>() {
+                            private final MatchPatterns patterns = 
MatchPatterns.from("/", p.source());
+
+                            @Override
+                            public boolean test(final DocumentName 
documentName) {
+                                return 
patterns.matches(documentName.getName(), documentName.isCaseSensitive());
+                            }
+                        })));
                 return result;
             }
 
@@ -302,7 +309,7 @@ public final class DocumentNameMatcher {
         }
         List<DocumentNameMatcher> workingSet = Arrays.asList(includes, 
excludes);
         return new DocumentNameMatcher(format("matcherSet(%s)", 
join(workingSet)),
-                new DefaultCollectionPredicate(workingSet) {
+                new DefaultCompoundPredicate(workingSet) {
                     @Override
                     public boolean test(final DocumentName documentName) {
                         if (includes.matches(documentName)) {
@@ -396,13 +403,13 @@ public final class DocumentNameMatcher {
     /**
      * A marker interface to indicate this predicate contains a collection of 
matchers.
      */
-    interface CollectionPredicate extends Predicate<DocumentName> {
+    interface CompoundPredicate extends Predicate<DocumentName> {
         Iterable<DocumentNameMatcher> getMatchers();
     }
     /**
-     * A {@link CollectionPredicate} implementation.
+     * A {@link CompoundPredicate} implementation.
      */
-    abstract static class DefaultCollectionPredicate implements 
CollectionPredicate {
+    abstract static class DefaultCompoundPredicate implements 
CompoundPredicate {
         /** The collection for matchers that make up this predicate */
         private final Iterable<DocumentNameMatcher> matchers;
 
@@ -410,7 +417,7 @@ public final class DocumentNameMatcher {
          * Constructs a collection predicate from the collection of matchers.
          * @param matchers the collection of matchers to use.
          */
-        protected DefaultCollectionPredicate(final 
Iterable<DocumentNameMatcher> matchers) {
+        protected DefaultCompoundPredicate(final Iterable<DocumentNameMatcher> 
matchers) {
             this.matchers = matchers;
         }
 
@@ -436,7 +443,7 @@ public final class DocumentNameMatcher {
      * An implementation of "and" logic across a collection of 
DocumentNameMatchers.
      */
     // package private for testing access
-    static class And extends DefaultCollectionPredicate {
+    static class And extends DefaultCompoundPredicate {
         And(final Iterable<DocumentNameMatcher> matchers) {
             super(matchers);
         }
@@ -456,7 +463,7 @@ public final class DocumentNameMatcher {
      * An implementation of "or" logic across a collection of 
DocumentNameMatchers.
      */
     // package private for testing access
-    static class Or extends DefaultCollectionPredicate {
+    static class Or extends DefaultCompoundPredicate {
         Or(final Iterable<DocumentNameMatcher> matchers) {
             super(matchers);
         }
@@ -498,8 +505,8 @@ public final class DocumentNameMatcher {
             return format("%s%s: >>%s<< %s%n%s",
                     fill, matcher.toString(), result,
                     level == 0 ? candidate.getName() : "",
-                    matcher.predicate instanceof CollectionPredicate ?
-                            decompose(level + 1, (CollectionPredicate) 
matcher.predicate, candidate) :
+                    matcher.predicate instanceof CompoundPredicate ?
+                            decompose(level + 1, (CompoundPredicate) 
matcher.predicate, candidate) :
                     String.format("%s%s >>%s<<", createFill(level + 1), 
matcher.predicate.toString(), matcher.predicate.test(candidate)));
         }
 
@@ -509,7 +516,7 @@ public final class DocumentNameMatcher {
             return new String(chars);
         }
 
-        private String decompose(final int level, final CollectionPredicate 
predicate, final DocumentName candidate) {
+        private String decompose(final int level, final CompoundPredicate 
predicate, final DocumentName candidate) {
             List<DecomposeData> result = new ArrayList<>();
 
             for (DocumentNameMatcher nameMatcher : predicate.getMatchers()) {
diff --git a/apache-rat-core/src/site/apt/FileNamePattern.apt 
b/apache-rat-core/src/site/apt/FileNamePattern.apt
deleted file mode 100644
index ca865129..00000000
--- a/apache-rat-core/src/site/apt/FileNamePattern.apt
+++ /dev/null
@@ -1,29 +0,0 @@
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~   Licensed to the Apache Software Foundation (ASF) under one or more
-~~   contributor license agreements.  See the NOTICE file distributed with
-~~   this work for additional information regarding copyright ownership.
-~~   The ASF licenses this file to You under the Apache License, Version 2.0
-~~   (the "License"); you may not use this file except in compliance with
-~~   the License.  You may obtain a copy of the License at
-~~
-~~       http://www.apache.org/licenses/LICENSE-2.0
-~~
-~~   Unless required by applicable law or agreed to in writing, software
-~~   distributed under the License is distributed on an "AS IS" BASIS,
-~~   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-~~   See the License for the specific language governing permissions and
-~~   limitations under the License.
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*
-|             | foo/x/y     | b/foo       | b/foo/x     | a/b/foo     | foo    
     | a/b/foo/x/y | a/b/foo/x   | foo/x       | b/foo/x/y   |
-| foo         | F           | F           | F           | F           | T      
     | F           | F           | F           | F           |
-| foo/*       | F           | F           | F           | F           | F      
     | F           | F           | T           | F           |
-| foo/**      | T           | F           | F           | F           | T      
     | F           | F           | T           | F           |
-| */foo       | F           | T           | F           | F           | F      
     | F           | F           | F           | F           |
-| */foo/*     | F           | F           | T           | F           | F      
     | F           | F           | F           | F           |
-| */foo/**    | F           | T           | T           | F           | F      
     | F           | F           | F           | T           |
-| **/foo      | F           | T           | F           | T           | T      
     | F           | F           | F           | F           |
-| **/foo/*    | F           | F           | T           | F           | F      
     | F           | T           | T           | F           |
-| **/foo/**   | T           | T           | T           | T           | T      
     | T           | T           | T           | T           |
-*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*
diff --git 
a/apache-rat-core/src/site/markdown/development/write_file_processor.md 
b/apache-rat-core/src/site/markdown/development/write_file_processor.md
index 11fb3e62..5079d909 100644
--- a/apache-rat-core/src/site/markdown/development/write_file_processor.md
+++ b/apache-rat-core/src/site/markdown/development/write_file_processor.md
@@ -19,30 +19,119 @@
 > ## Required Knowledge
 > Knowledge of the following topics is recommended:
 >  * [DocumentName](document_name.html): The DocumentName class that is used 
 > to identify files.
->  * Rat [Exclude Expressions](../exclusion_expression.html): The expressions 
that are used to match file names
+>  * RAT [Exclude Expressions](../exclusion_expression.html): The expressions 
that are used to match file names.
 
+A file processor is a construct that locates files with a specific name in the 
directory tree and reads from them file patterns that are translated into RAT 
include or exclude expressions. These files are normally found in the file 
directory tree and their restrictions normally only apply to files at the 
same directory level as the processed file or below. When these files are 
processed the result is a MatcherSet indicating the files to be explicitly 
included and the files to be exclud [...]
 
-A FileProcessor is a module that locates files with a specific name in the 
directory tree and reads from them file patterns that are translated into Rat 
exclude expressions. These files are normally found in the file directory tree 
and their restrictions normally only applies to files at the same directory 
level as the processed file or below.  This type of file is implemented by the 
`org.apache.rat.config.exclusion.MatcherSet.Builder`.
+## MatcherSet
 
-The `DescendingFileProcessor` takes a file name and one or more comment 
prefixes as in the constructor.  The file name is normally a file that is 
generally hidden on Linux systems like ".gitignore" or ".hgignore".  The 
`DescendingFileProcessor` will scan the directories looking for files with the 
specified name.  If one is found it is passed to the `process(DocumentName)` 
method which reads the document and returns a list of exclude expressions.
+The matcher set comprises two collections of patterns, one to include and one 
to exclude. These collections are implemented as DocumentNameMatcher instances. 
The DocumentNameMatcher patterns are fully qualified to the directory in which 
the document specified by the DocumentName is found.
 
-Classes that extend the `DescendingFileProcessor` have two main extension 
points: `modifyEntry(DocumentName, String)` and `process(DocumentName)`. 
+The order of the match patterns is retained. Multiple MatcherSets may be 
combined into a single MatcherSet.
 
-## modifyEntry
+## DocumentNameMatcher
 
-The `modifyEntry` method accepts the source `DocumentName` and a non-comment 
string.  It is expected to process the string and return an exclude expression 
or null if the line does not result in an exclude expression.  The default 
implementation simply returns the string argument.
+The document name matcher is, as the name says, used to determine if a 
document name is matched. It comprises a `Predicate` to match the file name, 
the name of the DocumentNameMatcher and a flag to indicate if the matcher is a 
collection of matchers.
 
-An example of `modifyEntry` is found in the `BazaarIgnoreProcessor` where 
lines that start with "RE:" are regular expressions and all other lines are 
standard exclude patterns.  The `BazaarIgnoreProcessor.modifyEntry` method 
converts "RE:" prefixed strings into the standard exclude regular expression 
string.
+The name is used to provide feedback to identify where the restriction comes 
from. For example the pattern "/**/foo.txt" may have the pattern as the name of 
the DocumentNameMatcher while a DocumentNameMatcher of exclusions generated by 
an exclude file called `/MyExcludeFile` may be called "excluded /MyExcludeFile".
 
-## process
+Multiple DocumentNameMatchers may be combined together using the 
`DocumentNameMatcher.Or` or `DocumentNameMatcher.And` classes. Additionally, 
DocumentNameMatchers may be negated by use of the `DocumentNameMatcher.Not` 
class.
 
-In many cases the process method does not need to be modified.  In general the 
process method:
+## AbstractFileProcessorBuilder
+
+In many cases a file processor should process multiple files in the source 
tree. For example the `.gitignore` or `.hgignore` files. To implement a file 
processor that performs a walk down the source tree the 
`AbstractFileProcessorBuilder` is used.
+
+The `AbstractFileProcessorBuilder` constructor takes a file name, one or more 
comment prefixes, and a flag to indicate whether the file name should be listed 
in the exclude list.  The file name normally is a file that is generally hidden 
on Linux systems like ".gitignore" or ".hgignore".  The 
`AbstractFileProcessorBuilder` will scan the directories looking for files with 
the specified name. If one is found it is passed to the `process(DocumentName)` 
method which reads the document and re [...]
+
+Classes that extend the `AbstractFileProcessorBuilder` have two main extension 
points: `modifyEntry(DocumentName, String)` and `process(DocumentName)`.
+
+### Extension Points
+#### modifyEntry
+
+The `modifyEntry` method accepts the source `DocumentName` and a non-comment 
string. It is expected to process the string and return an exclude expression 
or null if the line does not result in an exclude expression. The default 
implementation simply returns the string argument.
+
+An example of `modifyEntry` is found in the `BazaarIgnoreBuilder` where lines 
that start with "RE:" are regular expressions and all other lines are standard 
exclude patterns. The `BazaarIgnoreBuilder.modifyEntry` method converts "RE:" 
prefixed strings into the standard exclude regular expression string.
+
+#### process
+
+In many cases the process method does not need to be modified. In general the 
process method:
  * Opens a File on the `DocumentName`
  * Reads each line in the file
  * Calls the modifyEntry on the line.
- * if the line is not null:
+ * If the line is not null:
    * Uses the `FileProcessor.localizePattern()` to create a DocumentName for 
the pattern with the baseName specified as the name of the file being read.
    * Stores the new document name in the list of names being returned.
  * Repeats until all the lines in the input file have been read.
- 
-Classes that override the `process` method generally do so because they have 
some special cases.  For example the `GitFileProcessor` has some specific rules 
about when to add wildcard paths and when the paths are literal  So a special 
process is required.
+
+Classes that override the `process` method generally do so because they have 
some special cases. For example the `GitIgnoreBuilder` has some specific rules 
about when to add wildcard paths and when the paths are literal. Thus a special 
process is required.
+
+## Theory of Operation
+
+The AbstractFileProcessorBuilder creates MatcherSets for each instance of the 
target file it finds in the source tree. Those MatcherSets are organized into 
levels based on how far down the tree the target file is. MatcherSets generated 
from files in the root of the tree are at level zero while files found in a 
subdirectory of root are at level 1, and subdirectories of subdirectories of 
root are at level 2 and so on.
+
+The builder constructs a list of MatcherSets with the MatcherSets from the 
deepest level combined followed by the MatcherSets from the next deepest level 
and so on to the shallowest level. This ensures that later files override 
earlier files.
+
+If files outside the source tree need to be processed, the builder will need to 
override the `process` method to add the processed files at the appropriate 
level. An example of this can be seen in the `GitIgnoreBuilder` code where a 
global ignore file is added at level -1 because it must be processed after all 
the explicit includes and excludes found in the source tree.
+
+## Debugging
+
+Debugging a DocumentNameMatcher might be difficult due to the nested Predicate 
nature of the structure. However, the `decompose()` method provides a view into 
the inner operation of the class without having to execute a stepwise debugging 
session.
+
+Assuming there is a candidate document name that needs to be checked the 
following code block will output the call tree of the DocumentNameMatcher and 
show exactly what the result of each test is.
+
+```
+    DocumentNameMatcher matcher = ...;
+    DocumentName candidate = DocumentName.builder()
+            .setName(dirName+"/dir1/file1.log")
+            .setBaseName(dirName).build();
+    System.out.println("Decomposition for " + candidate);
+    matcher.decompose(candidate).forEach(System.out::println);
+```
+
+The result will list the name of the test, the result of the test, the name of 
the document being tested, and the predicate being executed. If the predicate 
is a CompoundPredicate then each of the matchers from the CompoundPredicate 
will be decomposed as well. The result is a display of all the predicates and 
an indication of which one, if any, fired.
+
+## Examples
+
+All the examples below use `/testName` as the candidate name to match.
+
+### FileFilter
+
+A DocumentNameMatcher created as: `DocumentNameMatcher matcher1 = new 
DocumentNameMatcher("FileFilterTest", new NameFileFilter("File.name"));`
+
+will produce:
+
+```
+FileFilterTest: >>false<< /testName
+  NameFileFilter(File.name) >>false<<
+  ```
+
+### Multiple patterns
+
+A DocumentNameMatcher created as: `DocumentNameMatcher matcher2 = new 
DocumentNameMatcher("MatchPatternsTest", MatchPatterns.from("/", "**/test1*", 
"**/*Name"));`
+
+will produce:
+
+```
+MatchPatternsTest: >>true<< /testName
+  **/test1*: >>false<<
+    
org.apache.rat.document.DocumentNameMatcher$1$$Lambda/0x00007f0c3c141f58@465232e9
 >>false<<
+  **/*Name: >>true<<
+    
org.apache.rat.document.DocumentNameMatcher$1$$Lambda/0x00007f0c3c141f58@798162bc
 >>true<<
+```
+
+### Combined patterns
+
+If the above 2 patterns are combined into a single DocumentNameMatcher as: 
`DocumentNameMatcher.matcherSet(matcher1, matcher2);`
+
+it will produce:
+
+```
+matcherSet(FileFilterTest, MatchPatternsTest): >>false<< /testName
+  FileFilterTest: >>false<<
+    NameFileFilter(File.name) >>false<<
+  MatchPatternsTest: >>true<<
+    **/test1*: >>false<<
+      
org.apache.rat.document.DocumentNameMatcher$1$$Lambda/0x00007f0c3c141f58@6f36c2f0
 >>false<<
+    **/*Name: >>true<<
+      
org.apache.rat.document.DocumentNameMatcher$1$$Lambda/0x00007f0c3c141f58@f58853c
 >>true<<
+```

(creadur-rat) branch master updated: RAT-473: Update documentation (#472)

Reply via email to