This is an automated email from the ASF dual-hosted git repository.
claude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/creadur-rat.git
The following commit(s) were added to refs/heads/master by this push:
new 6263fd82 RAT-473: Update documentation (#472)
6263fd82 is described below
commit 6263fd8234493ff9cd8210fb797ef95abb31c278
Author: Claude Warren <[email protected]>
AuthorDate: Mon Jun 2 08:19:14 2025 +0100
RAT-473: Update documentation (#472)
Updated documentation to reflect file processor changes and enhancements.
Co-authored-by: P. Ottlinger <[email protected]>
---
.../AbstractFileProcessorBuilder.java | 4 +-
.../apache/rat/document/DocumentNameMatcher.java | 35 ++++---
apache-rat-core/src/site/apt/FileNamePattern.apt | 29 ------
.../markdown/development/write_file_processor.md | 113 ++++++++++++++++++---
4 files changed, 124 insertions(+), 57 deletions(-)
diff --git
a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/fileProcessors/AbstractFileProcessorBuilder.java
b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/fileProcessors/AbstractFileProcessorBuilder.java
index 4f526261..4639983b 100644
---
a/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/fileProcessors/AbstractFileProcessorBuilder.java
+++
b/apache-rat-core/src/main/java/org/apache/rat/config/exclusion/fileProcessors/AbstractFileProcessorBuilder.java
@@ -46,11 +46,11 @@ import org.apache.rat.document.DocumentNameMatcher;
/**
* Creates a List of {@link MatcherSet}s that represent the inclusions and
exclusions of this file processor.
* <p>
- * By default this processor:
+ * By default, this processor:
* </p>
* <ul>
* <li>Creates a list of levels that correspond to the depth of the
directories where the specific include/exclude file is located.
- * Directory depth is relative to the initially discovered include/exclude
file.</li>
+ * Directory depth is normally relative to the initially discovered
include/exclude file.</li>
* <li>A MatcherSet is created for each include/exclude file located, and
the MatcherSet is added to the proper level.</li>
* <li>During the build:
* <ul>
diff --git
a/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java
b/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java
index 7aee5f9a..61412127 100644
---
a/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java
+++
b/apache-rat-core/src/main/java/org/apache/rat/document/DocumentNameMatcher.java
@@ -65,7 +65,7 @@ public final class DocumentNameMatcher {
public DocumentNameMatcher(final String name, final
Predicate<DocumentName> predicate) {
this.name = name;
this.predicate = predicate;
- this.isCollection = predicate instanceof CollectionPredicate;
+ this.isCollection = predicate instanceof CompoundPredicate;
}
/**
@@ -103,17 +103,24 @@ public final class DocumentNameMatcher {
}
/**
- * Constructs a DocumentNameMatcher from a name and a DocumentName
predicate.
+ * Constructs a DocumentNameMatcher from a name and a MatchPatterns
object.
* @param name the name of the matcher.
* @param matchers fully specified matchers.
*/
public DocumentNameMatcher(final String name, final MatchPatterns
matchers) {
- this(name, new CollectionPredicate() {
+ this(name, new CompoundPredicate() {
@Override
public Iterable<DocumentNameMatcher> getMatchers() {
final List<DocumentNameMatcher> result = new ArrayList<>();
matchers.patterns().forEach(p -> result.add(new
DocumentNameMatcher(p.source(),
- (Predicate<DocumentName>) x -> MatchPatterns.from("/",
p.source()).matches(x.getName(), x.isCaseSensitive()))));
+ new Predicate<DocumentName>() {
+ private final MatchPatterns patterns =
MatchPatterns.from("/", p.source());
+
+ @Override
+ public boolean test(final DocumentName
documentName) {
+ return
patterns.matches(documentName.getName(), documentName.isCaseSensitive());
+ }
+ })));
return result;
}
@@ -302,7 +309,7 @@ public final class DocumentNameMatcher {
}
List<DocumentNameMatcher> workingSet = Arrays.asList(includes,
excludes);
return new DocumentNameMatcher(format("matcherSet(%s)",
join(workingSet)),
- new DefaultCollectionPredicate(workingSet) {
+ new DefaultCompoundPredicate(workingSet) {
@Override
public boolean test(final DocumentName documentName) {
if (includes.matches(documentName)) {
@@ -396,13 +403,13 @@ public final class DocumentNameMatcher {
/**
* A marker interface to indicate this predicate contains a collection of
matchers.
*/
- interface CollectionPredicate extends Predicate<DocumentName> {
+ interface CompoundPredicate extends Predicate<DocumentName> {
Iterable<DocumentNameMatcher> getMatchers();
}
/**
- * A {@link CollectionPredicate} implementation.
+ * A {@link CompoundPredicate} implementation.
*/
- abstract static class DefaultCollectionPredicate implements
CollectionPredicate {
+ abstract static class DefaultCompoundPredicate implements
CompoundPredicate {
/** The collection for matchers that make up this predicate */
private final Iterable<DocumentNameMatcher> matchers;
@@ -410,7 +417,7 @@ public final class DocumentNameMatcher {
* Constructs a collection predicate from the collection of matchers.
* @param matchers the collection of matchers to use.
*/
- protected DefaultCollectionPredicate(final
Iterable<DocumentNameMatcher> matchers) {
+ protected DefaultCompoundPredicate(final Iterable<DocumentNameMatcher>
matchers) {
this.matchers = matchers;
}
@@ -436,7 +443,7 @@ public final class DocumentNameMatcher {
* An implementation of "and" logic across a collection of
DocumentNameMatchers.
*/
// package private for testing access
- static class And extends DefaultCollectionPredicate {
+ static class And extends DefaultCompoundPredicate {
And(final Iterable<DocumentNameMatcher> matchers) {
super(matchers);
}
@@ -456,7 +463,7 @@ public final class DocumentNameMatcher {
* An implementation of "or" logic across a collection of
DocumentNameMatchers.
*/
// package private for testing access
- static class Or extends DefaultCollectionPredicate {
+ static class Or extends DefaultCompoundPredicate {
Or(final Iterable<DocumentNameMatcher> matchers) {
super(matchers);
}
@@ -498,8 +505,8 @@ public final class DocumentNameMatcher {
return format("%s%s: >>%s<< %s%n%s",
fill, matcher.toString(), result,
level == 0 ? candidate.getName() : "",
- matcher.predicate instanceof CollectionPredicate ?
- decompose(level + 1, (CollectionPredicate)
matcher.predicate, candidate) :
+ matcher.predicate instanceof CompoundPredicate ?
+ decompose(level + 1, (CompoundPredicate)
matcher.predicate, candidate) :
String.format("%s%s >>%s<<", createFill(level + 1),
matcher.predicate.toString(), matcher.predicate.test(candidate)));
}
@@ -509,7 +516,7 @@ public final class DocumentNameMatcher {
return new String(chars);
}
- private String decompose(final int level, final CollectionPredicate
predicate, final DocumentName candidate) {
+ private String decompose(final int level, final CompoundPredicate
predicate, final DocumentName candidate) {
List<DecomposeData> result = new ArrayList<>();
for (DocumentNameMatcher nameMatcher : predicate.getMatchers()) {
diff --git a/apache-rat-core/src/site/apt/FileNamePattern.apt
b/apache-rat-core/src/site/apt/FileNamePattern.apt
deleted file mode 100644
index ca865129..00000000
--- a/apache-rat-core/src/site/apt/FileNamePattern.apt
+++ /dev/null
@@ -1,29 +0,0 @@
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~ Licensed to the Apache Software Foundation (ASF) under one or more
-~~ contributor license agreements. See the NOTICE file distributed with
-~~ this work for additional information regarding copyright ownership.
-~~ The ASF licenses this file to You under the Apache License, Version 2.0
-~~ (the "License"); you may not use this file except in compliance with
-~~ the License. You may obtain a copy of the License at
-~~
-~~ http://www.apache.org/licenses/LICENSE-2.0
-~~
-~~ Unless required by applicable law or agreed to in writing, software
-~~ distributed under the License is distributed on an "AS IS" BASIS,
-~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-~~ See the License for the specific language governing permissions and
-~~ limitations under the License.
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*
-| | foo/x/y | b/foo | b/foo/x | a/b/foo | foo
| a/b/foo/x/y | a/b/foo/x | foo/x | b/foo/x/y |
-| foo | F | F | F | F | T
| F | F | F | F |
-| foo/* | F | F | F | F | F
| F | F | T | F |
-| foo/** | T | F | F | F | T
| F | F | T | F |
-| */foo | F | T | F | F | F
| F | F | F | F |
-| */foo/* | F | F | T | F | F
| F | F | F | F |
-| */foo/** | F | T | T | F | F
| F | F | F | T |
-| **/foo | F | T | F | T | T
| F | F | F | F |
-| **/foo/* | F | F | T | F | F
| F | T | T | F |
-| **/foo/** | T | T | T | T | T
| T | T | T | T |
-*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*-------------*
diff --git
a/apache-rat-core/src/site/markdown/development/write_file_processor.md
b/apache-rat-core/src/site/markdown/development/write_file_processor.md
index 11fb3e62..5079d909 100644
--- a/apache-rat-core/src/site/markdown/development/write_file_processor.md
+++ b/apache-rat-core/src/site/markdown/development/write_file_processor.md
@@ -19,30 +19,119 @@
> ## Required Knowledge
> Knowledge of the following topics is recommended:
> * [DocumentName](document_name.html): The DocumentName class that is used
> to identify files.
-> * Rat [Exclude Expressions](../exclusion_expression.html): The expressions
that are used to match file names
+> * RAT [Exclude Expressions](../exclusion_expression.html): The expressions
that are used to match file names.
+A file processor is a construct that locates files with a specific name in the
directory tree and reads from them file patterns that are translated into RAT
include or exclude expressions. These files are normally found in the file
directory tree and their restrictions normally apply only to files at the
same directory level as the processed file or below. When these files are
processed the result is a MatcherSet indicating the files to be explicitly
included and the files to be exclud [...]
-A FileProcessor is a module that locates files with a specific name in the
directory tree and reads from them file patterns that are translated into Rat
exclude expressions. These files are normally found in the file directory tree
and their restrictions normally only applies to files at the same directory
level as the processed file or below. This type of file is implemented by the
`org.apache.rat.config.exclusion.MatcherSet.Builder`.
+## MatcherSet
-The `DescendingFileProcessor` takes a file name and one or more comment
prefixes as in the constructor. The file name is normally a file that is
generally hidden on Linux systems like ".gitignore" or ".hgignore". The
`DescendingFileProcessor` will scan the directories looking for files with the
specified name. If one is found it is passed to the `process(DocumentName)`
method which reads the document and returns a list of exclude expressions.
+The matcher set comprises two collections of patterns, one to include and one
to exclude. These collections are implemented as DocumentNameMatcher instances.
The DocumentNameMatcher patterns are fully qualified to the directory in which
the document specified by the DocumentName is found.
-Classes that extend the `DescendingFileProcessor` have two main extension
points: `modifyEntry(DocumentName, String)` and `process(DocumentName)`.
+The order of the match patterns is retained. Multiple MatcherSets may be
combined into a single MatcherSet.
-## modifyEntry
+## DocumentNameMatcher
-The `modifyEntry` method accepts the source `DocumentName` and a non-comment
string. It is expected to process the string and return an exclude expression
or null if the line does not result in an exclude expression. The default
implementation simply returns the string argument.
+The document name matcher is, as the name says, used to determine if a
document name is matched. It comprises a `Predicate` to match the file name,
the name of the DocumentNameMatcher and a flag to indicate if the matcher is a
collection of matchers.
-An example of `modifyEntry` is found in the `BazaarIgnoreProcessor` where
lines that start with "RE:" are regular expressions and all other lines are
standard exclude patterns. The `BazaarIgnoreProcessor.modifyEntry` method
converts "RE:" prefixed strings into the standard exclude regular expression
string.
+The name is used to provide feedback to identify where the restriction comes
from. For example the pattern "/**/foo.txt" may have the pattern as the name of
the DocumentNameMatcher while a DocumentNameMatcher of exclusions generated by
an exclude file called `/MyExcludeFile` may be called "excluded /MyExcludeFile".
-## process
+Multiple DocumentNameMatchers may be combined together using the
`DocumentNameMatcher.Or` or `DocumentNameMatcher.And` classes. Additionally,
DocumentNameMatchers may be negated by use of the `DocumentNameMatcher.Not`
class.
-In many cases the process method does not need to be modified. In general the
process method:
+## AbstractFileProcessorBuilder
+
+In many cases a file processor should process multiple files in the source
tree, for example the `.gitignore` or `.hgignore` files. To implement a file
processor that performs a walk down the source tree the
`AbstractFileProcessorBuilder` is used.
+
+The `AbstractFileProcessorBuilder` constructor takes a file name, one or more
comment prefixes, and a flag to indicate whether the file name should be listed
in the exclude list. The file name normally is a file that is generally hidden
on Linux systems like ".gitignore" or ".hgignore". The
`AbstractFileProcessorBuilder` will scan the directories looking for files with
the specified name. If one is found it is passed to the `process(DocumentName)`
method which reads the document and re [...]
+
+Classes that extend the `AbstractFileProcessorBuilder` have two main extension
points: `modifyEntry(DocumentName, String)` and `process(DocumentName)`.
+
+### Extension Points
+#### modifyEntry
+
+The `modifyEntry` method accepts the source `DocumentName` and a non-comment
string. It is expected to process the string and return an exclude expression
or null if the line does not result in an exclude expression. The default
implementation simply returns the string argument.
+
+An example of `modifyEntry` is found in the `BazaarIgnoreBuilder` where lines
that start with "RE:" are regular expressions and all other lines are standard
exclude patterns. The `BazaarIgnoreBuilder.modifyEntry` method converts "RE:"
prefixed strings into the standard exclude regular expression string.
+
+#### process
+
+In many cases the process method does not need to be modified. In general the
process method:
* Opens a File on the `DocumentName`
* Reads each line in the file
* Calls the modifyEntry on the line.
- * if the line is not null:
+ * If the line is not null:
* Uses the `FileProcessor.localizePattern()` to create a DocumentName for
the pattern with the baseName specified as the name of the file being read.
* Stores the new document name in the list of names being returned.
* Repeats until all the lines in the input file have been read.
-
-Classes that override the `process` method generally do so because they have
some special cases. For example the `GitFileProcessor` has some specific rules
about when to add wildcard paths and when the paths are literal So a special
process is required.
+
+Classes that override the `process` method generally do so because they have
some special cases. For example the `GitIgnoreBuilder` has some specific rules
about when to add wildcard paths and when the paths are literal. Thus a special
process is required.
+
+## Theory of Operation
+
+The AbstractFileProcessorBuilder creates MatcherSets for each instance of the
target file it finds in the source tree. Those MatcherSets are organized into
levels based on how far down the tree the target file is. MatcherSets generated
from files in the root of the tree are at level zero while files found in a
subdirectory of root are at level 1, and subdirectories of subdirectories of
root are at level 2 and so on.
+
+The builder constructs a list of MatcherSets with the MatcherSets from the
deepest level combined followed by the MatcherSets from the next deepest level
and so on to the shallowest level. This ensures that later files override
earlier files.
+
+If files outside the source tree need to be processed, the builder must
override the `process` method to add the processed files at the appropriate
level. An example of this can be seen in the `GitIgnoreBuilder` code where a
global ignore file is added at level -1 because it must be processed after all
the explicit includes and excludes found in the source tree.
+
+## Debugging
+
+Debugging a DocumentNameMatcher might be difficult due to the nested Predicate
nature of the structure. However, the `decompose()` method provides a view into
the inner operation of the class without having to execute a stepwise debugging
session.
+
+Assuming there is a candidate document name that needs to be checked the
following code block will output the call tree of the DocumentNameMatcher and
show exactly what the result of each test is.
+
+```
+ DocumentNameMatcher matcher = ...;
+ DocumentName candidate = DocumentName.builder()
+ .setName(dirName+"/dir1/file1.log")
+ .setBaseName(dirName).build();
+ System.out.println("Decomposition for " + candidate);
+ matcher.decompose(candidate).forEach(System.out::println);
+```
+
+The result will list the name of the test, the result of the test, the name of
the document being tested, and the predicate being executed. If the predicate
is a CompoundPredicate then each of the matchers from the CompoundPredicate
will be decomposed as well. The result is a display of all the predicates and
an indication of which one, if any, fired.
+
+## Examples
+
+All the examples below use `/testName` as the candidate name to match.
+
+### FileFilter
+
+A DocumentNameMatcher created as: `DocumentNameMatcher matcher1 = new
DocumentNameMatcher("FileFilterTest", new NameFileFilter("File.name"));`
+
+will produce:
+
+```
+FileFilterTest: >>false<< /testName
+ NameFileFilter(File.name) >>false<<
+ ```
+
+### Multiple patterns
+
+A DocumentNameMatcher created as: `DocumentNameMatcher matcher2 = new
DocumentNameMatcher("MatchPatternsTest", MatchPatterns.from("/", "**/test1*",
"**/*Name"));`
+
+will produce:
+
+```
+MatchPatternsTest: >>true<< /testName
+ **/test1*: >>false<<
+
org.apache.rat.document.DocumentNameMatcher$1$$Lambda/0x00007f0c3c141f58@465232e9
>>false<<
+ **/*Name: >>true<<
+
org.apache.rat.document.DocumentNameMatcher$1$$Lambda/0x00007f0c3c141f58@798162bc
>>true<<
+```
+
+### Combined patterns
+
+If the above two matchers are combined into a single DocumentNameMatcher as:
`DocumentNameMatcher.matcherSet(matcher1, matcher2);`
+
+it will produce:
+
+```
+matcherSet(FileFilterTest, MatchPatternsTest): >>false<< /testName
+ FileFilterTest: >>false<<
+ NameFileFilter(File.name) >>false<<
+ MatchPatternsTest: >>true<<
+ **/test1*: >>false<<
+
org.apache.rat.document.DocumentNameMatcher$1$$Lambda/0x00007f0c3c141f58@6f36c2f0
>>false<<
+ **/*Name: >>true<<
+
org.apache.rat.document.DocumentNameMatcher$1$$Lambda/0x00007f0c3c141f58@f58853c
>>true<<
+```