This is an automated email from the ASF dual-hosted git repository.
pottlinger pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/creadur-rat.git
The following commit(s) were added to refs/heads/master by this push:
new d1dd3d81 RAT-512: Detect PDF files as binary and not standard files as
they do not contain licenses
d1dd3d81 is described below
commit d1dd3d8196cdb08e0352d62bb21aea2006ade27b
Author: P. Ottlinger <[email protected]>
AuthorDate: Mon Dec 22 01:00:31 2025 +0100
RAT-512: Detect PDF files as binary and not standard files as they do not
contain licenses
---
.../main/java/org/apache/rat/analysis/TikaProcessor.java | 2 +-
apache-rat-core/src/test/resources/tikaFiles/README.md | 2 +-
.../test/resources/tikaFiles/{standard => binary}/Image.pdf | Bin
src/changes/changes.xml | 3 +++
4 files changed, 5 insertions(+), 2 deletions(-)
diff --git
a/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
index 693af5bf..6e4f6a45 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
@@ -62,7 +62,7 @@ public final class TikaProcessor {
DOCUMENT_TYPE_MAP.put("application/xhtml+xml", Document.Type.STANDARD);
// org.apache.tika.parser.pdf.PDFParser", Type.BINARY);
- DOCUMENT_TYPE_MAP.put("application/pdf", Document.Type.STANDARD);
+ DOCUMENT_TYPE_MAP.put("application/pdf", Document.Type.BINARY);
//org.apache.tika.parser.pkg.CompressorParser
DOCUMENT_TYPE_MAP.put("application/zlib", Document.Type.ARCHIVE);
DOCUMENT_TYPE_MAP.put("application/x-gzip", Document.Type.ARCHIVE);
diff --git a/apache-rat-core/src/test/resources/tikaFiles/README.md
b/apache-rat-core/src/test/resources/tikaFiles/README.md
index 55e546c5..8fe54679 100644
--- a/apache-rat-core/src/test/resources/tikaFiles/README.md
+++ b/apache-rat-core/src/test/resources/tikaFiles/README.md
@@ -8,4 +8,4 @@ the `notice` subdirectory contains files that are NOTICE types
the `archive` subdirectory contains files that are ARCHIVE types.
-The `TikeProcessorTest.testTikaFiles()` automatically runs against the files
in the directories. To add a new file to test just place it in the proper
directory.
+The `TikaProcessorTest.testTikaFiles()` automatically runs against the files
in the directories. To add a new file to test just place it in the proper
directory.
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf
b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.pdf
similarity index 100%
rename from apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf
rename to apache-rat-core/src/test/resources/tikaFiles/binary/Image.pdf
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index c706c583..6d93ce2e 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -68,6 +68,9 @@ in order to be properly linked in site reports.
</release>
-->
<release version="1.0.0" date="xxxx-yy-zz" description="Current SNAPSHOT -
release to be done">
+ <action issue="RAT-512" type="fix" dev="pottlinger" due-to="Niels
Basjes">
+ Bugfix to mark PDF files as binary instead of standard files as they
do not contain licenses.
+ </action>
<action issue="RAT-524" type="add" dev="claudenw">
Fixes case-sensitive detection time of underlying file system and
removed MAVEN StandardCollection from default Maven processing to improve
overall processing time.
</action>