This is an automated email from the ASF dual-hosted git repository.

pottlinger pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/creadur-rat.git


The following commit(s) were added to refs/heads/master by this push:
     new d1dd3d81 RAT-512: Detect PDF files as binary and not standard files as 
they do not contain licenses
d1dd3d81 is described below

commit d1dd3d8196cdb08e0352d62bb21aea2006ade27b
Author: P. Ottlinger <[email protected]>
AuthorDate: Mon Dec 22 01:00:31 2025 +0100

    RAT-512: Detect PDF files as binary and not standard files as they do not 
contain licenses
---
 .../main/java/org/apache/rat/analysis/TikaProcessor.java    |   2 +-
 apache-rat-core/src/test/resources/tikaFiles/README.md      |   2 +-
 .../test/resources/tikaFiles/{standard => binary}/Image.pdf | Bin
 src/changes/changes.xml                                     |   3 +++
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git 
a/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java 
b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
index 693af5bf..6e4f6a45 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
@@ -62,7 +62,7 @@ public final class TikaProcessor {
         DOCUMENT_TYPE_MAP.put("application/xhtml+xml", Document.Type.STANDARD);
 
 //        org.apache.tika.parser.pdf.PDFParser", Type.BINARY);
-        DOCUMENT_TYPE_MAP.put("application/pdf", Document.Type.STANDARD);
+        DOCUMENT_TYPE_MAP.put("application/pdf", Document.Type.BINARY);
 //org.apache.tika.parser.pkg.CompressorParser
         DOCUMENT_TYPE_MAP.put("application/zlib", Document.Type.ARCHIVE);
         DOCUMENT_TYPE_MAP.put("application/x-gzip", Document.Type.ARCHIVE);
diff --git a/apache-rat-core/src/test/resources/tikaFiles/README.md 
b/apache-rat-core/src/test/resources/tikaFiles/README.md
index 55e546c5..8fe54679 100644
--- a/apache-rat-core/src/test/resources/tikaFiles/README.md
+++ b/apache-rat-core/src/test/resources/tikaFiles/README.md
@@ -8,4 +8,4 @@ the `notice` subdirectory contains files that are NOTICE types
 
 the `archive` subdirectory contains files that are ARCHIVE types.
 
-The `TikeProcessorTest.testTikaFiles()` automatically runs against the files 
in the directories.  To add a new file to test just place it in the proper 
directory.
+The `TikaProcessorTest.testTikaFiles()` automatically runs against the files 
in the directories. To add a new file to test just place it in the proper 
directory.
diff --git a/apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf 
b/apache-rat-core/src/test/resources/tikaFiles/binary/Image.pdf
similarity index 100%
rename from apache-rat-core/src/test/resources/tikaFiles/standard/Image.pdf
rename to apache-rat-core/src/test/resources/tikaFiles/binary/Image.pdf
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index c706c583..6d93ce2e 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -68,6 +68,9 @@ in order to be properly linked in site reports.
     </release>
     -->
     <release version="1.0.0" date="xxxx-yy-zz" description="Current SNAPSHOT - 
release to be done">
+      <action issue="RAT-512" type="fix" dev="pottlinger" due-to="Niels 
Basjes">
+        Bugfix to mark PDF files as binary instead of standard files as they 
do not contain licenses.
+      </action>
       <action issue="RAT-524" type="add" dev="claudenw">
         Fixes case-sensitive detection time of underlying file system and 
removed MAVEN StandardCollection from default Maven processing to improve 
overall processing time.
       </action>

Reply via email to