This is an automated email from the ASF dual-hosted git repository.

lfcnassif pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new ed1c86a52 TIKA-3771: remove eml magic too common causing false positives
ed1c86a52 is described below

commit ed1c86a52d8e07d0d57decfe82ed73a90fb57c8e
Author: Luis Nassif <[email protected]>
AuthorDate: Sun May 22 18:15:43 2022 -0300

    TIKA-3771: remove eml magic too common causing false positives
---
 .../resources/org/apache/tika/mime/tika-mimetypes.xml     |   1 -
 .../test/java/org/apache/tika/mime/MimeDetectionTest.java |   8 ++++++++
 .../resources/org/apache/tika/mime/test-pngNotEml.bin     | Bin 0 -> 938 bytes
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 43d7820d3..db7359608 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -6457,7 +6457,6 @@
         <match value="\nUser-Agent:" type="string" offset="0:1024"/>
         <match value="\nX-Mailer:" type="string" offset="0:1024"/>
         <match value="\nX-Originating-IP:" type="stringignorecase" 
offset="0:1024"/>
-        <match value="\nX-" type="string" offset="0:1024"/>
         <match value="\nDKIM-" type="string" offset="0:1024"/>
         <match value="\nARC-" type="string" offset="0:1024"/>        
       </match>
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index 2c1a71ebd..690da4f29 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -260,4 +260,12 @@ public class MimeDetectionTest {
         metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, 
"testingTESTINGtesting");
         assertEquals(helloXType, MIME_TYPES.detect(new 
ByteArrayInputStream(helloWorld), metadata));
     }
+
+    /**
+     * Test for TIKA-3771.
+     */
+    @Test
+    public void testPNGWithSomeEmlHeaders() throws IOException {
+        testFile("image/png", "test-pngNotEml.bin");
+    }
 }
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin b/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin
new file mode 100644
index 000000000..9fcd031ab
Binary files /dev/null and b/tika-core/src/test/resources/org/apache/tika/mime/test-pngNotEml.bin differ

Reply via email to