Repository: tika
Updated Branches:
  refs/heads/master b2821d921 -> de6dbd0a7


TIKA-1971 - add another magic for rfc822


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/e08d0065
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/e08d0065
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/e08d0065

Branch: refs/heads/master
Commit: e08d00654a77267686a112e0665c324ef041c033
Parents: bc0b1f7
Author: tballison <[email protected]>
Authored: Tue May 17 10:05:35 2016 -0400
Committer: tballison <[email protected]>
Committed: Tue May 17 10:05:35 2016 -0400

----------------------------------------------------------------------
 .../org/apache/tika/mime/tika-mimetypes.xml     | 12 +++++++
 .../tika/parser/mail/RFC822ParserTest.java      | 11 +++++++
 .../test-documents/testRFC822_date_utf8         |  8 +++++
 .../resources/test-documents/testRFC822_eml     | 33 ++++++++++++++++++++
 4 files changed, 64 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/e08d0065/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index d82bd95..11b2c3f 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -679,6 +679,17 @@
     <sub-class-of type="text/plain"/>
   </mime-type>
 
+  <mime-type type="application/x-dbf">
+    <_comment>DBF/dbase 3 File</_comment>
+    <magic priority="100">
+      <match value="(?s)^[\x02\x03\x30\x31\x32\x43\x63\x83\x8B\xCB\xF5\xE5\xFB].[\x01-\x0C][\x01-\x1F].{4}(?:.[^\x00]|[\x41-\xFF].)(?:[^\x00\x01].|.[^\x00]).{31}([\x00][^\x00]{0,10})[BCDFGILMNOPQTVWXY@+]" type="regex" offset="0"/>
+    </magic>
+    <glob pattern="*.dbf"/>
+    <glob pattern="*.dbase"/>
+    <glob pattern="*.dbase3"/>
+    <sub-class-of type="text/plain"/>
+  </mime-type>
+
   <mime-type type="application/rtx"/>
   <mime-type type="application/samlassertion+xml"/>
   <mime-type type="application/samlmetadata+xml"/>
@@ -5299,6 +5310,7 @@
 
   <mime-type type="message/rfc822">
     <magic priority="50">
+      <match value="Delivered-To:" type="string" offset="0"/>
       <match value="Status:" type="string" offset="0"/>
       <match value="X-Mozilla-Status:" type="string" offset="0"/>
       <match value="X-Mozilla-Status2:" type="string" offset="0"/>

http://git-wip-us.apache.org/repos/asf/tika/blob/e08d0065/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index a80e544..0e8f613 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -381,4 +381,15 @@ public class RFC822ParserTest extends TikaTest {
         assertEquals(MediaType.TEXT_HTML, tracker.mediaTypes.get(1));
         assertEquals(MediaType.image("gif"), tracker.mediaTypes.get(2));
     }
+
+    @Test
+    public void testDetection() throws Exception {
+        //test simple text file
+        XMLResult r = getXML("testRFC822_date_utf8");
+        assertEquals("message/rfc822", r.metadata.get(Metadata.CONTENT_TYPE));
+
+        //test without extension
+        r = getXML("testRFC822_eml");
+        assertEquals("message/rfc822", r.metadata.get(Metadata.CONTENT_TYPE));
+    }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/e08d0065/tika-parsers/src/test/resources/test-documents/testRFC822_date_utf8
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testRFC822_date_utf8 b/tika-parsers/src/test/resources/test-documents/testRFC822_date_utf8
new file mode 100644
index 0000000..c3fd0b2
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testRFC822_date_utf8
@@ -0,0 +1,8 @@
+From: Philipp Steinkrüger <[email protected]>
+Subject: Testemail
+Date: 16 May 2016 at 09:30:32  GMT+1
+To: Philipp Steinkrüger <[email protected]>
+
+
+  GMT+1
+

http://git-wip-us.apache.org/repos/asf/tika/blob/e08d0065/tika-parsers/src/test/resources/test-documents/testRFC822_eml
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testRFC822_eml b/tika-parsers/src/test/resources/test-documents/testRFC822_eml
new file mode 100644
index 0000000..5ae32ae
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testRFC822_eml
@@ -0,0 +1,33 @@
+Delivered-To: [email protected]
+Received: by 10.114.185.229 with SMTP id ff5csp1359148ldc;
+        Mon, 16 May 2016 01:30:35 -0700 (PDT)
+X-Received: by 10.194.165.226 with SMTP id zb2mr30717859wjb.172.1463387435671;
+        Mon, 16 May 2016 01:30:35 -0700 (PDT)
+Return-Path: <[email protected]>
+Received: from smtp-out.rrz.uni-koeln.de (smtp-out.rrz.uni-koeln.de. [2a00:a200:0:12::25])
+        by mx.google.com with ESMTPS id tk4si20973899wjb.25.2016.05.16.01.30.35
+        for <[email protected]>
+        (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);
+        Mon, 16 May 2016 01:30:35 -0700 (PDT)
+Received-SPF: pass (google.com: domain of [email protected] designates 2a00:a200:0:12::25 as permitted sender) client-ip=2a00:a200:0:12::25;
+Authentication-Results: mx.google.com;
+       spf=pass (google.com: domain of [email protected] designates 2a00:a200:0:12::25 as permitted sender) [email protected]
+Received: from smtp-auth.rrz.uni-koeln.de (smtp-auth.rrz.uni-koeln.de [IPv6:2a00:a200:0:10::27] (may be forged))
+       by smtp-out.rrz.uni-koeln.de (8.14.4/8.14.4) with ESMTP id u4G8UYXw029242
+       for <[email protected]>; Mon, 16 May 2016 10:30:34 +0200
+Received: from [192.168.1.10] (79-66-108-216.dynamic.dsl.as9105.com [79.66.108.216])
+       (authenticated as user altj4 using DIGEST-MD5 bits=0)
+       by smtp-auth.uni-koeln.de (8.13.8/8.13.8) with ESMTP id u4G8UXlP028450
+       (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO)
+       for <[email protected]>; Mon, 16 May 2016 10:30:34 +0200
+From: =?utf-8?Q?Philipp_Steinkr=C3=BCger?= <[email protected]>
+Content-Type: text/plain
+Content-Transfer-Encoding: 7bit
+Subject: Testemail
+Message-Id: <[email protected]>
+Date: Mon, 16 May 2016 09:30:32 +0100
+To: =?utf-8?Q?Philipp_Steinkr=C3=BCger?= <[email protected]>
+Mime-Version: 1.0 (Mac OS X Mail 9.3 \(3124\))
+X-Mailer: Apple Mail (2.3124)
+X-Scanned-By: MIMEDefang 2.75
+

Reply via email to