Repository: tika
Updated Branches:
  refs/heads/2.x 2a69db7bb -> 6c4c9820f


TIKA-1971: add another mime for RFC822


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/464ad910
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/464ad910
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/464ad910

Branch: refs/heads/2.x
Commit: 464ad9105a2962294e9c967ea6d00c337657d77b
Parents: a882a32
Author: tballison <[email protected]>
Authored: Tue May 17 16:05:29 2016 -0400
Committer: tballison <[email protected]>
Committed: Tue May 17 16:05:29 2016 -0400

----------------------------------------------------------------------
 .../org/apache/tika/mime/tika-mimetypes.xml     |  1 +
 .../tika/parser/mail/RFC822ParserTest.java      | 11 +++++++
 .../test-documents/testRFC822_date_utf8         |  8 +++++
 .../resources/test-documents/testRFC822_eml     | 33 ++++++++++++++++++++
 4 files changed, 53 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/464ad910/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
----------------------------------------------------------------------
diff --git 
a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 3efb2e4..e4a2e17 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5171,6 +5171,7 @@
 
   <mime-type type="message/rfc822">
     <magic priority="50">
+      <match value="Delivered-To:" type="string" offset="0"/>
       <match value="Status:" type="string" offset="0"/>
       <match value="X-Mozilla-Status:" type="string" offset="0"/>
       <match value="X-Mozilla-Status2:" type="string" offset="0"/>

http://git-wip-us.apache.org/repos/asf/tika/blob/464ad910/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
----------------------------------------------------------------------
diff --git 
a/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
 
b/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index 2377894..c8c6624 100644
--- 
a/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ 
b/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -373,4 +373,15 @@ public class RFC822ParserTest extends TikaTest {
         assertEquals(MediaType.TEXT_HTML, tracker.mediaTypes.get(1));
         assertEquals(MediaType.image("gif"), tracker.mediaTypes.get(2));
     }
+
+    @Test
+    public void testDetection() throws Exception {
+        //test simple text file
+        XMLResult r = getXML("testRFC822_date_utf8");
+        assertEquals("message/rfc822", r.metadata.get(Metadata.CONTENT_TYPE));
+
+        //test without extension
+        r = getXML("testRFC822_eml");
+        assertEquals("message/rfc822", r.metadata.get(Metadata.CONTENT_TYPE));
+    }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/464ad910/tika-test-resources/src/test/resources/test-documents/testRFC822_date_utf8
----------------------------------------------------------------------
diff --git 
a/tika-test-resources/src/test/resources/test-documents/testRFC822_date_utf8 
b/tika-test-resources/src/test/resources/test-documents/testRFC822_date_utf8
new file mode 100644
index 0000000..c3fd0b2
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/testRFC822_date_utf8
@@ -0,0 +1,8 @@
+From: Philipp Steinkrüger <[email protected]>
+Subject: Testemail
+Date: 16 May 2016 at 09:30:32  GMT+1
+To: Philipp Steinkrüger <[email protected]>
+
+
+  GMT+1
+

http://git-wip-us.apache.org/repos/asf/tika/blob/464ad910/tika-test-resources/src/test/resources/test-documents/testRFC822_eml
----------------------------------------------------------------------
diff --git 
a/tika-test-resources/src/test/resources/test-documents/testRFC822_eml 
b/tika-test-resources/src/test/resources/test-documents/testRFC822_eml
new file mode 100644
index 0000000..5ae32ae
--- /dev/null
+++ b/tika-test-resources/src/test/resources/test-documents/testRFC822_eml
@@ -0,0 +1,33 @@
+Delivered-To: [email protected]
+Received: by 10.114.185.229 with SMTP id ff5csp1359148ldc;
+        Mon, 16 May 2016 01:30:35 -0700 (PDT)
+X-Received: by 10.194.165.226 with SMTP id zb2mr30717859wjb.172.1463387435671;
+        Mon, 16 May 2016 01:30:35 -0700 (PDT)
+Return-Path: <[email protected]>
+Received: from smtp-out.rrz.uni-koeln.de (smtp-out.rrz.uni-koeln.de. 
[2a00:a200:0:12::25])
+        by mx.google.com with ESMTPS id tk4si20973899wjb.25.2016.05.16.01.30.35
+        for <[email protected]>
+        (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);
+        Mon, 16 May 2016 01:30:35 -0700 (PDT)
+Received-SPF: pass (google.com: domain of [email protected] 
designates 2a00:a200:0:12::25 as permitted sender) client-ip=2a00:a200:0:12::25;
+Authentication-Results: mx.google.com;
+       spf=pass (google.com: domain of [email protected] 
designates 2a00:a200:0:12::25 as permitted sender) 
[email protected]
+Received: from smtp-auth.rrz.uni-koeln.de (smtp-auth.rrz.uni-koeln.de 
[IPv6:2a00:a200:0:10::27] (may be forged))
+       by smtp-out.rrz.uni-koeln.de (8.14.4/8.14.4) with ESMTP id 
u4G8UYXw029242
+       for <[email protected]>; Mon, 16 May 2016 10:30:34 
+0200
+Received: from [192.168.1.10] (79-66-108-216.dynamic.dsl.as9105.com 
[79.66.108.216])
+       (authenticated as user altj4 using DIGEST-MD5 bits=0)
+       by smtp-auth.uni-koeln.de (8.13.8/8.13.8) with ESMTP id u4G8UXlP028450
+       (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO)
+       for <[email protected]>; Mon, 16 May 2016 10:30:34 
+0200
+From: =?utf-8?Q?Philipp_Steinkr=C3=BCger?= <[email protected]>
+Content-Type: text/plain
+Content-Transfer-Encoding: 7bit
+Subject: Testemail
+Message-Id: <[email protected]>
+Date: Mon, 16 May 2016 09:30:32 +0100
+To: =?utf-8?Q?Philipp_Steinkr=C3=BCger?= <[email protected]>
+Mime-Version: 1.0 (Mac OS X Mail 9.3 \(3124\))
+X-Mailer: Apple Mail (2.3124)
+X-Scanned-By: MIMEDefang 2.75
+

Reply via email to