Author: nick
Date: Fri Jul 19 13:54:30 2013
New Revision: 1504879

URL: http://svn.apache.org/r1504879
Log:
Patch from Kai-Uwe Schmidt from TIKA-1146 - Handle rfc822 message detection 
when header names use unusual (but standards-compliant) letter case, with test

Added:
    
tika/trunk/tika-parsers/src/test/resources/test-documents/testGroupWiseEml.eml
Modified:
    
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1504879&r1=1504878&r2=1504879&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
(original)
+++ 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
Fri Jul 19 13:54:30 2013
@@ -4350,17 +4350,17 @@
 
   <mime-type type="message/rfc822">
     <magic priority="50">
-      <match value="Relay-Version:" type="string" offset="0"/>
+      <match value="Relay-Version:" type="stringignorecase" offset="0"/>
       <match value="#!\ rnews" type="string" offset="0"/>
       <match value="N#!\ rnews" type="string" offset="0"/>
       <match value="Forward\ to" type="string" offset="0"/>
       <match value="Pipe\ to" type="string" offset="0"/>
-      <match value="Return-Path:" type="string" offset="0"/>
-      <match value="From:" type="string" offset="0"/>
-      <match value="Received:" type="string" offset="0"/>
-      <match value="Message-ID:" type="string" offset="0"/>
+      <match value="Return-Path:" type="stringignorecase" offset="0"/>
+      <match value="From:" type="stringignorecase" offset="0"/>
+      <match value="Received:" type="stringignorecase" offset="0"/>
+      <match value="Message-ID:" type="stringignorecase" offset="0"/>
       <match value="Date:" type="string" offset="0"/>
-      <match value="MIME-Version:" type="string" offset="0"/>
+      <match value="MIME-Version:" type="stringignorecase" offset="0"/>
       <match value="X-Notes-Item:" type="string" offset="0">
         <match value="Message-ID:" type="string" offset="0:8192"/>
       </match>

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1504879&r1=1504878&r2=1504879&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
(original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
Fri Jul 19 13:54:30 2013
@@ -632,6 +632,10 @@ public class TestMimeTypes extends TestC
     public void testEmlx() throws IOException {
         assertTypeDetection("testEMLX.emlx", "message/x-emlx");
     }
+    
+    public void testGroupWiseEml() throws Exception {
+        assertTypeDetection("testGroupWiseEml.eml", "message/rfc822");
+    }
 
     /** Test getMimeType(byte[]) */
     public void testGetMimeType_byteArray() throws IOException {

Added: 
tika/trunk/tika-parsers/src/test/resources/test-documents/testGroupWiseEml.eml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testGroupWiseEml.eml?rev=1504879&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/test-documents/testGroupWiseEml.eml 
(added)
+++ 
tika/trunk/tika-parsers/src/test/resources/test-documents/testGroupWiseEml.eml 
Fri Jul 19 13:54:30 2013
@@ -0,0 +1,58 @@
+Mime-Version: 1.0
+X-Mailer: GroupWise 2012
+Subject: test333
+Date: Thu, 27 Jun 2013 13:29:57 +0200
+Message-ID: <51CC3E55020000000000000F@$$$>
+References: <51CC3DB00200000000000003@$$$>
+In-Reply-To: <51CC3DB00200000000000003@$$$>
+From: "Novell GroupWise" <$$$.$$$.$$$>
+Content-Type: multipart/mixed; boundary="____LPHMXLZMXOMRLFKSEJCW____"
+
+
+--____LPHMXLZMXOMRLFKSEJCW____
+Content-Type: multipart/alternative; boundary="____WHPEPQYSAQXEHDGESJXG____"
+
+
+--____WHPEPQYSAQXEHDGESJXG____
+Content-Type: text/plain; charset=utf-8
+Content-Transfer-Encoding: base64
+Content-Disposition: inline
+
+dGVzdA==
+--____WHPEPQYSAQXEHDGESJXG____
+Content-Type: text/html; charset=utf-8
+Content-Transfer-Encoding: quoted-printable
+
+=
+<HTML><HEAD>
+<META content=3D"text/html; charset=3Dutf-8" http-equiv=3DContent-Type>
+<META name=3DGENERATOR content=3D"MSHTML 8.00.7601.17699"></HEAD>
+<BODY style=3D"MARGIN: 4px 4px 1px; FONT: 10pt Segoe UI">test</BODY></HTML>
+--____WHPEPQYSAQXEHDGESJXG____--
+
+--____LPHMXLZMXOMRLFKSEJCW____
+Content-Type: message/rfc822
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="test.eml"
+
+TWltZS1WZXJzaW9uOiAxLjANClgtTWFpbGVyOiBHcm91cFdpc2UgMjAxMg0KU3ViamVjdDogdGVz
+dA0KRGF0ZTogVGh1LCAyNyBKdW4gMjAxMyAxMzoyNzoxMiArMDIwMA0KTWVzc2FnZS1JRDogPDUx
+Q0MzREIwMDIwMDAwMDAwMDAwMDAwM0AkJCQ+DQpGcm9tOiAiTm92ZWxsIEdyb3VwV2lzZSIgPCQk
+JC4kJCQuJCQkPg0KQ29udGVudC1UeXBlOiBtdWx0aXBhcnQvYWx0ZXJuYXRpdmU7IGJvdW5kYXJ5
+PSJfX19fTFBITVhMWk1YT01STEZLU0VKQ1dfX19fIg0KDQoNCi0tX19fX0xQSE1YTFpNWE9NUkxG
+S1NFSkNXX19fXw0KQ29udGVudC1UeXBlOiB0ZXh0L3BsYWluOyBjaGFyc2V0PXV0Zi04DQpDb250
+ZW50LVRyYW5zZmVyLUVuY29kaW5nOiBiYXNlNjQNCkNvbnRlbnQtRGlzcG9zaXRpb246IGlubGlu
+ZQ0KDQpkR1Z6ZEE9PQ0KLS1fX19fTFBITVhMWk1YT01STEZLU0VKQ1dfX19fDQpDb250ZW50LVR5
+cGU6IHRleHQvaHRtbDsgY2hhcnNldD11dGYtOA0KQ29udGVudC1UcmFuc2Zlci1FbmNvZGluZzog
+cXVvdGVkLXByaW50YWJsZQ0KDQo8SFRNTD48SEVBRD4NCjxNRVRBIGNvbnRlbnQ9M0QidGV4dC9o
+dG1sOyBjaGFyc2V0PTNEdXRmLTgiIGh0dHAtZXF1aXY9M0RDb250ZW50LVR5cGU+DQo8TUVUQSBu
+YW1lPTNER0VORVJBVE9SIGNvbnRlbnQ9M0QiTVNIVE1MIDguMDAuNzYwMS4xNzY5OSI+PC9IRUFE
+Pg0KPEJPRFkgc3R5bGU9M0QiTUFSR0lOOiA0cHggNHB4IDFweDsgRk9OVDogMTBwdCBTZWdvZSBV
+SSI+dGVzdDwvQk9EWT48L0hUTUw+DQotLV9fX19MUEhNWExaTVhPTVJMRktTRUpDV19fX18tLQ0K
+--____LPHMXLZMXOMRLFKSEJCW____
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: quoted-printable
+Content-Disposition: attachment; filename="Neues Textdokument.txt"
+
+ssssssssssssssssssssss
+--____LPHMXLZMXOMRLFKSEJCW____--


Reply via email to