Author: nick
Date: Sun Nov 27 22:51:36 2011
New Revision: 1206896

URL: http://svn.apache.org/viewvc?rev=1206896&view=rev
Log:
TIKA-697 Correct the MIME magic match for .ar Unix archives, add the suggested
extra file types and aliases, and list .deb as being ar-based

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
    
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=1206896&r1=1206895&r2=1206896&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java 
Sun Nov 27 22:51:36 2011
@@ -126,6 +126,12 @@ public class MagicDetector implements De
                     decoded.write(Integer.parseInt(
                             value.substring(i + 2, i + 4), 16));
                     i += 3;
+                } else if (value.charAt(i + 1) == 'r') {
+                    decoded.write((int)'\r');
+                    i++;
+                } else if (value.charAt(i + 1) == 'n') {
+                   decoded.write((int)'\n');
+                   i++;
                 } else {
                     int j = i + 1;
                     while ((j < i + 4) && (j < value.length())

Modified: 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1206896&r1=1206895&r2=1206896&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
(original)
+++ 
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml 
Sun Nov 27 22:51:36 2011
@@ -2182,11 +2182,13 @@
   </mime-type>
 
   <mime-type type="application/x-archive">
+    <alias type="application/x-unix-archive"/>
     <magic priority="50">
       <match value="=&lt;ar&gt;" type="string" offset="0"/>
-      <match value="=!&lt;arch&gt;" type="string" offset="0"/>
+      <match value="!&lt;arch&gt;\n" type="string" offset="0"/>
     </magic>
     <glob pattern="*.ar"/>
+    <glob pattern="*.a"/>
   </mime-type>
 
   <mime-type type="application/x-authorware-bin">
@@ -2310,6 +2312,7 @@
   </mime-type>
 
   <mime-type type="application/x-debian-package">
+    <sub-class-of type="application/x-archive"/>
     <glob pattern="*.deb"/>
     <glob pattern="*.udeb"/>
   </mime-type>

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1206896&r1=1206895&r2=1206896&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
(original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
Sun Nov 27 22:51:36 2011
@@ -204,9 +204,11 @@ public class TestMimeTypes extends TestC
        assertTypeByName("application/x-gzip", "test.tgz"); // See GZIP, not 
tar contents of it
        assertTypeByName("application/x-cpio", "test.cpio");
        
+       // TODO Add an example .deb and .udeb, then check these
+       
        // Check the mime magic patterns for them work too
-//       assertTypeByData("application/x-archive", "testARofText.ar"); // TODO 
TIKA-697
-//       assertTypeByData("application/x-archive", "testARofSND.ar");  // TODO 
TIKA-697 
+       assertTypeByData("application/x-archive", "testARofText.ar"); // TODO 
TIKA-697
+       assertTypeByData("application/x-archive", "testARofSND.ar");  // TODO 
TIKA-697 
        assertTypeByData("application/zip",    "test-documents.zip");
        assertTypeByData("application/x-gtar",  "test-documents.tar"); // GNU 
TAR
        assertTypeByData("application/x-gzip", "test-documents.tgz"); // See 
GZIP, not tar contents of it


Reply via email to