Author: nick
Date: Sun Nov 27 22:51:36 2011
New Revision: 1206896

URL: http://svn.apache.org/viewvc?rev=1206896&view=rev
Log:
TIKA-697 Correct mime match for .ar unix archives, add the suggested extra filetypes and aliases, and list .deb as being ar based

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=1206896&r1=1206895&r2=1206896&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/MagicDetector.java Sun Nov 27 22:51:36 2011
@@ -126,6 +126,12 @@ public class MagicDetector implements De
                     decoded.write(Integer.parseInt(
                             value.substring(i + 2, i + 4), 16));
                     i += 3;
+                } else if (value.charAt(i + 1) == 'r') {
+                    decoded.write((int)'\r');
+                    i++;
+                } else if (value.charAt(i + 1) == 'n') {
+                    decoded.write((int)'\n');
+                    i++;
                 } else {
                     int j = i + 1;
                     while ((j < i + 4) && (j < value.length())

Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1206896&r1=1206895&r2=1206896&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Sun Nov 27 22:51:36 2011
@@ -2182,11 +2182,13 @@
   </mime-type>
 
   <mime-type type="application/x-archive">
+    <alias type="application/x-unix-archive"/>
     <magic priority="50">
       <match value="=<ar>" type="string" offset="0"/>
-      <match value="=!<arch>" type="string" offset="0"/>
+      <match value="!<arch>\n" type="string" offset="0"/>
     </magic>
     <glob pattern="*.ar"/>
+    <glob pattern="*.a"/>
   </mime-type>
 
   <mime-type type="application/x-authorware-bin">
@@ -2310,6 +2312,7 @@
   </mime-type>
 
   <mime-type type="application/x-debian-package">
+    <sub-class-of type="application/x-archive"/>
     <glob pattern="*.deb"/>
     <glob pattern="*.udeb"/>
   </mime-type>

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1206896&r1=1206895&r2=1206896&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Sun Nov 27 22:51:36 2011
@@ -204,9 +204,11 @@ public class TestMimeTypes extends TestC
         assertTypeByName("application/x-gzip", "test.tgz"); // See GZIP, not tar contents of it
         assertTypeByName("application/x-cpio", "test.cpio");
 
+        // TODO Add an example .deb and .udeb, then check these
+
         // Check the mime magic patterns for them work too
-// assertTypeByData("application/x-archive", "testARofText.ar"); // TODO TIKA-697
-// assertTypeByData("application/x-archive", "testARofSND.ar"); // TODO TIKA-697
+        assertTypeByData("application/x-archive", "testARofText.ar"); // TODO TIKA-697
+        assertTypeByData("application/x-archive", "testARofSND.ar"); // TODO TIKA-697
         assertTypeByData("application/zip", "test-documents.zip");
         assertTypeByData("application/x-gtar", "test-documents.tar"); // GNU TAR
         assertTypeByData("application/x-gzip", "test-documents.tgz"); // See GZIP, not tar contents of it