Author: nick Date: Wed Feb 13 14:47:20 2013 New Revision: 1445632 URL: http://svn.apache.org/r1445632 Log: Patch from Ryan McKinley from TIKA-1083 - Add Link and UTI information for a number of common mimetypes
Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1445632&r1=1445631&r2=1445632&view=diff ============================================================================== --- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original) +++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed Feb 13 14:47:20 2013 @@ -171,6 +171,9 @@ <mime-type type="application/iges"/> <mime-type type="application/illustrator"> + <acronym>AI</acronym> + <_comment>Adobe Illustrator Artwork</_comment> + <tika:link>http://en.wikipedia.org/wiki/Adobe_Illustrator_Artwork</tika:link> <glob pattern="*.ai"/>] <sub-class-of type="application/postscript"/> </mime-type> @@ -186,6 +189,9 @@ <mime-type type="application/isup"/> <mime-type type="application/java-archive"> + <_comment>Java Archive</_comment> + <tika:link>http://en.wikipedia.org/wiki/.jar</tika:link> + <tika:uti>com.sun.java-archive</tika:uti> <sub-class-of type="application/zip"/> <glob pattern="*.jar"/> </mime-type> @@ -296,6 +302,8 @@ <!-- Use DefaultDetector / org.apache.tika.parser.microsoft.POIFSContainerDetector for more reliable detection of OLE2 documents --> <alias type="application/vnd.ms-word"/> <_comment>Microsoft Word Document</_comment> + <tika:link>http://en.wikipedia.org/wiki/.doc</tika:link> + <tika:uti>com.microsoft.word.doc</tika:uti> <magic priority="50"> <match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080"/> <match value="Documento\ Microsoft\ Word\ 6" type="string" offset="2080"/> @@ -383,6 +391,9 @@ <alias type="application/x-pdf"/> <acronym>PDF</acronym> <_comment>Portable Document Format</_comment> + <tika:link>http://en.wikipedia.org/wiki/PDF</tika:link> + <tika:link>http://www.adobe.com/devnet/pdf/pdf_reference_archive.html</tika:link> + <tika:uti>com.adobe.pdf</tika:uti> <magic priority="50"> <match value="%PDF-" type="string" offset="0"/> </magic> @@ -3291,6 +3302,10 @@ <mime-type type="application/xhtml-voice+xml"/> <mime-type type="application/xml"> + <acronym>XML</acronym> + <_comment>Extensible Markup Language</_comment> + <tika:link>http://en.wikipedia.org/wiki/Xml</tika:link> + <tika:uti>public.xml</tika:uti> <alias type="text/xml"/> <magic priority="50"> <match value="<?xml" type="string" offset="0"/> @@ -3346,6 +3361,8 @@ <mime-type type="application/zip"> <_comment>Compressed Archive File</_comment> + <tika:link>http://en.wikipedia.org/wiki/ZIP_(file_format)</tika:link> + <tika:uti>com.pkware.zip-archive</tika:uti> <alias type="application/x-zip-compressed"/> <magic priority="40"> <match value="PK\003\004" type="string" offset="0"/> @@ -3801,6 +3818,8 @@ <mime-type type="image/gif"> <acronym>GIF</acronym> <_comment>Graphics Interchange Format</_comment> + <tika:link>http://en.wikipedia.org/wiki/Gif</tika:link> + <tika:uti>com.compuserve.gif</tika:uti> <magic priority="50"> <match value="GIF87a" type="string" offset="0"/> <match value="GIF89a" type="string" offset="0"/> @@ -3827,6 +3846,8 @@ <mime-type type="image/jpeg"> <acronym>JPEG</acronym> <_comment>Joint Photographic Experts Group</_comment> + <tika:link>http://en.wikipedia.org/wiki/Jpeg</tika:link> + <tika:uti>public.jpeg</tika:uti> <magic priority="50"> <!-- FFD8 is the SOI (Start Of Image) marker. --> <!-- It is followed by another marker that starts with FF. --> @@ -3947,6 +3968,7 @@ <mime-type type="image/vnd.dwg"> <acronym>DWG</acronym> <_comment>AutoCad Drawing</_comment> + <tika:link>http://en.wikipedia.org/wiki/.dwg</tika:link> <alias type="image/x-dwg"/> <alias type="application/acad"/> <alias type="application/x-acad"/> @@ -3954,6 +3976,8 @@ <alias type="application/dwg"/> <alias type="application/x-dwg"/> <alias type="application/x-autocad"/> + <alias type="image/vnd.dwg"/> + <alias type="drawing/dwg"/> <glob pattern="*.dwg"/> <magic priority="50"> <match value="MC0.0" type="string" offset="0"/> @@ -3970,6 +3994,9 @@ </mime-type> <mime-type type="image/vnd.dxf"> + <acronym>DXF</acronym> + <_comment>AutoCAD DXF</_comment> + <tika:link>http://en.wikipedia.org/wiki/AutoCAD_DXF</tika:link> <glob pattern="*.dxf"/> </mime-type> <mime-type type="image/vnd.fastbidsheet"> @@ -4434,6 +4461,9 @@ <mime-type type="text/example"/> <mime-type type="text/html"> + <_comment>HyperText Markup Language</_comment> + <acronym>HTML</acronym> + <tika:uti>public.html</tika:uti> <!-- TIKA-327: if you encounter tags in the HTML with no declared namespace, it's not XHTML, it's just bad HTML, unfortunately. Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java?rev=1445632&r1=1445631&r2=1445632&view=diff ============================================================================== --- tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java (original) +++ tika/trunk/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java Wed Feb 13 14:47:20 2013 @@ -122,11 +122,17 @@ public class MimeTypesReaderTest extends * @since TIKA-1012 */ public void testReadExtendedMetadata() throws Exception { - MimeType bmp = this.mimeTypes.forName("image/x-ms-bmp"); - assertEquals("BMP", bmp.getAcronym()); - assertEquals("com.microsoft.bmp", bmp.getUniformTypeIdentifier()); + MimeType mime = this.mimeTypes.forName("image/x-ms-bmp"); + assertEquals("BMP", mime.getAcronym()); + assertEquals("com.microsoft.bmp", mime.getUniformTypeIdentifier()); assertEquals("http://en.wikipedia.org/wiki/BMP_file_format", - bmp.getLinks().get(0).toString()); + mime.getLinks().get(0).toString()); + + mime = this.mimeTypes.forName("application/xml"); + assertEquals("XML", mime.getAcronym()); + assertEquals("public.xml", mime.getUniformTypeIdentifier()); + assertEquals("http://en.wikipedia.org/wiki/Xml", + mime.getLinks().get(0).toString()); } /**