Author: jukka
Date: Wed Feb 4 00:48:40 2009
New Revision: 740538
URL: http://svn.apache.org/viewvc?rev=740538&view=rev
Log:
TIKA-192: Add glob and magic patterns for image types
Added WMF type information.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java?rev=740538&r1=740537&r2=740538&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
(original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
Wed Feb 4 00:48:40 2009
@@ -57,6 +57,10 @@
name = name.substring(0, name.length() - 3);
} else if (name.toLowerCase().endsWith(".svgz")) {
name = name.substring(0, name.length() - 1);
+ } else if (name.toLowerCase().endsWith(".wmz")) {
+ name = name.substring(0, name.length() - 1) + "f";
+ } else if (name.toLowerCase().endsWith(".emz")) {
+ name = name.substring(0, name.length() - 1) + "f";
}
entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
}
Modified: lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=740538&r1=740537&r2=740538&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Wed Feb 4
00:48:40 2009
@@ -441,6 +441,8 @@
<glob pattern="*.gz" />
<glob pattern="*-gz" />
<glob pattern="*.svgz" />
+ <glob pattern="*.wmz" />
+ <glob pattern="*.emz" />
</mime-type>
<mime-type type="application/x-bzip">
@@ -513,6 +515,13 @@
<glob pattern="*.swf" />
</mime-type>
+ <mime-type type="image/x-tika-wmf">
+ <acronym>WMF</acronym>
+ <comment>Windows Metafile</comment>
+ <glob pattern="*.wmf" />
+ <glob pattern="*.emf" />
+ </mime-type>
+
<mime-type type="application/atom+xml">
<root-XML localName="feed" namespaceURI="http://purl.org/atom/ns#" />
</mime-type>
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=740538&r1=740537&r2=740538&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
(original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed
Feb 4 00:48:40 2009
@@ -184,6 +184,21 @@
assertTypeByName("application/x-shockwave-flash", "x.SWF");
}
+ public void testWmfDetection() throws Exception {
+ // TODO: Need a test wmf file
+ assertTypeByName("image/x-tika-wmf", "x.wmf");
+ assertTypeByName("image/x-tika-wmf", "x.WMF");
+ // TODO: Need a test emf file
+ assertTypeByName("image/x-tika-wmf", "x.emf");
+ assertTypeByName("image/x-tika-wmf", "x.EMF");
+ // TODO: Need a test wmz file
+ assertTypeByName("application/x-gzip", "x.wmz");
+ assertTypeByName("application/x-gzip", "x.WMZ");
+ // TODO: Need a test emz file
+ assertTypeByName("application/x-gzip", "x.emz");
+ assertTypeByName("application/x-gzip", "x.EMZ");
+ }
+
public void testPsDetection() throws Exception {
// TODO: Need a test postscript file
assertTypeByName("application/postscript", "x.ps");