Author: jukka
Date: Wed Feb 4 00:10:51 2009
New Revision: 740526
URL: http://svn.apache.org/viewvc?rev=740526&view=rev
Log:
TIKA-192: Add glob and magic patterns for image types
Made the GzipParser understand .svgz files.
Added:
lucene/tika/trunk/src/test/resources/test-documents/testSVG.svgz (with props)
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
lucene/tika/trunk/src/main/resources/tika-config.xml
lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
(original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
Wed Feb 4 00:10:51 2009
@@ -55,6 +55,8 @@
name = name.substring(0, name.length() - 4) + ".tar";
} else if (name.endsWith(".gz") || name.endsWith("-gz")) {
name = name.substring(0, name.length() - 3);
+ } else if (name.toLowerCase().endsWith(".svgz")) {
+ name = name.substring(0, name.length() - 1);
}
entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
}
Modified: lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Wed Feb 4
00:10:51 2009
@@ -440,6 +440,7 @@
<glob pattern="*.tgz" />
<glob pattern="*.gz" />
<glob pattern="*-gz" />
+ <glob pattern="*.svgz" />
</mime-type>
<mime-type type="application/x-bzip">
Modified: lucene/tika/trunk/src/main/resources/tika-config.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/tika-config.xml?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/tika-config.xml (original)
+++ lucene/tika/trunk/src/main/resources/tika-config.xml Wed Feb 4 00:10:51
2009
@@ -25,6 +25,7 @@
<parser name="parse-dcxml"
class="org.apache.tika.parser.xml.DcXMLParser">
<mime>application/xml</mime>
+ <mime>image/svg+xml</mime>
</parser>
<parser name="parse-office"
class="org.apache.tika.parser.microsoft.OfficeParser">
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
(original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed
Feb 4 00:10:51 2009
@@ -164,6 +164,11 @@
assertTypeByData("image/svg+xml", "testSVG.svg");
assertTypeByName("image/svg+xml", "x.svg");
assertTypeByName("image/svg+xml", "x.SVG");
+
+ assertType("application/x-gzip", "testSVG.svgz");
+ assertTypeByData("application/x-gzip", "testSVG.svgz");
+ assertTypeByName("application/x-gzip", "x.svgz");
+ assertTypeByName("application/x-gzip", "x.SVGZ");
}
public void testRawDetection() throws Exception {
Modified:
lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
---
lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
(original)
+++
lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
Wed Feb 4 00:10:51 2009
@@ -66,4 +66,22 @@
assertTrue(content.contains("Rida Benjelloun"));
}
+ public void testSvgzParsing() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ InputStream stream = GzipParserTest.class.getResourceAsStream(
+ "/test-documents/testSVG.svgz");
+ try {
+ parser.parse(stream, handler, metadata);
+ } finally {
+ stream.close();
+ }
+
+ assertEquals("application/x-gzip", metadata.get(Metadata.CONTENT_TYPE));
+ String content = handler.toString();
+ assertTrue(content.contains("Test SVG image"));
+ }
+
}
Modified: lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg (original)
+++ lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg Wed Feb 4
00:10:51 2009
@@ -2,5 +2,6 @@
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg width="1cm" height="1cm" version="1.1" xmlns="http://www.w3.org/2000/svg">
+ <desc>Test SVG image</desc>
<rect x="0.1cm" y="0.1cm" width="0.8cm" height="0.8cm"/>
</svg>
\ No newline at end of file
Added: lucene/tika/trunk/src/test/resources/test-documents/testSVG.svgz
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/resources/test-documents/testSVG.svgz?rev=740526&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/tika/trunk/src/test/resources/test-documents/testSVG.svgz
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream