Author: jukka
Date: Wed Feb  4 00:10:51 2009
New Revision: 740526

URL: http://svn.apache.org/viewvc?rev=740526&view=rev
Log:
TIKA-192: Add glob and magic patterns for image types

Made the GzipParser understand .svgz files.

Added:
    lucene/tika/trunk/src/test/resources/test-documents/testSVG.svgz   (with props)
Modified:
    lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
    lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
    lucene/tika/trunk/src/main/resources/tika-config.xml
    lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
    lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
    lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg

Modified: 
lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java 
(original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/parser/pkg/GzipParser.java 
Wed Feb  4 00:10:51 2009
@@ -55,6 +55,8 @@
                     name = name.substring(0, name.length() - 4) + ".tar";
                 } else if (name.endsWith(".gz") || name.endsWith("-gz")) {
                     name = name.substring(0, name.length() - 3);
+                } else if (name.toLowerCase().endsWith(".svgz")) {
+                    name = name.substring(0, name.length() - 1);
                 }
                 entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
             }

Modified: lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Wed Feb  4 
00:10:51 2009
@@ -440,6 +440,7 @@
     <glob pattern="*.tgz" />
     <glob pattern="*.gz" />
     <glob pattern="*-gz" />
+    <glob pattern="*.svgz" />
   </mime-type>
 
   <mime-type type="application/x-bzip">

Modified: lucene/tika/trunk/src/main/resources/tika-config.xml
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/resources/tika-config.xml?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/resources/tika-config.xml (original)
+++ lucene/tika/trunk/src/main/resources/tika-config.xml Wed Feb  4 00:10:51 
2009
@@ -25,6 +25,7 @@
 
         <parser name="parse-dcxml" class="org.apache.tika.parser.xml.DcXMLParser">
                 <mime>application/xml</mime>
+                <mime>image/svg+xml</mime>
         </parser>
 
         <parser name="parse-office" class="org.apache.tika.parser.microsoft.OfficeParser">

Modified: 
lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java 
(original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/mime/TestMimeTypes.java Wed 
Feb  4 00:10:51 2009
@@ -164,6 +164,11 @@
         assertTypeByData("image/svg+xml", "testSVG.svg");
         assertTypeByName("image/svg+xml", "x.svg");
         assertTypeByName("image/svg+xml", "x.SVG");
+
+        assertType("application/x-gzip", "testSVG.svgz");
+        assertTypeByData("application/x-gzip", "testSVG.svgz");
+        assertTypeByName("application/x-gzip", "x.svgz");
+        assertTypeByName("application/x-gzip", "x.SVGZ");
     }
 
     public void testRawDetection() throws Exception {

Modified: 
lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- 
lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java 
(original)
+++ 
lucene/tika/trunk/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java 
Wed Feb  4 00:10:51 2009
@@ -66,4 +66,22 @@
         assertTrue(content.contains("Rida Benjelloun"));
     }
 
+    public void testSvgzParsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        InputStream stream = GzipParserTest.class.getResourceAsStream(
+                "/test-documents/testSVG.svgz");
+        try {
+            parser.parse(stream, handler, metadata);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals("application/x-gzip", metadata.get(Metadata.CONTENT_TYPE));
+        String content = handler.toString();
+        assertTrue(content.contains("Test SVG image"));
+    }
+
 }

Modified: lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg?rev=740526&r1=740525&r2=740526&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg (original)
+++ lucene/tika/trunk/src/test/resources/test-documents/testSVG.svg Wed Feb  4 
00:10:51 2009
@@ -2,5 +2,6 @@
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" 
           "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 <svg width="1cm" height="1cm" version="1.1" xmlns="http://www.w3.org/2000/svg">
+  <desc>Test SVG image</desc>
   <rect x="0.1cm" y="0.1cm" width="0.8cm" height="0.8cm"/>
 </svg>
\ No newline at end of file

Added: lucene/tika/trunk/src/test/resources/test-documents/testSVG.svgz
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/resources/test-documents/testSVG.svgz?rev=740526&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/tika/trunk/src/test/resources/test-documents/testSVG.svgz
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


Reply via email to