Author: jukka
Date: Sat Oct 24 23:34:21 2009
New Revision: 829467

URL: http://svn.apache.org/viewvc?rev=829467&view=rev
Log:
TIKA-314: Initial support for JPEG EXIF metadata extraction

Patch by Maxim Valyanskiy

Added:
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
    lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg   (with props)
Modified:
    lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
    lucene/tika/trunk/tika-parsers/pom.xml

Modified: 
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml?rev=829467&r1=829466&r2=829467&view=diff
==============================================================================
--- 
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml 
(original)
+++ 
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml 
Sat Oct 24 23:34:21 2009
@@ -114,7 +114,6 @@
         <parser name="parse-image" 
class="org.apache.tika.parser.image.ImageParser">
                 <mime>image/bmp</mime>
                 <mime>image/gif</mime>
-                <mime>image/jpeg</mime>
                 <mime>image/png</mime>
                 <mime>image/tiff</mime>
                 <mime>image/vnd.wap.wbmp</mime>
@@ -123,6 +122,10 @@
                 <mime>image/x-xcf</mime>
         </parser>
 
+        <parser name="parse-image" 
class="org.apache.tika.parser.jpeg.JpegParser">
+                <mime>image/jpeg</mime>
+        </parser>
+
         <parser name="parse-zip" class="org.apache.tika.parser.pkg.ZipParser">
                 <mime>application/zip</mime>
         </parser>

Modified: lucene/tika/trunk/tika-parsers/pom.xml
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/pom.xml?rev=829467&r1=829466&r2=829467&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/pom.xml (original)
+++ lucene/tika/trunk/tika-parsers/pom.xml Sat Oct 24 23:34:21 2009
@@ -117,6 +117,13 @@
        <version>1.7</version>
        <scope>test</scope>
     </dependency>
+    <dependency>
+       <groupId>com.drewnoakes</groupId>
+       <artifactId>metadata-extractor</artifactId>
+       <version>2.4.0-beta-1</version>
+    </dependency>
+
+    
   </dependencies>
 
   <build>

Added: 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java?rev=829467&view=auto
==============================================================================
--- 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
 (added)
+++ 
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
 Sat Oct 24 23:34:21 2009
@@ -0,0 +1,61 @@
+package org.apache.tika.parser.jpeg;
+
+import org.apache.tika.parser.Parser;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Collections;
+import java.util.Iterator;
+
+import com.drew.imaging.jpeg.JpegMetadataReader;
+import com.drew.imaging.jpeg.JpegProcessingException;
+import com.drew.metadata.Directory;
+import com.drew.metadata.Tag;
+import com.drew.metadata.MetadataException;
+
+public class JpegParser implements Parser { // JPEG parser that reads tags through JpegMetadataReader
+    /**
+     * Convenience overload: builds an empty context map and delegates to
+     * the four-argument {@code parse} method below.
+     *
+     * @deprecated This method will be removed in Apache Tika 1.0.
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        Map<String, Object> context = Collections.emptyMap();
+        parse(stream, handler, metadata, context);
+    }
+    /**
+     * Reads the JPEG metadata from the stream and copies every tag of every
+     * directory into {@code metadata}, using the tag name as the key and the
+     * tag description as the value. No text content is produced: the XHTML
+     * document is started and immediately ended.
+     *
+     * @param stream   JPEG data handed to {@code JpegMetadataReader}
+     * @param handler  receives the (empty) XHTML document events
+     * @param metadata receives the tag name / description pairs
+     * @param context  not read by this method
+     * @throws TikaException if reading fails with a JpegProcessingException
+     *         or a MetadataException (the original exception is the cause)
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, Map<String, Object> context)
+            throws IOException, SAXException, TikaException {
+        try {
+            com.drew.metadata.Metadata jpegMetadata = 
JpegMetadataReader.readMetadata(stream);
+            // Outer loop: directories; inner loop: the tags of one directory.
+            Iterator directories = jpegMetadata.getDirectoryIterator(); // raw Iterator, hence the casts below
+            while (directories.hasNext()) {
+                Directory directory = (Directory) directories.next();
+                Iterator tags = directory.getTagIterator();
+
+                while (tags.hasNext()) {
+                    Tag tag = (Tag)tags.next();
+                    // NOTE(review): keyed by tag name only; same-named tags in different directories presumably overwrite each other - confirm
+                    metadata.set(tag.getTagName(), tag.getDescription());
+                }
+            }
+        } catch (JpegProcessingException e) {
+            throw new TikaException("Can't read JPEG metadata", e);
+        } catch (MetadataException e) {
+            throw new TikaException("Can't read JPEG metadata", e);
+        }
+        // Emit an empty XHTML document; nothing is written between start and end.
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+}

Added: 
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=829467&view=auto
==============================================================================
--- 
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
 (added)
+++ 
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
 Sat Oct 24 23:34:21 2009
@@ -0,0 +1,23 @@
+package org.apache.tika.parser.jpeg;
+
+import junit.framework.TestCase;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.InputStream;
+
+public class JpegParserTest extends TestCase { // TestCase-based test for JpegParser
+    private final Parser parser = new JpegParser();
+    // Parses the testJPEG_EXIF.jpg sample and checks that its "Model" tag ends up in the Tika metadata.
+    public void testJPEG() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+        InputStream stream =
+            
getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
+        parser.parse(stream, new DefaultHandler(), metadata); // NOTE(review): stream is never closed in this test
+        // The key "Model" is the tag name under which the parser stored the value.
+        assertEquals("Canon EOS 40D", metadata.get("Model"));
+    }
+
+}

Added: 
lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg
URL: 
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg?rev=829467&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


Reply via email to