Author: jukka
Date: Sat Oct 24 23:34:21 2009
New Revision: 829467
URL: http://svn.apache.org/viewvc?rev=829467&view=rev
Log:
TIKA-314: Initial support for JPEG EXIF metadata extraction
Patch by Maxim Valyanskiy
Added:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg
(with props)
Modified:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
lucene/tika/trunk/tika-parsers/pom.xml
Modified:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml?rev=829467&r1=829466&r2=829467&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
(original)
+++
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
Sat Oct 24 23:34:21 2009
@@ -114,7 +114,6 @@
<parser name="parse-image" class="org.apache.tika.parser.image.ImageParser">
<mime>image/bmp</mime>
<mime>image/gif</mime>
- <mime>image/jpeg</mime>
<mime>image/png</mime>
<mime>image/tiff</mime>
<mime>image/vnd.wap.wbmp</mime>
@@ -123,6 +122,10 @@
<mime>image/x-xcf</mime>
</parser>
+ <parser name="parse-jpeg" class="org.apache.tika.parser.jpeg.JpegParser">
+ <mime>image/jpeg</mime>
+ </parser>
+
<parser name="parse-zip" class="org.apache.tika.parser.pkg.ZipParser">
<mime>application/zip</mime>
</parser>
Modified: lucene/tika/trunk/tika-parsers/pom.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/pom.xml?rev=829467&r1=829466&r2=829467&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/pom.xml (original)
+++ lucene/tika/trunk/tika-parsers/pom.xml Sat Oct 24 23:34:21 2009
@@ -117,6 +117,13 @@
<version>1.7</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>com.drewnoakes</groupId>
+ <artifactId>metadata-extractor</artifactId>
+ <version>2.4.0-beta-1</version>
+ </dependency>
+
+
</dependencies>
<build>
Added:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java?rev=829467&view=auto
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
(added)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
Sat Oct 24 23:34:21 2009
@@ -0,0 +1,61 @@
+package org.apache.tika.parser.jpeg;
+
+import org.apache.tika.parser.Parser;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Collections;
+import java.util.Iterator;
+
+import com.drew.imaging.jpeg.JpegMetadataReader;
+import com.drew.imaging.jpeg.JpegProcessingException;
+import com.drew.metadata.Directory;
+import com.drew.metadata.Tag;
+import com.drew.metadata.MetadataException;
+
+public class JpegParser implements Parser {
+ /**
+ * @deprecated This method will be removed in Apache Tika 1.0.
+ */
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ Map<String, Object> context = Collections.emptyMap();
+ parse(stream, handler, metadata, context);
+ }
+
+ public void parse(
+ InputStream stream, ContentHandler handler,
+ Metadata metadata, Map<String, Object> context)
+ throws IOException, SAXException, TikaException {
+ try {
+ com.drew.metadata.Metadata jpegMetadata =
JpegMetadataReader.readMetadata(stream);
+
+ Iterator directories = jpegMetadata.getDirectoryIterator();
+ while (directories.hasNext()) {
+ Directory directory = (Directory) directories.next();
+ Iterator tags = directory.getTagIterator();
+
+ while (tags.hasNext()) {
+ Tag tag = (Tag)tags.next();
+
+ metadata.set(tag.getTagName(), tag.getDescription());
+ }
+ }
+ } catch (JpegProcessingException e) {
+ throw new TikaException("Can't read JPEG metadata", e);
+ } catch (MetadataException e) {
+ throw new TikaException("Can't read JPEG metadata", e);
+ }
+
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+ xhtml.startDocument();
+ xhtml.endDocument();
+ }
+}
Added:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=829467&view=auto
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
(added)
+++
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
Sat Oct 24 23:34:21 2009
@@ -0,0 +1,23 @@
+package org.apache.tika.parser.jpeg;
+
+import junit.framework.TestCase;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.InputStream;
+
+/**
+ * Test case for {@link JpegParser}.
+ */
+public class JpegParserTest extends TestCase {
+    private final Parser parser = new JpegParser();
+
+    /**
+     * Parses the EXIF test image and checks the extracted "Model" value.
+     */
+    public void testJPEG() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+        InputStream stream =
+            getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
+        // Report a missing test resource clearly instead of letting the
+        // null stream cause an obscure failure inside the parser
+        assertNotNull("Test image not found on the classpath", stream);
+        try {
+            parser.parse(stream, new DefaultHandler(), metadata);
+        } finally {
+            // Release the resource stream even when parsing fails
+            stream.close();
+        }
+
+        assertEquals("Canon EOS 40D", metadata.get("Model"));
+    }
+
+}
Added:
lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg?rev=829467&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_EXIF.jpg
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream