Author: jukka
Date: Mon May 5 23:29:19 2008
New Revision: 653686
URL: http://svn.apache.org/viewvc?rev=653686&view=rev
Log:
TIKA-92: Image metadata extraction
- Added a simple ImageParser based on ImageIO
- Currently only supports custom "width" and "height" metadata fields
- Included a few test images
Added:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/
incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java
incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/
incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp (with
props)
incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif (with
props)
incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg (with
props)
incubator/tika/trunk/src/test/resources/test-documents/testPNG.png (with
props)
incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif (with
props)
Modified:
incubator/tika/trunk/src/main/resources/tika-config.xml
Added:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=653686&view=auto
==============================================================================
---
incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java
(added)
+++
incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java
Mon May 5 23:29:19 2008
@@ -0,0 +1,46 @@
+package org.apache.tika.parser.image;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import javax.imageio.ImageIO;
+import javax.imageio.ImageReader;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for image formats that have an {@link ImageIO} reader registered
+ * for the document's MIME type. Only the dimensions of the first image
+ * (index 0) are extracted, into the custom "height" and "width" metadata
+ * fields. No text content is produced.
+ */
+public class ImageParser implements Parser {
+
+    /**
+     * Reads the image dimensions from the stream and stores them in the
+     * given metadata. Nothing is extracted when the metadata carries no
+     * content type or when ImageIO has no reader for that type.
+     *
+     * @param stream   image data; wrapped in a CloseShieldInputStream before
+     *                 being handed to ImageIO, so it is left for the caller
+     *                 to close
+     * @param metadata supplies {@link Metadata#CONTENT_TYPE} and receives the
+     *                 "height" and "width" values as decimal strings
+     * @throws IOException   if the image header cannot be read
+     * @throws TikaException declared by the parser contract; not thrown here
+     */
+    public void parse(InputStream stream, Metadata metadata)
+            throws IOException, TikaException {
+        String type = metadata.get(Metadata.CONTENT_TYPE);
+        if (type == null) {
+            return;
+        }
+
+        Iterator<ImageReader> iterator =
+            ImageIO.getImageReadersByMIMEType(type);
+        if (!iterator.hasNext()) {
+            return;
+        }
+
+        ImageReader reader = iterator.next();
+        try {
+            // Fully qualified to avoid touching the file's import block.
+            javax.imageio.stream.ImageInputStream imageStream =
+                ImageIO.createImageInputStream(
+                        new CloseShieldInputStream(stream));
+            try {
+                reader.setInput(imageStream);
+                metadata.set("height", Integer.toString(reader.getHeight(0)));
+                metadata.set("width", Integer.toString(reader.getWidth(0)));
+            } finally {
+                // Release the ImageIO stream (and any cache behind it) even
+                // when reading the dimensions fails.
+                if (imageStream != null) {
+                    imageStream.close();
+                }
+            }
+        } finally {
+            // Always free the reader's resources, also on failure.
+            reader.dispose();
+        }
+    }
+
+    /**
+     * Extracts the image metadata and then emits an empty XHTML document
+     * to the given handler.
+     *
+     * @param stream   image data, see {@link #parse(InputStream, Metadata)}
+     * @param handler  receives the start and end of the XHTML document
+     * @param metadata supplies the content type and receives the dimensions
+     * @throws IOException   if the image header cannot be read
+     * @throws SAXException  if the handler rejects the document events
+     * @throws TikaException declared by the parser contract
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        parse(stream, metadata);
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+}
Modified: incubator/tika/trunk/src/main/resources/tika-config.xml
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/tika-config.xml?rev=653686&r1=653685&r2=653686&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/tika-config.xml (original)
+++ incubator/tika/trunk/src/main/resources/tika-config.xml Mon May 5 23:29:19
2008
@@ -88,6 +88,18 @@
<mime>application/x-vnd.oasis.opendocument.formula-template</mime>
</parser>
+ <parser name="parse-image" class="org.apache.tika.parser.image.ImageParser">
+ <mime>image/bmp</mime>
+ <mime>image/gif</mime>
+ <mime>image/jpeg</mime>
+ <mime>image/png</mime>
+ <mime>image/tiff</mime>
+ <mime>image/vnd.wap.wbmp</mime>
+ <mime>image/x-icon</mime>
+ <mime>image/x-psd</mime>
+ <mime>image/x-xcf</mime>
+ </parser>
+
</parsers>
</properties>
\ No newline at end of file
Added:
incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java?rev=653686&view=auto
==============================================================================
---
incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
(added)
+++
incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
Mon May 5 23:29:19 2008
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.xml.sax.helpers.DefaultHandler;
+
+import junit.framework.TestCase;
+
+public class ImageParserTest extends TestCase {
+
+ private final Parser parser = new ImageParser();
+
+ public void testBMP() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/bmp");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testBMP.bmp");
+ parser.parse(stream, new DefaultHandler(), metadata);
+
+ assertEquals("75", metadata.get("height"));
+ assertEquals("100", metadata.get("width"));
+ }
+
+ public void testGIF() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/gif");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testGIF.gif");
+ parser.parse(stream, new DefaultHandler(), metadata);
+
+ assertEquals("75", metadata.get("height"));
+ assertEquals("100", metadata.get("width"));
+ }
+
+ public void testJPEG() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata);
+
+ assertEquals("75", metadata.get("height"));
+ assertEquals("100", metadata.get("width"));
+ }
+
+ public void testPNG() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/png");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testPNG.png");
+ parser.parse(stream, new DefaultHandler(), metadata);
+
+ assertEquals("75", metadata.get("height"));
+ assertEquals("100", metadata.get("width"));
+ }
+
+// TODO: Add TIFF support
+// public void testTIFF() throws Exception {
+// Metadata metadata = new Metadata();
+// metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
+// InputStream stream =
+// getClass().getResourceAsStream("/test-documents/testTIFF.tif");
+// parser.parse(stream, new DefaultHandler(), metadata);
+//
+// assertEquals("75", metadata.get("height"));
+// assertEquals("100", metadata.get("width"));
+// }
+
+}
Added: incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp?rev=653686&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif?rev=653686&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg?rev=653686&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/tika/trunk/src/test/resources/test-documents/testPNG.png
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testPNG.png?rev=653686&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/tika/trunk/src/test/resources/test-documents/testPNG.png
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif?rev=653686&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream