Author: nick
Date: Fri Mar 18 17:00:42 2011
New Revision: 1082973
URL: http://svn.apache.org/viewvc?rev=1082973&view=rev
Log:
TIKA-534 - When parsing a JPEG file that contains tags the metadata library cannot handle, skip those tags and continue instead of failing
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_oddTagComponent.jpg
(with props)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1082973&r1=1082972&r2=1082973&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java Fri Mar 18 17:00:42 2011
@@ -189,7 +189,13 @@ public class ImageMetadataExtractor {
Tag tag = (Tag) tags.next();
String name = tag.getTagName();
if (!MetadataFields.isMetadataField(name)) {
- metadata.set(name, tag.getDescription());
+ try {
+ String value = tag.getDescription();
+ metadata.set(name, value);
+ } catch(MetadataException e) {
+ // Either something's corrupt, or it's a JPEG tag
+ // that the library doesn't know about. Skip it
+ }
}
}
}
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=1082973&r1=1082972&r2=1082973&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java Fri Mar 18 17:00:42 2011
@@ -184,4 +184,17 @@ public class JpegParserTest extends Test
assertTrue("'coast'" + " not in " + keywords, keywords.contains("coast"));
assertTrue("'nature reserve'" + " not in " + keywords, keywords.contains("nature reserve"));
}
+
+ public void testJPEGoddTagComponent() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_oddTagComponent.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ assertEquals(null, metadata.get(Metadata.TITLE));
+ assertEquals(null, metadata.get(Metadata.DESCRIPTION));
+ assertEquals("251", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("384", metadata.get(Metadata.IMAGE_LENGTH));
+ }
}
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_oddTagComponent.jpg
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_oddTagComponent.jpg?rev=1082973&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
tika/trunk/tika-parsers/src/test/resources/test-documents/testJPEG_oddTagComponent.jpg
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream