Author: rgauss
Date: Sun Jul 29 23:18:10 2012
New Revision: 1366967
URL: http://svn.apache.org/viewvc?rev=1366967&view=rev
Log:
TIKA-811: Upgrade metadata-extractor version for OpenJDK 7 support
- Upgraded metadata-extractor to 2.6.2
- Refactored calls to metadata-extractor library methods and tags for new API
- Simplified use of JpegMetadataReader to use readMetadata method
- Updated TIFF parsing to utilize a temp File since the metadata-extractor
method accepting an InputStream is now deprecated
TIKA-915: Image geodata being rounded to integers
- Refactored GeotagHandler to use metadata-extractor's GeoLocation object
- Updated JpegParserTest for the better precision now available from
metadata-extractor library
- Enabled testJPEGGeo2 test
Modified:
tika/trunk/tika-parsers/pom.xml
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
Modified: tika/trunk/tika-parsers/pom.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Sun Jul 29 23:18:10 2012
@@ -163,7 +163,7 @@
<dependency>
<groupId>com.drewnoakes</groupId>
<artifactId>metadata-extractor</artifactId>
- <version>2.4.0-beta-1</version>
+ <version>2.6.2</version>
</dependency>
<dependency>
<groupId>de.l3s.boilerpipe</groupId>
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
Sun Jul 29 23:18:10 2012
@@ -18,43 +18,34 @@ package org.apache.tika.parser.image;
import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.text.DecimalFormat;
-import java.text.DecimalFormatSymbols;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
-import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Geographic;
import org.apache.tika.metadata.IPTC;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.xml.sax.SAXException;
+import com.drew.imaging.jpeg.JpegMetadataReader;
import com.drew.imaging.jpeg.JpegProcessingException;
-import com.drew.imaging.jpeg.JpegSegmentReader;
import com.drew.imaging.tiff.TiffMetadataReader;
-import com.drew.imaging.tiff.TiffProcessingException;
+import com.drew.lang.GeoLocation;
import com.drew.lang.Rational;
import com.drew.metadata.Directory;
import com.drew.metadata.MetadataException;
-import com.drew.metadata.MetadataReader;
import com.drew.metadata.Tag;
-import com.drew.metadata.exif.ExifDirectory;
-import com.drew.metadata.exif.ExifReader;
+import com.drew.metadata.exif.ExifIFD0Directory;
+import com.drew.metadata.exif.ExifSubIFDDirectory;
+import com.drew.metadata.exif.ExifThumbnailDirectory;
import com.drew.metadata.exif.GpsDirectory;
import com.drew.metadata.iptc.IptcDirectory;
-import com.drew.metadata.iptc.IptcReader;
import com.drew.metadata.jpeg.JpegCommentDirectory;
-import com.drew.metadata.jpeg.JpegCommentReader;
import com.drew.metadata.jpeg.JpegDirectory;
-import com.drew.metadata.jpeg.JpegReader;
/**
* Uses the <a href="http://www.drewnoakes.com/code/exif/">Metadata
Extractor</a> library
@@ -93,51 +84,20 @@ public class ImageMetadataExtractor {
public void parseJpeg(File file)
throws IOException, SAXException, TikaException {
try {
- JpegSegmentReader reader = new JpegSegmentReader(file);
- extractMetadataFromSegment(
- reader, JpegSegmentReader.SEGMENT_APP1, ExifReader.class);
- extractMetadataFromSegment(
- reader, JpegSegmentReader.SEGMENT_APPD, IptcReader.class);
- extractMetadataFromSegment(
- reader, JpegSegmentReader.SEGMENT_SOF0, JpegReader.class);
- extractMetadataFromSegment(
- reader, JpegSegmentReader.SEGMENT_COM,
JpegCommentReader.class);
+ com.drew.metadata.Metadata jpegMetadata =
JpegMetadataReader.readMetadata(file);
+ handle(jpegMetadata);
} catch (JpegProcessingException e) {
throw new TikaException("Can't read JPEG metadata", e);
+ } catch (MetadataException e) {
+ throw new TikaException("Can't read JPEG metadata", e);
}
}
- private void extractMetadataFromSegment(
- JpegSegmentReader reader, byte marker,
- Class<? extends MetadataReader> klass) {
- try {
- Constructor<? extends MetadataReader> constructor =
- klass.getConstructor(byte[].class);
-
- int n = reader.getSegmentCount(marker);
- for (int i = 0; i < n; i++) {
- byte[] segment = reader.readSegment(marker, i);
-
- com.drew.metadata.Metadata metadata =
- new com.drew.metadata.Metadata();
- constructor.newInstance(segment).extract(metadata);
-
- handle(metadata);
- }
- } catch (Exception e) {
- // Unable to read this kind of metadata, so skip
- }
- }
-
- protected void parseTiff(InputStream stream)
+ protected void parseTiff(File file)
throws IOException, SAXException, TikaException {
try {
- com.drew.metadata.Metadata tiffMetadata =
- TiffMetadataReader.readMetadata(stream);
-
+ com.drew.metadata.Metadata tiffMetadata =
TiffMetadataReader.readMetadata(file);
handle(tiffMetadata);
- } catch (TiffProcessingException e) {
- throw new TikaException("Can't read TIFF metadata", e);
} catch (MetadataException e) {
throw new TikaException("Can't read TIFF metadata", e);
}
@@ -148,10 +108,9 @@ public class ImageMetadataExtractor {
* @param metadataExtractor Tag directories from a Metadata Extractor
"reader"
* @throws MetadataException This method does not handle exceptions from
Metadata Extractor
*/
- @SuppressWarnings("unchecked")
protected void handle(com.drew.metadata.Metadata metadataExtractor)
throws MetadataException {
- handle(metadataExtractor.getDirectoryIterator());
+ handle(metadataExtractor.getDirectories().iterator());
}
/**
@@ -175,7 +134,7 @@ public class ImageMetadataExtractor {
*/
static interface DirectoryHandler {
/**
- * @param directorySubclass A Metadata Extractor directory class
+ * @param directoryType A Metadata Extractor directory class
* @return true if the directory type is supported by this handler
*/
boolean supports(Class<? extends Directory> directoryType);
@@ -198,10 +157,12 @@ public class ImageMetadataExtractor {
}
public void handle(Directory directory, Metadata metadata)
throws MetadataException {
- Iterator<?> tags = directory.getTagIterator();
- while (tags.hasNext()) {
- Tag tag = (Tag) tags.next();
- metadata.set(tag.getTagName(), tag.getDescription());
+ if (directory.getTags() != null) {
+ Iterator<?> tags = directory.getTags().iterator();
+ while (tags.hasNext()) {
+ Tag tag = (Tag) tags.next();
+ metadata.set(tag.getTagName(), tag.getDescription());
+ }
}
}
}
@@ -217,23 +178,20 @@ public class ImageMetadataExtractor {
}
public void handle(Directory directory, Metadata metadata)
throws MetadataException {
- Iterator<?> tags = directory.getTagIterator();
- while (tags.hasNext()) {
- Tag tag = (Tag) tags.next();
- String name = tag.getTagName();
- if (!MetadataFields.isMetadataField(name)) {
- try {
- String value = tag.getDescription().trim();
- if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
- value = Boolean.TRUE.toString();
- } else if
(Boolean.FALSE.toString().equalsIgnoreCase(value)) {
- value = Boolean.FALSE.toString();
- }
- metadata.set(name, value);
- } catch(MetadataException e) {
- // Either something's corrupt, or it's a JPEG tag
- // that the library doesn't know about. Skip it
- }
+ if (directory.getTags() != null) {
+ Iterator<?> tags = directory.getTags().iterator();
+ while (tags.hasNext()) {
+ Tag tag = (Tag) tags.next();
+ String name = tag.getTagName();
+ if (!MetadataFields.isMetadataField(name)) {
+ String value = tag.getDescription().trim();
+ if (Boolean.TRUE.toString().equalsIgnoreCase(value))
{
+ value = Boolean.TRUE.toString();
+ } else if
(Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+ value = Boolean.FALSE.toString();
+ }
+ metadata.set(name, value);
+ }
}
}
}
@@ -245,22 +203,25 @@ public class ImageMetadataExtractor {
static class DimensionsHandler implements DirectoryHandler {
private final Pattern LEADING_NUMBERS =
Pattern.compile("(\\d+)\\s*.*");
public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == JpegDirectory.class || directoryType ==
ExifDirectory.class;
+ return directoryType == JpegDirectory.class ||
+ directoryType == ExifSubIFDDirectory.class ||
+ directoryType == ExifThumbnailDirectory.class ||
+ directoryType == ExifIFD0Directory.class;
}
public void handle(Directory directory, Metadata metadata) throws
MetadataException {
// The test TIFF has width and height stored as follows according
to exiv2
//Exif.Image.ImageWidth Short 1 100
//Exif.Image.ImageLength Short 1 75
// and the values are found in "Thumbnail Image Width" (and
Height) from Metadata Extractor
- set(directory, metadata, ExifDirectory.TAG_THUMBNAIL_IMAGE_WIDTH,
Metadata.IMAGE_WIDTH);
+ set(directory, metadata,
ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH, Metadata.IMAGE_WIDTH);
set(directory, metadata, JpegDirectory.TAG_JPEG_IMAGE_WIDTH,
Metadata.IMAGE_WIDTH);
- set(directory, metadata, ExifDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT,
Metadata.IMAGE_LENGTH);
+ set(directory, metadata,
ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT, Metadata.IMAGE_LENGTH);
set(directory, metadata, JpegDirectory.TAG_JPEG_IMAGE_HEIGHT,
Metadata.IMAGE_LENGTH);
// Bits per sample, two methods of extracting, exif overrides jpeg
set(directory, metadata, JpegDirectory.TAG_JPEG_DATA_PRECISION,
Metadata.BITS_PER_SAMPLE);
- set(directory, metadata, ExifDirectory.TAG_BITS_PER_SAMPLE,
Metadata.BITS_PER_SAMPLE);
+ set(directory, metadata, ExifSubIFDDirectory.TAG_BITS_PER_SAMPLE,
Metadata.BITS_PER_SAMPLE);
// Straightforward
- set(directory, metadata, ExifDirectory.TAG_SAMPLES_PER_PIXEL,
Metadata.SAMPLES_PER_PIXEL);
+ set(directory, metadata,
ExifSubIFDDirectory.TAG_SAMPLES_PER_PIXEL, Metadata.SAMPLES_PER_PIXEL);
}
private void set(Directory directory, Metadata metadata, int
extractTag, Property metadataField) {
if (directory.containsTag(extractTag)) {
@@ -286,7 +247,8 @@ public class ImageMetadataExtractor {
static class ExifHandler implements DirectoryHandler {
private static final SimpleDateFormat DATE_UNSPECIFIED_TZ = new
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
public boolean supports(Class<? extends Directory> directoryType) {
- return directoryType == ExifDirectory.class;
+ return directoryType == ExifIFD0Directory.class ||
+ directoryType == ExifSubIFDDirectory.class;
}
public void handle(Directory directory, Metadata metadata) {
try {
@@ -303,8 +265,9 @@ public class ImageMetadataExtractor {
*/
public void handleCommentTags(Directory directory, Metadata metadata) {
if (metadata.get(TikaCoreProperties.DESCRIPTION) == null &&
-
directory.containsTag(ExifDirectory.TAG_IMAGE_DESCRIPTION)) {
- metadata.set(TikaCoreProperties.DESCRIPTION,
directory.getString(ExifDirectory.TAG_IMAGE_DESCRIPTION));
+
directory.containsTag(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION)) {
+ metadata.set(TikaCoreProperties.DESCRIPTION,
+
directory.getString(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION));
}
}
/**
@@ -312,22 +275,17 @@ public class ImageMetadataExtractor {
* TIFF image metadata namespace.
*/
public void handlePhotoTags(Directory directory, Metadata metadata) {
- if(directory.containsTag(ExifDirectory.TAG_EXPOSURE_TIME)) {
- Object exposure =
directory.getObject(ExifDirectory.TAG_EXPOSURE_TIME);
+ if(directory.containsTag(ExifSubIFDDirectory.TAG_EXPOSURE_TIME)) {
+ Object exposure =
directory.getObject(ExifSubIFDDirectory.TAG_EXPOSURE_TIME);
if(exposure instanceof Rational) {
metadata.set(Metadata.EXPOSURE_TIME,
((Rational)exposure).doubleValue());
} else {
- metadata.set(Metadata.EXPOSURE_TIME,
directory.getString(ExifDirectory.TAG_EXPOSURE_TIME));
+ metadata.set(Metadata.EXPOSURE_TIME,
directory.getString(ExifSubIFDDirectory.TAG_EXPOSURE_TIME));
}
}
- if(directory.containsTag(ExifDirectory.TAG_FLASH)) {
- String flash = "";
- try {
- flash = directory.getDescription(ExifDirectory.TAG_FLASH);
- } catch (MetadataException e) {
- // ignore
- }
+ if(directory.containsTag(ExifSubIFDDirectory.TAG_FLASH)) {
+ String flash =
directory.getDescription(ExifSubIFDDirectory.TAG_FLASH);
if(flash.indexOf("Flash fired") > -1) {
metadata.set(Metadata.FLASH_FIRED, Boolean.TRUE.toString());
}
@@ -339,70 +297,72 @@ public class ImageMetadataExtractor {
}
}
- if(directory.containsTag(ExifDirectory.TAG_FNUMBER)) {
- Object fnumber = directory.getObject(ExifDirectory.TAG_FNUMBER);
+ if(directory.containsTag(ExifSubIFDDirectory.TAG_FNUMBER)) {
+ Object fnumber =
directory.getObject(ExifSubIFDDirectory.TAG_FNUMBER);
if(fnumber instanceof Rational) {
metadata.set(Metadata.F_NUMBER,
((Rational)fnumber).doubleValue());
} else {
- metadata.set(Metadata.F_NUMBER,
directory.getString(ExifDirectory.TAG_FNUMBER));
+ metadata.set(Metadata.F_NUMBER,
directory.getString(ExifSubIFDDirectory.TAG_FNUMBER));
}
}
- if(directory.containsTag(ExifDirectory.TAG_FOCAL_LENGTH)) {
- Object length =
directory.getObject(ExifDirectory.TAG_FOCAL_LENGTH);
+ if(directory.containsTag(ExifSubIFDDirectory.TAG_FOCAL_LENGTH)) {
+ Object length =
directory.getObject(ExifSubIFDDirectory.TAG_FOCAL_LENGTH);
if(length instanceof Rational) {
metadata.set(Metadata.FOCAL_LENGTH,
((Rational)length).doubleValue());
} else {
- metadata.set(Metadata.FOCAL_LENGTH,
directory.getString(ExifDirectory.TAG_FOCAL_LENGTH));
+ metadata.set(Metadata.FOCAL_LENGTH,
directory.getString(ExifSubIFDDirectory.TAG_FOCAL_LENGTH));
}
}
- if(directory.containsTag(ExifDirectory.TAG_ISO_EQUIVALENT)) {
- metadata.set(Metadata.ISO_SPEED_RATINGS,
directory.getString(ExifDirectory.TAG_ISO_EQUIVALENT));
+ if(directory.containsTag(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT)) {
+ metadata.set(Metadata.ISO_SPEED_RATINGS,
directory.getString(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT));
}
- if(directory.containsTag(ExifDirectory.TAG_MAKE)) {
- metadata.set(Metadata.EQUIPMENT_MAKE,
directory.getString(ExifDirectory.TAG_MAKE));
+ if(directory.containsTag(ExifIFD0Directory.TAG_MAKE)) {
+ metadata.set(Metadata.EQUIPMENT_MAKE,
directory.getString(ExifIFD0Directory.TAG_MAKE));
}
- if(directory.containsTag(ExifDirectory.TAG_MODEL)) {
- metadata.set(Metadata.EQUIPMENT_MODEL,
directory.getString(ExifDirectory.TAG_MODEL));
+ if(directory.containsTag(ExifIFD0Directory.TAG_MODEL)) {
+ metadata.set(Metadata.EQUIPMENT_MODEL,
directory.getString(ExifIFD0Directory.TAG_MODEL));
}
- if(directory.containsTag(ExifDirectory.TAG_ORIENTATION)) {
- Object length =
directory.getObject(ExifDirectory.TAG_ORIENTATION);
+ if(directory.containsTag(ExifIFD0Directory.TAG_ORIENTATION)) {
+ Object length =
directory.getObject(ExifIFD0Directory.TAG_ORIENTATION);
if(length instanceof Integer) {
metadata.set(Metadata.ORIENTATION, Integer.toString(
((Integer)length).intValue() ));
} else {
- metadata.set(Metadata.ORIENTATION,
directory.getString(ExifDirectory.TAG_ORIENTATION));
+ metadata.set(Metadata.ORIENTATION,
directory.getString(ExifIFD0Directory.TAG_ORIENTATION));
}
}
- if(directory.containsTag(ExifDirectory.TAG_SOFTWARE)) {
- metadata.set(Metadata.SOFTWARE,
directory.getString(ExifDirectory.TAG_SOFTWARE));
+ if(directory.containsTag(ExifIFD0Directory.TAG_SOFTWARE)) {
+ metadata.set(Metadata.SOFTWARE,
directory.getString(ExifIFD0Directory.TAG_SOFTWARE));
}
- if(directory.containsTag(ExifDirectory.TAG_X_RESOLUTION)) {
- Object resolution =
directory.getObject(ExifDirectory.TAG_X_RESOLUTION);
+ if(directory.containsTag(ExifIFD0Directory.TAG_X_RESOLUTION)) {
+ Object resolution =
directory.getObject(ExifIFD0Directory.TAG_X_RESOLUTION);
if(resolution instanceof Rational) {
metadata.set(Metadata.RESOLUTION_HORIZONTAL,
((Rational)resolution).doubleValue());
} else {
- metadata.set(Metadata.RESOLUTION_HORIZONTAL,
directory.getString(ExifDirectory.TAG_X_RESOLUTION));
+ metadata.set(Metadata.RESOLUTION_HORIZONTAL,
directory.getString(ExifIFD0Directory.TAG_X_RESOLUTION));
}
}
- if(directory.containsTag(ExifDirectory.TAG_Y_RESOLUTION)) {
- Object resolution =
directory.getObject(ExifDirectory.TAG_Y_RESOLUTION);
+ if(directory.containsTag(ExifIFD0Directory.TAG_Y_RESOLUTION)) {
+ Object resolution =
directory.getObject(ExifIFD0Directory.TAG_Y_RESOLUTION);
if(resolution instanceof Rational) {
metadata.set(Metadata.RESOLUTION_VERTICAL,
((Rational)resolution).doubleValue());
} else {
- metadata.set(Metadata.RESOLUTION_VERTICAL,
directory.getString(ExifDirectory.TAG_Y_RESOLUTION));
+ metadata.set(Metadata.RESOLUTION_VERTICAL,
directory.getString(ExifIFD0Directory.TAG_Y_RESOLUTION));
}
}
- if(directory.containsTag(ExifDirectory.TAG_RESOLUTION_UNIT)) {
- try {
- metadata.set(Metadata.RESOLUTION_UNIT,
directory.getDescription(ExifDirectory.TAG_RESOLUTION_UNIT));
- } catch (MetadataException e) {
- // ignore
- }
+ if(directory.containsTag(ExifIFD0Directory.TAG_RESOLUTION_UNIT)) {
+ metadata.set(Metadata.RESOLUTION_UNIT,
directory.getDescription(ExifIFD0Directory.TAG_RESOLUTION_UNIT));
+ }
+
if(directory.containsTag(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH)) {
+ metadata.set(Metadata.IMAGE_WIDTH,
directory.getDescription(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH));
+ }
+
if(directory.containsTag(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT)) {
+ metadata.set(Metadata.IMAGE_LENGTH,
directory.getDescription(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT));
}
}
/**
@@ -412,20 +372,22 @@ public class ImageMetadataExtractor {
throws MetadataException {
// Date/Time Original overrides value from
ExifDirectory.TAG_DATETIME
Date original = null;
- if (directory.containsTag(ExifDirectory.TAG_DATETIME_ORIGINAL)) {
- original =
directory.getDate(ExifDirectory.TAG_DATETIME_ORIGINAL);
+ if
(directory.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)) {
+ original =
directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL);
// Unless we have GPS time we don't know the time zone so date
must be set
// as ISO 8601 datetime without timezone suffix (no Z or +/-)
- String datetimeNoTimeZone =
DATE_UNSPECIFIED_TZ.format(original); // Same time zone as Metadata Extractor
uses
- metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
- metadata.set(Metadata.ORIGINAL_DATE, datetimeNoTimeZone);
+ if (original != null) {
+ String datetimeNoTimeZone =
DATE_UNSPECIFIED_TZ.format(original); // Same time zone as Metadata Extractor
uses
+ metadata.set(TikaCoreProperties.CREATED,
datetimeNoTimeZone);
+ metadata.set(Metadata.ORIGINAL_DATE, datetimeNoTimeZone);
+ }
}
- if (directory.containsTag(ExifDirectory.TAG_DATETIME)) {
- Date datetime = directory.getDate(ExifDirectory.TAG_DATETIME);
+ if (directory.containsTag(ExifIFD0Directory.TAG_DATETIME)) {
+ Date datetime =
directory.getDate(ExifIFD0Directory.TAG_DATETIME);
String datetimeNoTimeZone =
DATE_UNSPECIFIED_TZ.format(datetime);
metadata.set(TikaCoreProperties.MODIFIED, datetimeNoTimeZone);
// If Date/Time Original does not exist this might be creation
date
- if (original == null) {
+ if (metadata.get(TikaCoreProperties.CREATED) == null) {
metadata.set(TikaCoreProperties.CREATED,
datetimeNoTimeZone);
}
}
@@ -474,52 +436,12 @@ public class ImageMetadataExtractor {
return directoryType == GpsDirectory.class;
}
public void handle(Directory directory, Metadata metadata) throws
MetadataException {
- String lat =
directory.getDescription(GpsDirectory.TAG_GPS_LATITUDE);
- String latNS =
directory.getDescription(GpsDirectory.TAG_GPS_LATITUDE_REF);
- if(lat != null) {
- Double latitude = parseHMS(lat);
- if(latitude != null) {
- if(latNS != null && latNS.equalsIgnoreCase("S") &&
- latitude > 0) {
- latitude *= -1;
- }
- metadata.set(TikaCoreProperties.LATITUDE,
LAT_LONG_FORMAT.format(latitude));
- }
- }
-
- String lng =
directory.getDescription(GpsDirectory.TAG_GPS_LONGITUDE);
- String lngEW =
directory.getDescription(GpsDirectory.TAG_GPS_LONGITUDE_REF);
- if(lng != null) {
- Double longitude = parseHMS(lng);
- if(longitude != null) {
- if(lngEW != null && lngEW.equalsIgnoreCase("W") &&
- longitude > 0) {
- longitude *= -1;
- }
- metadata.set(TikaCoreProperties.LONGITUDE,
LAT_LONG_FORMAT.format(longitude));
- }
+ GeoLocation geoLocation = ((GpsDirectory)
directory).getGeoLocation();
+ if (geoLocation != null) {
+ metadata.set(TikaCoreProperties.LATITUDE,
geoLocation.getLatitude());
+ metadata.set(TikaCoreProperties.LONGITUDE,
geoLocation.getLongitude());
}
}
- private Double parseHMS(String hms) {
- Matcher m = HOURS_MINUTES_SECONDS.matcher(hms);
- if(m.matches()) {
- double value =
- Integer.parseInt(m.group(1)) +
- (Integer.parseInt(m.group(2))/60.0) +
- (Double.parseDouble(m.group(3))/60.0/60.0);
- return value;
- }
- return null;
- }
- private static final Pattern HOURS_MINUTES_SECONDS =
Pattern.compile("(-?\\d+)\"(\\d+)'(\\d+\\.?\\d*)");
- /**
- * The decimal format used for expressing latitudes and longitudes.
- * The basic geo vocabulary defined by W3C (@see {@link Geographic})
- * refers to the "float" type in XML Schema as the recommended format
- * for latitude and longitude values.
- */
- private static final DecimalFormat LAT_LONG_FORMAT =
- new DecimalFormat("##0.0####", new
DecimalFormatSymbols(Locale.US));
}
}
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
Sun Jul 29 23:18:10 2012
@@ -16,13 +16,14 @@
*/
package org.apache.tika.parser.image;
-import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
@@ -48,18 +49,14 @@ public class TiffParser extends Abstract
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
-
- // read stream twice - exif and xmp extractors
- stream.mark(Integer.MAX_VALUE);
- FilterInputStream first = new FilterInputStream(stream) {
- @Override
- public void close() throws IOException {
- }
- };
- new ImageMetadataExtractor(metadata).parseTiff(first);
- stream.reset();
-
- new JempboxExtractor(metadata).parse(stream);
+ TemporaryResources tmp = new TemporaryResources();
+ try {
+ TikaInputStream tis = TikaInputStream.get(stream, tmp);
+ new ImageMetadataExtractor(metadata).parseTiff(tis.getFile());
+ new JempboxExtractor(metadata).parse(tis);
+ } finally {
+ tmp.dispose();
+ }
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
Sun Jul 29 23:18:10 2012
@@ -19,6 +19,7 @@ package org.apache.tika.parser.image;
import java.util.Arrays;
import java.util.GregorianCalendar;
import java.util.Iterator;
+import java.util.List;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -26,7 +27,8 @@ import org.apache.tika.metadata.TikaCore
import com.drew.metadata.Directory;
import com.drew.metadata.MetadataException;
import com.drew.metadata.Tag;
-import com.drew.metadata.exif.ExifDirectory;
+import com.drew.metadata.exif.ExifIFD0Directory;
+import com.drew.metadata.exif.ExifSubIFDDirectory;
import com.drew.metadata.jpeg.JpegCommentDirectory;
import junit.framework.TestCase;
@@ -53,15 +55,16 @@ public class ImageMetadataExtractorTest
}
public void testExifHandlerSupports() {
- assertTrue(new
ImageMetadataExtractor.ExifHandler().supports(ExifDirectory.class));
+ assertTrue(new
ImageMetadataExtractor.ExifHandler().supports(ExifIFD0Directory.class));
+ assertTrue(new
ImageMetadataExtractor.ExifHandler().supports(ExifSubIFDDirectory.class));
assertFalse(new
ImageMetadataExtractor.ExifHandler().supports(Directory.class));
assertFalse(new
ImageMetadataExtractor.ExifHandler().supports(JpegCommentDirectory.class));
}
public void testExifHandlerParseDate() throws MetadataException {
- ExifDirectory exif = mock(ExifDirectory.class);
-
when(exif.containsTag(ExifDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
- when(exif.getDate(ExifDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(
+ ExifSubIFDDirectory exif = mock(ExifSubIFDDirectory.class);
+
when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
+
when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(
new GregorianCalendar(2000, 0, 1, 0, 0, 0).getTime()); // jvm
default timezone as in Metadata Extractor
Metadata metadata = new Metadata();
@@ -71,9 +74,9 @@ public class ImageMetadataExtractorTest
}
public void testExifHandlerParseDateFallback() throws MetadataException {
- ExifDirectory exif = mock(ExifDirectory.class);
- when(exif.containsTag(ExifDirectory.TAG_DATETIME)).thenReturn(true);
- when(exif.getDate(ExifDirectory.TAG_DATETIME)).thenReturn(
+ ExifIFD0Directory exif = mock(ExifIFD0Directory.class);
+
when(exif.containsTag(ExifIFD0Directory.TAG_DATETIME)).thenReturn(true);
+ when(exif.getDate(ExifIFD0Directory.TAG_DATETIME)).thenReturn(
new GregorianCalendar(1999, 0, 1, 0, 0, 0).getTime()); // jvm
default timezone as in Metadata Extractor
Metadata metadata = new Metadata();
@@ -83,10 +86,9 @@ public class ImageMetadataExtractorTest
}
public void testExifHandlerParseDateError() throws MetadataException {
- ExifDirectory exif = mock(ExifDirectory.class);
-
when(exif.containsTag(ExifDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
- when(exif.getDate(ExifDirectory.TAG_DATETIME_ORIGINAL)).thenThrow(
- new MetadataException("Tag 'X' cannot be cast to a
java.util.Date."));
+ ExifIFD0Directory exif = mock(ExifIFD0Directory.class);
+
when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
+
when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(null);
Metadata metadata = new Metadata();
new ImageMetadataExtractor.ExifHandler().handle(exif, metadata);
@@ -105,8 +107,8 @@ public class ImageMetadataExtractorTest
Tag t3 = mock(Tag.class);
when(t3.getTagName()).thenReturn(TikaCoreProperties.DESCRIPTION.getName());
when(t3.getDescription()).thenReturn("known");
- Iterator<Tag> tags = Arrays.asList(t1, t2, t3).iterator();
- when(d.getTagIterator()).thenReturn(tags);
+ List<Tag> tags = Arrays.asList(t1, t2, t3);
+ when(d.getTags()).thenReturn(tags);
Metadata metadata = new Metadata();
new ImageMetadataExtractor.CopyUnknownFieldsHandler().handle(d,
metadata);
assertEquals("t1", metadata.get("Image Description"));
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
Sun Jul 29 23:18:10 2012
@@ -17,6 +17,7 @@
package org.apache.tika.parser.jpeg;
import java.io.InputStream;
+import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.List;
@@ -31,6 +32,8 @@ import org.xml.sax.helpers.DefaultHandle
public class JpegParserTest extends TestCase {
private final Parser parser = new JpegParser();
+
+ private DecimalFormat geoDecimalFormatter = new DecimalFormat("#.#####");
public void testJPEG() throws Exception {
Metadata metadata = new Metadata();
@@ -90,8 +93,8 @@ public class JpegParserTest extends Test
parser.parse(stream, new DefaultHandler(), metadata, new
ParseContext());
// Geo tags
- assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
- assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
+ assertEquals("12.54321", geoDecimalFormatter.format(new
Double(metadata.get(Metadata.LATITUDE))));
+ assertEquals("-54.1234", geoDecimalFormatter.format(new
Double(metadata.get(Metadata.LONGITUDE))));
// Core EXIF/TIFF tags
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
@@ -128,7 +131,7 @@ public class JpegParserTest extends Test
* different way, see TIKA-915 for details
* Disabled for now, pending a fix to the underlying library
*/
- public void DISABLEDtestJPEGGeo2() throws Exception {
+ public void testJPEGGeo2() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
InputStream stream =
@@ -136,8 +139,8 @@ public class JpegParserTest extends Test
parser.parse(stream, new DefaultHandler(), metadata, new
ParseContext());
// Geo tags should be there with 5dp, and not rounded
- assertEquals("51.57576", metadata.get(Metadata.LATITUDE));
- assertEquals("-1.56788", metadata.get(Metadata.LONGITUDE));
+ assertEquals("51.57576", geoDecimalFormatter.format(new
Double(metadata.get(Metadata.LATITUDE))));
+ assertEquals("-1.56789", geoDecimalFormatter.format(new
Double(metadata.get(Metadata.LONGITUDE))));
}
public void testJPEGTitleAndDescription() throws Exception {