Author: rgauss
Date: Sun Jul 29 23:18:10 2012
New Revision: 1366967

URL: http://svn.apache.org/viewvc?rev=1366967&view=rev
Log:
TIKA-811: Upgrade metadata-extractor version for OpenJDK 7 support
   - Upgraded metadata-extractor to 2.6.2
   - Refactored calls to metadata-extractor library methods and tags for new API
   - Simplified use of JpegMetadataReader to use readMetadata method
   - Updated TIFF parsing to utilize a temp File since the metadata-extractor 
method accepting an InputStream is now deprecated
TIKA-915: Image geodata being rounded to integers
   - Refactored GeotagHandler to use metadata-extractor's GeoLocation object
   - Updated JpegParserTest for the better precision now available from 
metadata-extractor library
   - Enabled testJPEGGeo2 test

Modified:
    tika/trunk/tika-parsers/pom.xml
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java

Modified: tika/trunk/tika-parsers/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Sun Jul 29 23:18:10 2012
@@ -163,7 +163,7 @@
     <dependency>
        <groupId>com.drewnoakes</groupId>
        <artifactId>metadata-extractor</artifactId>
-       <version>2.4.0-beta-1</version>
+       <version>2.6.2</version>
     </dependency>
     <dependency>
       <groupId>de.l3s.boilerpipe</groupId>

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
 Sun Jul 29 23:18:10 2012
@@ -18,43 +18,34 @@ package org.apache.tika.parser.image;
 
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.text.DecimalFormat;
-import java.text.DecimalFormatSymbols;
 import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.Iterator;
-import java.util.Locale;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Geographic;
 import org.apache.tika.metadata.IPTC;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.xml.sax.SAXException;
 
+import com.drew.imaging.jpeg.JpegMetadataReader;
 import com.drew.imaging.jpeg.JpegProcessingException;
-import com.drew.imaging.jpeg.JpegSegmentReader;
 import com.drew.imaging.tiff.TiffMetadataReader;
-import com.drew.imaging.tiff.TiffProcessingException;
+import com.drew.lang.GeoLocation;
 import com.drew.lang.Rational;
 import com.drew.metadata.Directory;
 import com.drew.metadata.MetadataException;
-import com.drew.metadata.MetadataReader;
 import com.drew.metadata.Tag;
-import com.drew.metadata.exif.ExifDirectory;
-import com.drew.metadata.exif.ExifReader;
+import com.drew.metadata.exif.ExifIFD0Directory;
+import com.drew.metadata.exif.ExifSubIFDDirectory;
+import com.drew.metadata.exif.ExifThumbnailDirectory;
 import com.drew.metadata.exif.GpsDirectory;
 import com.drew.metadata.iptc.IptcDirectory;
-import com.drew.metadata.iptc.IptcReader;
 import com.drew.metadata.jpeg.JpegCommentDirectory;
-import com.drew.metadata.jpeg.JpegCommentReader;
 import com.drew.metadata.jpeg.JpegDirectory;
-import com.drew.metadata.jpeg.JpegReader;
 
 /**
  * Uses the <a href="http://www.drewnoakes.com/code/exif/";>Metadata 
Extractor</a> library
@@ -93,51 +84,20 @@ public class ImageMetadataExtractor {
     public void parseJpeg(File file)
             throws IOException, SAXException, TikaException {
         try {
-            JpegSegmentReader reader = new JpegSegmentReader(file);
-            extractMetadataFromSegment(
-                    reader, JpegSegmentReader.SEGMENT_APP1, ExifReader.class);
-            extractMetadataFromSegment(
-                    reader, JpegSegmentReader.SEGMENT_APPD, IptcReader.class);
-            extractMetadataFromSegment(
-                    reader, JpegSegmentReader.SEGMENT_SOF0, JpegReader.class);
-            extractMetadataFromSegment(
-                    reader, JpegSegmentReader.SEGMENT_COM, 
JpegCommentReader.class);
+            com.drew.metadata.Metadata jpegMetadata = 
JpegMetadataReader.readMetadata(file);
+            handle(jpegMetadata);
         } catch (JpegProcessingException e) {
             throw new TikaException("Can't read JPEG metadata", e);
+        } catch (MetadataException e) {
+            throw new TikaException("Can't read JPEG metadata", e);
         }
     }
 
-    private void extractMetadataFromSegment(
-            JpegSegmentReader reader, byte marker,
-            Class<? extends MetadataReader> klass) {
-        try {
-            Constructor<? extends MetadataReader> constructor =
-                    klass.getConstructor(byte[].class);
-
-            int n = reader.getSegmentCount(marker);
-            for (int i = 0; i < n; i++) {
-                byte[] segment = reader.readSegment(marker, i);
-
-                com.drew.metadata.Metadata metadata =
-                        new com.drew.metadata.Metadata();
-                constructor.newInstance(segment).extract(metadata);
-
-                handle(metadata);
-            }
-        } catch (Exception e) {
-            // Unable to read this kind of metadata, so skip
-        }
-    }
-
-    protected void parseTiff(InputStream stream)
+    protected void parseTiff(File file)
             throws IOException, SAXException, TikaException {
         try {
-            com.drew.metadata.Metadata tiffMetadata =
-                TiffMetadataReader.readMetadata(stream);
-
+            com.drew.metadata.Metadata tiffMetadata = 
TiffMetadataReader.readMetadata(file);
             handle(tiffMetadata);
-        } catch (TiffProcessingException e) {
-            throw new TikaException("Can't read TIFF metadata", e);
         } catch (MetadataException e) {
             throw new TikaException("Can't read TIFF metadata", e);
         }
@@ -148,10 +108,9 @@ public class ImageMetadataExtractor {
      * @param metadataExtractor Tag directories from a Metadata Extractor 
"reader"
      * @throws MetadataException This method does not handle exceptions from 
Metadata Extractor
      */
-    @SuppressWarnings("unchecked")
     protected void handle(com.drew.metadata.Metadata metadataExtractor) 
             throws MetadataException {
-        handle(metadataExtractor.getDirectoryIterator());
+        handle(metadataExtractor.getDirectories().iterator());
     }
 
     /**
@@ -175,7 +134,7 @@ public class ImageMetadataExtractor {
      */
     static interface DirectoryHandler {
         /**
-         * @param directorySubclass A Metadata Extractor directory class
+         * @param directoryType A Metadata Extractor directory class
          * @return true if the directory type is supported by this handler
          */
         boolean supports(Class<? extends Directory> directoryType);
@@ -198,10 +157,12 @@ public class ImageMetadataExtractor {
         }
         public void handle(Directory directory, Metadata metadata)
                 throws MetadataException {
-            Iterator<?> tags = directory.getTagIterator();
-            while (tags.hasNext()) {
-                Tag tag = (Tag) tags.next();
-                metadata.set(tag.getTagName(), tag.getDescription());
+            if (directory.getTags() != null) {
+                Iterator<?> tags = directory.getTags().iterator();
+                while (tags.hasNext()) {
+                    Tag tag = (Tag) tags.next();
+                    metadata.set(tag.getTagName(), tag.getDescription());
+                }
             }
         }
     }    
@@ -217,23 +178,20 @@ public class ImageMetadataExtractor {
         }
         public void handle(Directory directory, Metadata metadata)
                 throws MetadataException {
-            Iterator<?> tags = directory.getTagIterator();
-            while (tags.hasNext()) {
-                Tag tag = (Tag) tags.next();
-                String name = tag.getTagName();
-                if (!MetadataFields.isMetadataField(name)) {
-                   try {
-                      String value = tag.getDescription().trim();
-                      if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
-                          value = Boolean.TRUE.toString();
-                      } else if 
(Boolean.FALSE.toString().equalsIgnoreCase(value)) {
-                          value = Boolean.FALSE.toString();
-                      }
-                      metadata.set(name, value);
-                   } catch(MetadataException e) {
-                      // Either something's corrupt, or it's a JPEG tag
-                      //  that the library doesn't know about. Skip it
-                   }
+            if (directory.getTags() != null) {
+                Iterator<?> tags = directory.getTags().iterator();
+                while (tags.hasNext()) {
+                    Tag tag = (Tag) tags.next();
+                    String name = tag.getTagName();
+                    if (!MetadataFields.isMetadataField(name)) {
+                          String value = tag.getDescription().trim();
+                          if (Boolean.TRUE.toString().equalsIgnoreCase(value)) 
{
+                              value = Boolean.TRUE.toString();
+                          } else if 
(Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+                              value = Boolean.FALSE.toString();
+                          }
+                          metadata.set(name, value);
+                    }
                 }
             }
         }
@@ -245,22 +203,25 @@ public class ImageMetadataExtractor {
     static class DimensionsHandler implements DirectoryHandler {
         private final Pattern LEADING_NUMBERS = 
Pattern.compile("(\\d+)\\s*.*");
         public boolean supports(Class<? extends Directory> directoryType) {
-            return directoryType == JpegDirectory.class || directoryType == 
ExifDirectory.class;
+            return directoryType == JpegDirectory.class || 
+                        directoryType == ExifSubIFDDirectory.class ||
+                        directoryType == ExifThumbnailDirectory.class ||
+                        directoryType == ExifIFD0Directory.class;
         }
         public void handle(Directory directory, Metadata metadata) throws 
MetadataException {
             // The test TIFF has width and height stored as follows according 
to exiv2
             //Exif.Image.ImageWidth                        Short       1  100
             //Exif.Image.ImageLength                       Short       1  75
             // and the values are found in "Thumbnail Image Width" (and 
Height) from Metadata Extractor
-            set(directory, metadata, ExifDirectory.TAG_THUMBNAIL_IMAGE_WIDTH, 
Metadata.IMAGE_WIDTH);
+            set(directory, metadata, 
ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH, Metadata.IMAGE_WIDTH);
             set(directory, metadata, JpegDirectory.TAG_JPEG_IMAGE_WIDTH, 
Metadata.IMAGE_WIDTH);
-            set(directory, metadata, ExifDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT, 
Metadata.IMAGE_LENGTH);
+            set(directory, metadata, 
ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT, Metadata.IMAGE_LENGTH);
             set(directory, metadata, JpegDirectory.TAG_JPEG_IMAGE_HEIGHT, 
Metadata.IMAGE_LENGTH);
             // Bits per sample, two methods of extracting, exif overrides jpeg
             set(directory, metadata, JpegDirectory.TAG_JPEG_DATA_PRECISION, 
Metadata.BITS_PER_SAMPLE);
-            set(directory, metadata, ExifDirectory.TAG_BITS_PER_SAMPLE, 
Metadata.BITS_PER_SAMPLE);
+            set(directory, metadata, ExifSubIFDDirectory.TAG_BITS_PER_SAMPLE, 
Metadata.BITS_PER_SAMPLE);
             // Straightforward
-            set(directory, metadata, ExifDirectory.TAG_SAMPLES_PER_PIXEL, 
Metadata.SAMPLES_PER_PIXEL);
+            set(directory, metadata, 
ExifSubIFDDirectory.TAG_SAMPLES_PER_PIXEL, Metadata.SAMPLES_PER_PIXEL);
         }
         private void set(Directory directory, Metadata metadata, int 
extractTag, Property metadataField) {
             if (directory.containsTag(extractTag)) {
@@ -286,7 +247,8 @@ public class ImageMetadataExtractor {
     static class ExifHandler implements DirectoryHandler {
         private static final SimpleDateFormat DATE_UNSPECIFIED_TZ = new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
         public boolean supports(Class<? extends Directory> directoryType) {
-            return directoryType == ExifDirectory.class;
+            return directoryType == ExifIFD0Directory.class || 
+                    directoryType == ExifSubIFDDirectory.class;
         }
         public void handle(Directory directory, Metadata metadata) {
             try {
@@ -303,8 +265,9 @@ public class ImageMetadataExtractor {
          */
         public void handleCommentTags(Directory directory, Metadata metadata) {
             if (metadata.get(TikaCoreProperties.DESCRIPTION) == null &&
-                    
directory.containsTag(ExifDirectory.TAG_IMAGE_DESCRIPTION)) {
-                metadata.set(TikaCoreProperties.DESCRIPTION, 
directory.getString(ExifDirectory.TAG_IMAGE_DESCRIPTION));
+                    
directory.containsTag(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION)) {
+                metadata.set(TikaCoreProperties.DESCRIPTION, 
+                        
directory.getString(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION));
             }
         }
         /**
@@ -312,22 +275,17 @@ public class ImageMetadataExtractor {
          *  TIFF image metadata namespace.
          */       
         public void handlePhotoTags(Directory directory, Metadata metadata) {
-            if(directory.containsTag(ExifDirectory.TAG_EXPOSURE_TIME)) {
-               Object exposure = 
directory.getObject(ExifDirectory.TAG_EXPOSURE_TIME);
+            if(directory.containsTag(ExifSubIFDDirectory.TAG_EXPOSURE_TIME)) {
+               Object exposure = 
directory.getObject(ExifSubIFDDirectory.TAG_EXPOSURE_TIME);
                if(exposure instanceof Rational) {
                   metadata.set(Metadata.EXPOSURE_TIME, 
((Rational)exposure).doubleValue());
                } else {
-                  metadata.set(Metadata.EXPOSURE_TIME, 
directory.getString(ExifDirectory.TAG_EXPOSURE_TIME));
+                  metadata.set(Metadata.EXPOSURE_TIME, 
directory.getString(ExifSubIFDDirectory.TAG_EXPOSURE_TIME));
                }
             }
             
-            if(directory.containsTag(ExifDirectory.TAG_FLASH)) {
-               String flash = "";
-               try {
-                  flash = directory.getDescription(ExifDirectory.TAG_FLASH);
-               } catch (MetadataException e) {
-                  // ignore
-               }
+            if(directory.containsTag(ExifSubIFDDirectory.TAG_FLASH)) {
+               String flash = 
directory.getDescription(ExifSubIFDDirectory.TAG_FLASH);
                if(flash.indexOf("Flash fired") > -1) {
                   metadata.set(Metadata.FLASH_FIRED, Boolean.TRUE.toString());
                }
@@ -339,70 +297,72 @@ public class ImageMetadataExtractor {
                }
             }
 
-            if(directory.containsTag(ExifDirectory.TAG_FNUMBER)) {
-               Object fnumber = directory.getObject(ExifDirectory.TAG_FNUMBER);
+            if(directory.containsTag(ExifSubIFDDirectory.TAG_FNUMBER)) {
+               Object fnumber = 
directory.getObject(ExifSubIFDDirectory.TAG_FNUMBER);
                if(fnumber instanceof Rational) {
                   metadata.set(Metadata.F_NUMBER, 
((Rational)fnumber).doubleValue());
                } else {
-                  metadata.set(Metadata.F_NUMBER, 
directory.getString(ExifDirectory.TAG_FNUMBER));
+                  metadata.set(Metadata.F_NUMBER, 
directory.getString(ExifSubIFDDirectory.TAG_FNUMBER));
                }
             }
             
-            if(directory.containsTag(ExifDirectory.TAG_FOCAL_LENGTH)) {
-               Object length = 
directory.getObject(ExifDirectory.TAG_FOCAL_LENGTH);
+            if(directory.containsTag(ExifSubIFDDirectory.TAG_FOCAL_LENGTH)) {
+               Object length = 
directory.getObject(ExifSubIFDDirectory.TAG_FOCAL_LENGTH);
                if(length instanceof Rational) {
                   metadata.set(Metadata.FOCAL_LENGTH, 
((Rational)length).doubleValue());
                } else {
-                  metadata.set(Metadata.FOCAL_LENGTH, 
directory.getString(ExifDirectory.TAG_FOCAL_LENGTH));
+                  metadata.set(Metadata.FOCAL_LENGTH, 
directory.getString(ExifSubIFDDirectory.TAG_FOCAL_LENGTH));
                }
             }
             
-            if(directory.containsTag(ExifDirectory.TAG_ISO_EQUIVALENT)) {
-               metadata.set(Metadata.ISO_SPEED_RATINGS, 
directory.getString(ExifDirectory.TAG_ISO_EQUIVALENT));
+            if(directory.containsTag(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT)) {
+               metadata.set(Metadata.ISO_SPEED_RATINGS, 
directory.getString(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT));
             }
           
-            if(directory.containsTag(ExifDirectory.TAG_MAKE)) {
-               metadata.set(Metadata.EQUIPMENT_MAKE, 
directory.getString(ExifDirectory.TAG_MAKE));
+            if(directory.containsTag(ExifIFD0Directory.TAG_MAKE)) {
+               metadata.set(Metadata.EQUIPMENT_MAKE, 
directory.getString(ExifIFD0Directory.TAG_MAKE));
             }
-            if(directory.containsTag(ExifDirectory.TAG_MODEL)) {
-               metadata.set(Metadata.EQUIPMENT_MODEL, 
directory.getString(ExifDirectory.TAG_MODEL));
+            if(directory.containsTag(ExifIFD0Directory.TAG_MODEL)) {
+               metadata.set(Metadata.EQUIPMENT_MODEL, 
directory.getString(ExifIFD0Directory.TAG_MODEL));
             }
           
-            if(directory.containsTag(ExifDirectory.TAG_ORIENTATION)) {
-               Object length = 
directory.getObject(ExifDirectory.TAG_ORIENTATION);
+            if(directory.containsTag(ExifIFD0Directory.TAG_ORIENTATION)) {
+               Object length = 
directory.getObject(ExifIFD0Directory.TAG_ORIENTATION);
                if(length instanceof Integer) {
                   metadata.set(Metadata.ORIENTATION, Integer.toString( 
((Integer)length).intValue() ));
                } else {
-                  metadata.set(Metadata.ORIENTATION, 
directory.getString(ExifDirectory.TAG_ORIENTATION));
+                  metadata.set(Metadata.ORIENTATION, 
directory.getString(ExifIFD0Directory.TAG_ORIENTATION));
                }
             }
             
-            if(directory.containsTag(ExifDirectory.TAG_SOFTWARE)) {
-               metadata.set(Metadata.SOFTWARE, 
directory.getString(ExifDirectory.TAG_SOFTWARE));
+            if(directory.containsTag(ExifIFD0Directory.TAG_SOFTWARE)) {
+               metadata.set(Metadata.SOFTWARE, 
directory.getString(ExifIFD0Directory.TAG_SOFTWARE));
             }
             
-            if(directory.containsTag(ExifDirectory.TAG_X_RESOLUTION)) {
-               Object resolution = 
directory.getObject(ExifDirectory.TAG_X_RESOLUTION);
+            if(directory.containsTag(ExifIFD0Directory.TAG_X_RESOLUTION)) {
+               Object resolution = 
directory.getObject(ExifIFD0Directory.TAG_X_RESOLUTION);
                if(resolution instanceof Rational) {
                   metadata.set(Metadata.RESOLUTION_HORIZONTAL, 
((Rational)resolution).doubleValue());
                } else {
-                  metadata.set(Metadata.RESOLUTION_HORIZONTAL, 
directory.getString(ExifDirectory.TAG_X_RESOLUTION));
+                  metadata.set(Metadata.RESOLUTION_HORIZONTAL, 
directory.getString(ExifIFD0Directory.TAG_X_RESOLUTION));
                }
             }
-            if(directory.containsTag(ExifDirectory.TAG_Y_RESOLUTION)) {
-               Object resolution = 
directory.getObject(ExifDirectory.TAG_Y_RESOLUTION);
+            if(directory.containsTag(ExifIFD0Directory.TAG_Y_RESOLUTION)) {
+               Object resolution = 
directory.getObject(ExifIFD0Directory.TAG_Y_RESOLUTION);
                if(resolution instanceof Rational) {
                   metadata.set(Metadata.RESOLUTION_VERTICAL, 
((Rational)resolution).doubleValue());
                } else {
-                  metadata.set(Metadata.RESOLUTION_VERTICAL, 
directory.getString(ExifDirectory.TAG_Y_RESOLUTION));
+                  metadata.set(Metadata.RESOLUTION_VERTICAL, 
directory.getString(ExifIFD0Directory.TAG_Y_RESOLUTION));
                }
             }
-            if(directory.containsTag(ExifDirectory.TAG_RESOLUTION_UNIT)) {
-               try {
-                  metadata.set(Metadata.RESOLUTION_UNIT, 
directory.getDescription(ExifDirectory.TAG_RESOLUTION_UNIT));
-               } catch (MetadataException e) {
-                  // ignore
-               }
+            if(directory.containsTag(ExifIFD0Directory.TAG_RESOLUTION_UNIT)) {
+               metadata.set(Metadata.RESOLUTION_UNIT, 
directory.getDescription(ExifIFD0Directory.TAG_RESOLUTION_UNIT));
+            }
+            
if(directory.containsTag(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH)) {
+                metadata.set(Metadata.IMAGE_WIDTH, 
directory.getDescription(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_WIDTH));
+            }
+            
if(directory.containsTag(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT)) {
+                metadata.set(Metadata.IMAGE_LENGTH, 
directory.getDescription(ExifThumbnailDirectory.TAG_THUMBNAIL_IMAGE_HEIGHT));
             }
         }
         /**
@@ -412,20 +372,22 @@ public class ImageMetadataExtractor {
                 throws MetadataException {
             // Date/Time Original overrides value from 
ExifDirectory.TAG_DATETIME
             Date original = null;
-            if (directory.containsTag(ExifDirectory.TAG_DATETIME_ORIGINAL)) {
-                original = 
directory.getDate(ExifDirectory.TAG_DATETIME_ORIGINAL);
+            if 
(directory.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)) {
+                original = 
directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL);
                 // Unless we have GPS time we don't know the time zone so date 
must be set
                 // as ISO 8601 datetime without timezone suffix (no Z or +/-)
-                String datetimeNoTimeZone = 
DATE_UNSPECIFIED_TZ.format(original); // Same time zone as Metadata Extractor 
uses
-                metadata.set(TikaCoreProperties.CREATED, datetimeNoTimeZone);
-                metadata.set(Metadata.ORIGINAL_DATE, datetimeNoTimeZone);
+                if (original != null) {
+                    String datetimeNoTimeZone = 
DATE_UNSPECIFIED_TZ.format(original); // Same time zone as Metadata Extractor 
uses
+                    metadata.set(TikaCoreProperties.CREATED, 
datetimeNoTimeZone);
+                    metadata.set(Metadata.ORIGINAL_DATE, datetimeNoTimeZone);
+                }
             }
-            if (directory.containsTag(ExifDirectory.TAG_DATETIME)) {
-                Date datetime = directory.getDate(ExifDirectory.TAG_DATETIME);
+            if (directory.containsTag(ExifIFD0Directory.TAG_DATETIME)) {
+                Date datetime = 
directory.getDate(ExifIFD0Directory.TAG_DATETIME);
                 String datetimeNoTimeZone = 
DATE_UNSPECIFIED_TZ.format(datetime);
                 metadata.set(TikaCoreProperties.MODIFIED, datetimeNoTimeZone);
                 // If Date/Time Original does not exist this might be creation 
date
-                if (original == null) {
+                if (metadata.get(TikaCoreProperties.CREATED) == null) {
                     metadata.set(TikaCoreProperties.CREATED, 
datetimeNoTimeZone);
                 }
             }
@@ -474,52 +436,12 @@ public class ImageMetadataExtractor {
             return directoryType == GpsDirectory.class;
         }
         public void handle(Directory directory, Metadata metadata) throws 
MetadataException {
-            String lat = 
directory.getDescription(GpsDirectory.TAG_GPS_LATITUDE);
-            String latNS = 
directory.getDescription(GpsDirectory.TAG_GPS_LATITUDE_REF);
-            if(lat != null) {
-                Double latitude = parseHMS(lat);
-                if(latitude != null) {
-                    if(latNS != null && latNS.equalsIgnoreCase("S") &&
-                            latitude > 0) {
-                        latitude *= -1;
-                    }
-                    metadata.set(TikaCoreProperties.LATITUDE, 
LAT_LONG_FORMAT.format(latitude)); 
-                }
-            }
-
-            String lng = 
directory.getDescription(GpsDirectory.TAG_GPS_LONGITUDE);
-            String lngEW = 
directory.getDescription(GpsDirectory.TAG_GPS_LONGITUDE_REF);
-            if(lng != null) {
-                Double longitude = parseHMS(lng);
-                if(longitude != null) {
-                    if(lngEW != null && lngEW.equalsIgnoreCase("W") &&
-                            longitude > 0) {
-                        longitude *= -1;
-                    }
-                    metadata.set(TikaCoreProperties.LONGITUDE, 
LAT_LONG_FORMAT.format(longitude));
-                }
+            GeoLocation geoLocation = ((GpsDirectory) 
directory).getGeoLocation();
+            if (geoLocation != null) {
+                metadata.set(TikaCoreProperties.LATITUDE, 
geoLocation.getLatitude());
+                metadata.set(TikaCoreProperties.LONGITUDE, 
geoLocation.getLongitude());
             }
         }
-        private Double parseHMS(String hms) {
-           Matcher m = HOURS_MINUTES_SECONDS.matcher(hms);
-           if(m.matches()) {
-              double value = 
-                Integer.parseInt(m.group(1)) +
-                (Integer.parseInt(m.group(2))/60.0) +
-                (Double.parseDouble(m.group(3))/60.0/60.0);
-              return value;
-           }
-           return null;
-        }
-        private static final Pattern HOURS_MINUTES_SECONDS = 
Pattern.compile("(-?\\d+)\"(\\d+)'(\\d+\\.?\\d*)");
-        /**
-         * The decimal format used for expressing latitudes and longitudes.
-         * The basic geo vocabulary defined by W3C (@see {@link Geographic})
-         * refers to the "float" type in XML Schema as the recommended format
-         * for latitude and longitude values.
-         */
-        private static final DecimalFormat LAT_LONG_FORMAT =
-            new DecimalFormat("##0.0####", new 
DecimalFormatSymbols(Locale.US));
     }
 
 }

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
 Sun Jul 29 23:18:10 2012
@@ -16,13 +16,14 @@
  */
 package org.apache.tika.parser.image;
 
-import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Collections;
 import java.util.Set;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
@@ -48,18 +49,14 @@ public class TiffParser extends Abstract
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-
-        // read stream twice - exif and xmp extractors
-        stream.mark(Integer.MAX_VALUE);
-        FilterInputStream first = new FilterInputStream(stream) {
-            @Override
-            public void close() throws IOException {
-            }
-        };
-        new ImageMetadataExtractor(metadata).parseTiff(first);
-        stream.reset();
-        
-        new JempboxExtractor(metadata).parse(stream);
+        TemporaryResources tmp = new TemporaryResources();
+        try {
+            TikaInputStream tis = TikaInputStream.get(stream, tmp);
+            new ImageMetadataExtractor(metadata).parseTiff(tis.getFile());
+            new JempboxExtractor(metadata).parse(tis);
+        } finally {
+            tmp.dispose();
+        }
 
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
         xhtml.startDocument();

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
 Sun Jul 29 23:18:10 2012
@@ -19,6 +19,7 @@ package org.apache.tika.parser.image;
 import java.util.Arrays;
 import java.util.GregorianCalendar;
 import java.util.Iterator;
+import java.util.List;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
@@ -26,7 +27,8 @@ import org.apache.tika.metadata.TikaCore
 import com.drew.metadata.Directory;
 import com.drew.metadata.MetadataException;
 import com.drew.metadata.Tag;
-import com.drew.metadata.exif.ExifDirectory;
+import com.drew.metadata.exif.ExifIFD0Directory;
+import com.drew.metadata.exif.ExifSubIFDDirectory;
 import com.drew.metadata.jpeg.JpegCommentDirectory;
 
 import junit.framework.TestCase;
@@ -53,15 +55,16 @@ public class ImageMetadataExtractorTest 
     }
     
     public void testExifHandlerSupports() {
-        assertTrue(new 
ImageMetadataExtractor.ExifHandler().supports(ExifDirectory.class));
+        assertTrue(new 
ImageMetadataExtractor.ExifHandler().supports(ExifIFD0Directory.class));
+        assertTrue(new 
ImageMetadataExtractor.ExifHandler().supports(ExifSubIFDDirectory.class));
         assertFalse(new 
ImageMetadataExtractor.ExifHandler().supports(Directory.class));
         assertFalse(new 
ImageMetadataExtractor.ExifHandler().supports(JpegCommentDirectory.class));
     }
     
     public void testExifHandlerParseDate() throws MetadataException {
-        ExifDirectory exif = mock(ExifDirectory.class);
-        
when(exif.containsTag(ExifDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
-        when(exif.getDate(ExifDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(
+        ExifSubIFDDirectory exif = mock(ExifSubIFDDirectory.class);
+        
when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
+        
when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(
                 new GregorianCalendar(2000, 0, 1, 0, 0, 0).getTime()); // jvm 
default timezone as in Metadata Extractor
         Metadata metadata = new Metadata();
         
@@ -71,9 +74,9 @@ public class ImageMetadataExtractorTest 
     }
 
     public void testExifHandlerParseDateFallback() throws MetadataException {
-        ExifDirectory exif = mock(ExifDirectory.class);
-        when(exif.containsTag(ExifDirectory.TAG_DATETIME)).thenReturn(true);
-        when(exif.getDate(ExifDirectory.TAG_DATETIME)).thenReturn(
+        ExifIFD0Directory exif = mock(ExifIFD0Directory.class);
+        when(exif.containsTag(ExifIFD0Directory.TAG_DATETIME)).thenReturn(true);
+        when(exif.getDate(ExifIFD0Directory.TAG_DATETIME)).thenReturn(
                 new GregorianCalendar(1999, 0, 1, 0, 0, 0).getTime()); // jvm default timezone as in Metadata Extractor
         Metadata metadata = new Metadata();
         
@@ -83,10 +86,9 @@ public class ImageMetadataExtractorTest 
     }
     
     public void testExifHandlerParseDateError() throws MetadataException {
-        ExifDirectory exif = mock(ExifDirectory.class);
-        when(exif.containsTag(ExifDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
-        when(exif.getDate(ExifDirectory.TAG_DATETIME_ORIGINAL)).thenThrow(
-                new MetadataException("Tag 'X' cannot be cast to a java.util.Date."));
+        ExifIFD0Directory exif = mock(ExifIFD0Directory.class);
+        when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
+        when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(null);
         Metadata metadata = new Metadata();
         
         new ImageMetadataExtractor.ExifHandler().handle(exif, metadata);
@@ -105,8 +107,8 @@ public class ImageMetadataExtractorTest 
         Tag t3 = mock(Tag.class);
         when(t3.getTagName()).thenReturn(TikaCoreProperties.DESCRIPTION.getName());
         when(t3.getDescription()).thenReturn("known");
-        Iterator<Tag> tags = Arrays.asList(t1, t2, t3).iterator();
-        when(d.getTagIterator()).thenReturn(tags);
+        List<Tag> tags = Arrays.asList(t1, t2, t3);
+        when(d.getTags()).thenReturn(tags);
         Metadata metadata = new Metadata();
         new ImageMetadataExtractor.CopyUnknownFieldsHandler().handle(d, metadata);
         assertEquals("t1", metadata.get("Image Description"));

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=1366967&r1=1366966&r2=1366967&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java Sun Jul 29 23:18:10 2012
@@ -17,6 +17,7 @@
 package org.apache.tika.parser.jpeg;
 
 import java.io.InputStream;
+import java.text.DecimalFormat;
 import java.util.Arrays;
 import java.util.List;
 
@@ -31,6 +32,8 @@ import org.xml.sax.helpers.DefaultHandle
 
 public class JpegParserTest extends TestCase {
     private final Parser parser = new JpegParser();
+    
+    private DecimalFormat geoDecimalFormatter = new DecimalFormat("#.#####");
 
     public void testJPEG() throws Exception {
         Metadata metadata = new Metadata();
@@ -90,8 +93,8 @@ public class JpegParserTest extends Test
         parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
         
         // Geo tags
-        assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
-        assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
+        assertEquals("12.54321", geoDecimalFormatter.format(new Double(metadata.get(Metadata.LATITUDE))));
+        assertEquals("-54.1234", geoDecimalFormatter.format(new Double(metadata.get(Metadata.LONGITUDE))));
         
         // Core EXIF/TIFF tags
         assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
@@ -128,7 +131,7 @@ public class JpegParserTest extends Test
      *  different way, see TIKA-915 for details
      * Disabled for now, pending a fix to the underlying library
      */
-    public void DISABLEDtestJPEGGeo2() throws Exception {
+    public void testJPEGGeo2() throws Exception {
        Metadata metadata = new Metadata();
        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
        InputStream stream =
@@ -136,8 +139,8 @@ public class JpegParserTest extends Test
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
 
        // Geo tags should be there with 5dp, and not rounded
-       assertEquals("51.57576", metadata.get(Metadata.LATITUDE));
-       assertEquals("-1.56788", metadata.get(Metadata.LONGITUDE));
+       assertEquals("51.57576", geoDecimalFormatter.format(new Double(metadata.get(Metadata.LATITUDE))));
+       assertEquals("-1.56789", geoDecimalFormatter.format(new Double(metadata.get(Metadata.LONGITUDE))));
     }
     
     public void testJPEGTitleAndDescription() throws Exception {


Reply via email to