Repository: tika
Updated Branches:
  refs/heads/master 404d42037 -> 30e4e614e

TIKA-1921 -- note: need to set default timezone to UTC both programmatically and in surefire plugin at least with Java 8.

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/30e4e614
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/30e4e614
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/30e4e614

Branch: refs/heads/master
Commit: 30e4e614ef7342ae114944264d7b487803e7e1a9
Parents: 404d420
Author: tballison <[email protected]>
Authored: Thu Mar 31 14:53:20 2016 -0400
Committer: tballison <[email protected]>
Committed: Thu Mar 31 14:53:20 2016 -0400

----------------------------------------------------------------------
 tika-parent/pom.xml                             |  2 +-
 tika-parsers/pom.xml                            |  2 +-
 .../parser/image/ImageMetadataExtractor.java    |  2 ++
 .../apache/tika/parser/jpeg/JpegParserTest.java | 21 +++++++++++++++++++-
 4 files changed, 24 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tika/blob/30e4e614/tika-parent/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index b0cb21a..6f789d8 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -350,7 +350,7 @@
         <artifactId>maven-surefire-plugin</artifactId>
         <version>2.18.1</version>
         <configuration>
-          <argLine>-Xmx2048m</argLine>
+          <argLine>-Xmx2048m -Duser.timezone=UTC</argLine>
         </configuration>
       </plugin>
       <plugin>

http://git-wip-us.apache.org/repos/asf/tika/blob/30e4e614/tika-parsers/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index bd0a734..e9abe2f 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -200,7 +200,7 @@
     <dependency>
       <groupId>com.drewnoakes</groupId>
       <artifactId>metadata-extractor</artifactId>
-      <version>2.8.0</version>
+      <version>2.8.1</version>
     </dependency>
     <dependency>
       <groupId>de.l3s.boilerpipe</groupId>

http://git-wip-us.apache.org/repos/asf/tika/blob/30e4e614/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
index c73fe85..3c3ec45 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
@@ -467,6 +467,8 @@ public class ImageMetadataExtractor {
      */
     public void handleDateTags(Directory directory, Metadata metadata)
             throws MetadataException {
+        //TODO: should we try to process ExifSubIFDDirectory.TAG_TIME_ZONE_OFFSET
+        //if it exists?
         // Date/Time Original overrides value from ExifDirectory.TAG_DATETIME
         Date original = null;
         if (directory.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)) {

http://git-wip-us.apache.org/repos/asf/tika/blob/30e4e614/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
index 6c90680..d32dfc4 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
 import java.io.InputStream;
 import java.util.Arrays;
 import java.util.List;
+import java.util.TimeZone;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TIFF;
@@ -30,13 +31,31 @@ import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.XMPMM;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.xml.sax.helpers.DefaultHandler;
 
 public class JpegParserTest {
 
     private final Parser parser = new JpegParser();
+    static TimeZone CURR_TIME_ZONE = TimeZone.getDefault();
+
+    //As of Drew Noakes' metadata-extractor 2.8.1,
+    //unspecified timezones appear to be set to
+    //TimeZone.getDefault(). We need to normalize this
+    //for testing across different time zones.
+    //We also appear to have to specify it in the surefire config:
+    //<argLine>-Duser.timezone=UTC</argLine>
+    @BeforeClass
+    public static void setDefaultTimeZone() {
+        TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
+    }
+    @AfterClass
+    public static void resetDefaultTimeZone() {
+        TimeZone.setDefault(CURR_TIME_ZONE);
+    }
 
     @Test
     public void testJPEG() throws Exception {
         Metadata metadata = new Metadata();
@@ -70,7 +89,7 @@ public class JpegParserTest {
         assertEquals("Canon EOS 40D", metadata.get("Model"));
 
         // Common tags
-        //assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
+        assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
         assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
                 "2009-08-11T09:09:45", metadata.get(TikaCoreProperties.CREATED));
         List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
