This is an automated email from the ASF dual-hosted git repository. lfcnassif pushed a commit to branch TIKA-3815 in repository https://gitbox.apache.org/repos/asf/tika.git
commit e3c1a0c11c2cd693a3599c7f71ae262d9d7795f5 Author: Luis Nassif <[email protected]> AuthorDate: Mon Jul 11 23:29:29 2022 -0300 TIKA-3815: set GMT timezone for unspecified timezones like drew noakes, fix tests to work on different timezones --- .../java/org/apache/tika/metadata/TestMetadata.java | 2 +- tika-parent/pom.xml | 2 +- .../tika/parser/image/ImageMetadataExtractor.java | 12 ++++++++++-- .../tika/parser/image/ImageMetadataExtractorTest.java | 8 ++++---- .../org/apache/tika/parser/image/JpegParserTest.java | 18 ------------------ 5 files changed, 16 insertions(+), 26 deletions(-) diff --git a/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java b/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java index dad712681..13de1ae54 100644 --- a/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java +++ b/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java @@ -499,8 +499,8 @@ public class TestMetadata { start += random.nextInt(1000000); Date now = new Date(start); DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); - m.set(TikaCoreProperties.CREATED, df.format(now)); df.setTimeZone(TimeZone.getTimeZone("UTC")); + m.set(TikaCoreProperties.CREATED, df.format(now)); assertTrue( Math.abs(now.getTime() - m.getDate(TikaCoreProperties.CREATED).getTime()) < 2000); diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml index 325a57dba..3f2d409b3 100644 --- a/tika-parent/pom.xml +++ b/tika-parent/pom.xml @@ -885,7 +885,7 @@ <version>${maven.surefire.version} </version> <!-- versions greater than this don't like System.exit calls in tika-batch --> <configuration> - <argLine>-Xmx3072m -Duser.timezone=UTC -Djava.awt.headless=true</argLine> + <argLine>-Xmx3072m -Djava.awt.headless=true</argLine> </configuration> </plugin> <plugin> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java index 28f92e487..3870ae939 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java @@ -26,6 +26,7 @@ import java.text.SimpleDateFormat; import java.util.Date; import java.util.Iterator; import java.util.Locale; +import java.util.TimeZone; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -377,8 +378,15 @@ public class ImageMetadataExtractor { static class ExifHandler implements DirectoryHandler { // There's a new ExifHandler for each file processed, so this is thread safe - private final SimpleDateFormat dateUnspecifiedTz = - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US); + private final SimpleDateFormat dateUnspecifiedTz = getUnspecifiedTzDateFormat(); + + private SimpleDateFormat getUnspecifiedTzDateFormat() { + SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US); + // As of Drew Noakes' metadata-extractor 2.8.1, unspecified + // timezones are set to TimeZone.getTimeZone("GMT") + df.setTimeZone(TimeZone.getTimeZone("GMT")); + return df; + } public boolean supports(Class<? extends Directory> directoryType) { diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java index 4a32c6ce8..ab8f0c805 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java @@ -73,11 +73,11 @@ public class ImageMetadataExtractorTest { public void testExifHandlerParseDate() throws MetadataException { ExifSubIFDDirectory exif = Mockito.mock(ExifSubIFDDirectory.class); Mockito.when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true); - GregorianCalendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.ROOT); + GregorianCalendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"), Locale.ROOT); calendar.setTimeInMillis(0); calendar.set(2000, 0, 1, 0, 0, 0); Mockito.when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)) - .thenReturn(calendar.getTime()); // jvm default timezone as in Metadata Extractor + .thenReturn(calendar.getTime()); // UTC timezone as in Metadata Extractor Metadata metadata = new Metadata(); new ImageMetadataExtractor.ExifHandler().handle(exif, metadata); @@ -89,11 +89,11 @@ public class ImageMetadataExtractorTest { public void testExifHandlerParseDateFallback() throws MetadataException { ExifIFD0Directory exif = Mockito.mock(ExifIFD0Directory.class); Mockito.when(exif.containsTag(ExifIFD0Directory.TAG_DATETIME)).thenReturn(true); - GregorianCalendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.ROOT); + GregorianCalendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"), Locale.ROOT); calendar.setTimeInMillis(0); calendar.set(1999, 0, 1, 0, 0, 0); Mockito.when(exif.getDate(ExifIFD0Directory.TAG_DATETIME)) - .thenReturn(calendar.getTime()); // jvm default timezone as in Metadata Extractor + .thenReturn(calendar.getTime()); // UTC timezone as in Metadata Extractor Metadata metadata = new Metadata(); new ImageMetadataExtractor.ExifHandler().handle(exif, metadata); diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java index 69051aa4b..d6fbd60a3 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java @@ -26,8 +26,6 @@ import java.util.Arrays; import java.util.List; import java.util.TimeZone; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.xml.sax.helpers.DefaultHandler; @@ -43,22 +41,6 @@ public class JpegParserTest { static TimeZone CURR_TIME_ZONE = TimeZone.getDefault(); private final Parser parser = new JpegParser(); - //As of Drew Noakes' metadata-extractor 2.8.1, - //unspecified timezones appear to be set to - //TimeZone.getDefault(). We need to normalize this - //for testing across different time zones. - //We also appear to have to specify it in the surefire config: - //<argLine>-Duser.timezone=UTC</argLine> - @BeforeAll - public static void setDefaultTimeZone() { - TimeZone.setDefault(TimeZone.getTimeZone("UTC")); - } - - @AfterAll - public static void resetDefaultTimeZone() { - TimeZone.setDefault(CURR_TIME_ZONE); - } - @Test public void testJPEG() throws Exception { Metadata metadata = new Metadata();
