[ https://issues.apache.org/jira/browse/PARQUET-1436?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16639525#comment-16639525 ]
ASF GitHub Bot commented on PARQUET-1436: ----------------------------------------- zivanfi closed pull request #529: PARQUET-1436: TimestampMicrosStringifier shows wrong microseconds for timestamps before 1970 URL: https://github.com/apache/parquet-mr/pull/529 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java index 3c3417e0d..03786ed73 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java @@ -29,8 +29,9 @@ import java.math.BigInteger; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.text.SimpleDateFormat; -import java.util.TimeZone; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; import java.util.concurrent.TimeUnit; import javax.naming.OperationNotSupportedException; @@ -243,80 +244,68 @@ String stringifyNotNull(Binary value) { }; private static class DateStringifier extends PrimitiveStringifier { - private final SimpleDateFormat formatter; - private static final TimeZone UTC = TimeZone.getTimeZone("utc"); + private final DateTimeFormatter formatter; private DateStringifier(String name, String format) { super(name); - formatter = new SimpleDateFormat(format); - formatter.setTimeZone(UTC); + formatter = DateTimeFormatter.ofPattern(format).withZone(ZoneOffset.UTC); } @Override public String stringify(int value) { - return toFormattedString(toMillis(value)); + return toFormattedString(getInstant(value)); } @Override public String stringify(long value) { - return 
toFormattedString(toMillis(value)); + return toFormattedString(getInstant(value)); } - private String toFormattedString(long millis) { - return formatter.format(millis); + private String toFormattedString(Instant instant) { + return formatter.format(instant); } - long toMillis(int value) { + Instant getInstant(int value) { // throw the related unsupported exception super.stringify(value); - return 0; + return null; } - long toMillis(long value) { + Instant getInstant(long value) { // throw the related unsupported exception super.stringify(value); - return 0; + return null; } } static final PrimitiveStringifier DATE_STRINGIFIER = new DateStringifier("DATE_STRINGIFIER", "yyyy-MM-dd") { @Override - long toMillis(int value) { - return TimeUnit.DAYS.toMillis(value); + Instant getInstant(int value) { + return Instant.ofEpochMilli(TimeUnit.DAYS.toMillis(value)); }; }; static final PrimitiveStringifier TIMESTAMP_MILLIS_STRINGIFIER = new DateStringifier( "TIMESTAMP_MILLIS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") { @Override - long toMillis(long value) { - return value; + Instant getInstant(long value) { + return Instant.ofEpochMilli(value); } }; static final PrimitiveStringifier TIMESTAMP_MICROS_STRINGIFIER = new DateStringifier( - "TIMESTAMP_MICROS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") { + "TIMESTAMP_MICROS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSS") { @Override - public String stringify(long value) { - return super.stringify(value) + String.format("%03d", Math.abs(value % 1000)); - } - - @Override - long toMillis(long value) { - return value / 1000; + Instant getInstant(long value) { + return Instant.ofEpochSecond(MICROSECONDS.toSeconds(value), MICROSECONDS.toNanos(value % SECONDS.toMicros(1))); } }; static final PrimitiveStringifier TIMESTAMP_NANOS_STRINGIFIER = new DateStringifier( - "TIMESTAMP_NANOS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") { - @Override - public String stringify(long value) { - return super.stringify(value) + String.format("%06d", 
Math.abs(value % 1_000_000)); - } - + "TIMESTAMP_NANOS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS") { @Override - long toMillis(long value) { - return value / 1_000_000; + Instant getInstant(long value) { + return Instant.ofEpochSecond(NANOSECONDS.toSeconds(value), NANOSECONDS.toNanos(value % SECONDS.toNanos(1))); } }; diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java index b4e706296..afc8684cf 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java @@ -154,8 +154,8 @@ public void testDateStringifier() { assertEquals("2017-12-14", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis()))); cal.clear(); - cal.set(1492, Calendar.AUGUST, 3); - assertEquals("1492-08-03", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis()))); + cal.set(1583, Calendar.AUGUST, 3); + assertEquals("1583-08-03", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis()))); checkThrowingUnsupportedException(stringifier, Integer.TYPE); } @@ -197,7 +197,7 @@ public void testTimestampMicrosStringifier() { cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); cal.set(Calendar.MILLISECOND, 765); micros = cal.getTimeInMillis() * 1000 - 1; - assertEquals("1848-03-15T09:23:59.765001", stringifier.stringify(micros)); + assertEquals("1848-03-15T09:23:59.764999", stringifier.stringify(micros)); checkThrowingUnsupportedException(stringifier, Long.TYPE); } @@ -219,7 +219,7 @@ public void testTimestampNanosStringifier() { cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); cal.set(Calendar.MILLISECOND, 765); nanos = cal.getTimeInMillis() * 1_000_000 - 1; - assertEquals("1848-03-15T09:23:59.765000001", stringifier.stringify(nanos)); + assertEquals("1848-03-15T09:23:59.764999999", stringifier.stringify(nanos)); 
checkThrowingUnsupportedException(stringifier, Long.TYPE); } ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > TimestampMicrosStringifier shows wrong microseconds for timestamps before 1970 > ------------------------------------------------------------------------------ > > Key: PARQUET-1436 > URL: https://issues.apache.org/jira/browse/PARQUET-1436 > Project: Parquet > Issue Type: Task > Components: parquet-mr > Reporter: Zoltan Ivanfi > Priority: Major > Labels: pull-request-available > Fix For: 1.11.0 > > > testTimestampMicrosStringifier takes the timestamp 1848-03-15T09:23:59.765 > and subtracts 1 microsecond from it. The result (both expected and actual) > is 1848-03-15T09:23:59.765001, but it should be 1848-03-15T09:23:59.764999 > instead. -- This message was sent by Atlassian JIRA (v7.6.3#76005)