This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new eb3bfe9e310 HIVE-26233: Problems reading back PARQUET timestamps above 10000 years (#3295) (Peter Vary reviewed by Stamatis Zampetakis)
eb3bfe9e310 is described below

commit eb3bfe9e31054b7e203f32bde128dcae2556928a
Author: pvary <[email protected]>
AuthorDate: Thu May 26 17:15:35 2022 +0200

    HIVE-26233: Problems reading back PARQUET timestamps above 10000 years (#3295) (Peter Vary reviewed by Stamatis Zampetakis)
---
 .../hadoop/hive/common/type/TimestampTZUtil.java   | 20 +++++++++++++++++++-
 .../TestParquetTimestampsHive2Compatibility.java   | 22 ++++++++++++++++++++--
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
index 1853d4c569d..e71e0e85228 100644
--- a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
+++ b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
@@ -31,6 +31,7 @@ import java.time.ZonedDateTime;
 import java.time.format.DateTimeFormatter;
 import java.time.format.DateTimeFormatterBuilder;
 import java.time.format.DateTimeParseException;
+import java.time.format.SignStyle;
 import java.time.format.TextStyle;
 import java.time.temporal.ChronoField;
 import java.time.temporal.TemporalAccessor;
@@ -43,6 +44,13 @@ import org.apache.hive.common.util.DateUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static java.time.temporal.ChronoField.DAY_OF_MONTH;
+import static java.time.temporal.ChronoField.HOUR_OF_DAY;
+import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
+import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
+import static java.time.temporal.ChronoField.YEAR;
+
 public class TimestampTZUtil {
 
   private static final Logger LOG = LoggerFactory.getLogger(TimestampTZ.class);
@@ -50,6 +58,16 @@ public class TimestampTZUtil {
   private static final LocalTime DEFAULT_LOCAL_TIME = LocalTime.of(0, 0);
   private static final Pattern SINGLE_DIGIT_PATTERN = 
Pattern.compile("[\\+-]\\d:\\d\\d");
 
+  private static final DateTimeFormatter TIMESTAMP_FORMATTER = new 
DateTimeFormatterBuilder()
+      // Date and Time Parts
+      .appendValue(YEAR, 4, 10, 
SignStyle.NORMAL).appendLiteral('-').appendValue(MONTH_OF_YEAR, 2, 2, 
SignStyle.NORMAL)
+      .appendLiteral('-').appendValue(DAY_OF_MONTH, 2, 2, SignStyle.NORMAL)
+      .appendLiteral(" ").appendValue(HOUR_OF_DAY, 2, 2, 
SignStyle.NORMAL).appendLiteral(':')
+      .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NORMAL).appendLiteral(':')
+      .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NORMAL)
+      // Fractional Part (Optional)
+      .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, 
true).optionalEnd().toFormatter();
+
   static final DateTimeFormatter FORMATTER;
   static {
     DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
@@ -168,7 +186,7 @@ public class TimestampTZUtil {
       try {
         DateFormat formatter = getLegacyDateFormatter();
         formatter.setTimeZone(TimeZone.getTimeZone(fromZone));
-        java.util.Date date = formatter.parse(ts.toString());
+        java.util.Date date = formatter.parse(ts.format(TIMESTAMP_FORMATTER));
         // Set the formatter to use a different timezone
         formatter.setTimeZone(TimeZone.getTimeZone(toZone));
         Timestamp result = Timestamp.valueOf(formatter.format(date));
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java
index 733964a3183..71c3304f842 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java
@@ -79,6 +79,24 @@ class TestParquetTimestampsHive2Compatibility {
     assertEquals(timestampString, ts.toString());
   }
 
+  /**
+   * Tests that timestamps written using Hive2 APIs are read correctly by 
Hive4 APIs when legacy conversion is on.
+   */
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("generateTimestamps")
+  void testWriteHive2ReadHive4UsingLegacyConversionWithZone(String 
timestampString) {
+    TimeZone original = TimeZone.getDefault();
+    try {
+      String zoneId = "US/Pacific";
+      TimeZone.setDefault(TimeZone.getTimeZone(zoneId));
+      NanoTime nt = writeHive2(timestampString);
+      Timestamp ts = readHive4(nt, zoneId, true);
+      assertEquals(timestampString, ts.toString());
+    } finally {
+      TimeZone.setDefault(original);
+    }
+  }
+
   /**
    * Tests that timestamps written using Hive4 APIs are read correctly by 
Hive4 APIs when legacy conversion is on. 
    */
@@ -116,7 +134,7 @@ class TestParquetTimestampsHive2Compatibility {
   }
 
   private static Stream<String> generateTimestamps() {
-    return Stream.generate(new Supplier<String>() {
+    return Stream.concat(Stream.generate(new Supplier<String>() {
       int i = 0;
 
       @Override
@@ -157,7 +175,7 @@ class TestParquetTimestampsHive2Compatibility {
     // Exclude dates falling in the default Gregorian change date since legacy 
code does not handle that interval
     // gracefully. It is expected that these do not work well when legacy APIs 
are in use. 
     .filter(s -> !s.startsWith("1582-10"))
-    .limit(3000);
+    .limit(3), Stream.of("9999-12-31 23:59:59.999"));
   }
 
   private static int digits(int number) {

Reply via email to