This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new eb3bfe9e310 HIVE-26233: Problems reading back PARQUET timestamps above 10000 years (#3295) (Peter Vary reviewed by Stamatis Zampetakis)
eb3bfe9e310 is described below
commit eb3bfe9e31054b7e203f32bde128dcae2556928a
Author: pvary <[email protected]>
AuthorDate: Thu May 26 17:15:35 2022 +0200
HIVE-26233: Problems reading back PARQUET timestamps above 10000 years (#3295) (Peter Vary reviewed by Stamatis Zampetakis)
---
.../hadoop/hive/common/type/TimestampTZUtil.java | 20 +++++++++++++++++++-
.../TestParquetTimestampsHive2Compatibility.java | 22 ++++++++++++++++++++--
2 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
index 1853d4c569d..e71e0e85228 100644
--- a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
+++ b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZUtil.java
@@ -31,6 +31,7 @@ import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
+import java.time.format.SignStyle;
import java.time.format.TextStyle;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
@@ -43,6 +44,13 @@ import org.apache.hive.common.util.DateUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static java.time.temporal.ChronoField.DAY_OF_MONTH;
+import static java.time.temporal.ChronoField.HOUR_OF_DAY;
+import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
+import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
+import static java.time.temporal.ChronoField.YEAR;
+
public class TimestampTZUtil {
private static final Logger LOG = LoggerFactory.getLogger(TimestampTZ.class);
@@ -50,6 +58,16 @@ public class TimestampTZUtil {
private static final LocalTime DEFAULT_LOCAL_TIME = LocalTime.of(0, 0);
private static final Pattern SINGLE_DIGIT_PATTERN = Pattern.compile("[\\+-]\\d:\\d\\d");
+ private static final DateTimeFormatter TIMESTAMP_FORMATTER = new DateTimeFormatterBuilder()
+ // Date and Time Parts
+ .appendValue(YEAR, 4, 10, SignStyle.NORMAL).appendLiteral('-').appendValue(MONTH_OF_YEAR, 2, 2, SignStyle.NORMAL)
+ .appendLiteral('-').appendValue(DAY_OF_MONTH, 2, 2, SignStyle.NORMAL)
+ .appendLiteral(" ").appendValue(HOUR_OF_DAY, 2, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(MINUTE_OF_HOUR, 2, 2, SignStyle.NORMAL).appendLiteral(':')
+ .appendValue(SECOND_OF_MINUTE, 2, 2, SignStyle.NORMAL)
+ // Fractional Part (Optional)
+ .optionalStart().appendFraction(ChronoField.NANO_OF_SECOND, 0, 9, true).optionalEnd().toFormatter();
+
static final DateTimeFormatter FORMATTER;
static {
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
@@ -168,7 +186,7 @@ public class TimestampTZUtil {
try {
DateFormat formatter = getLegacyDateFormatter();
formatter.setTimeZone(TimeZone.getTimeZone(fromZone));
- java.util.Date date = formatter.parse(ts.toString());
+ java.util.Date date = formatter.parse(ts.format(TIMESTAMP_FORMATTER));
// Set the formatter to use a different timezone
formatter.setTimeZone(TimeZone.getTimeZone(toZone));
Timestamp result = Timestamp.valueOf(formatter.format(date));
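[Editor's note, not part of the commit: a minimal standalone sketch of the symptom the new TIMESTAMP_FORMATTER appears to work around. java.time formatters that use SignStyle.EXCEEDS_PAD render years above 9999 with a leading '+', which a java.text.SimpleDateFormat cannot be expected to round-trip. The "yyyy-MM-dd HH:mm:ss" pattern below is only an assumption standing in for whatever getLegacyDateFormatter() actually returns, and the class/field names are invented for illustration.]

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.SignStyle;
import java.time.temporal.ChronoField;

public class FiveDigitYearSketch {
  public static void main(String[] args) throws ParseException {
    LocalDateTime ldt = LocalDateTime.of(10000, 1, 1, 0, 0, 0);

    // ISO-style formatting (SignStyle.EXCEEDS_PAD) adds a '+' once the year
    // needs more than four digits, e.g. "+10000-01-01T00:00".
    System.out.println(DateTimeFormatter.ISO_LOCAL_DATE_TIME.format(ldt));

    // A fixed-width formatter with SignStyle.NORMAL, in the spirit of the
    // TIMESTAMP_FORMATTER added above, keeps the year unsigned:
    // "10000-01-01 00:00:00".
    DateTimeFormatter plain = new DateTimeFormatterBuilder()
        .appendValue(ChronoField.YEAR, 4, 10, SignStyle.NORMAL).appendLiteral('-')
        .appendValue(ChronoField.MONTH_OF_YEAR, 2).appendLiteral('-')
        .appendValue(ChronoField.DAY_OF_MONTH, 2).appendLiteral(' ')
        .appendValue(ChronoField.HOUR_OF_DAY, 2).appendLiteral(':')
        .appendValue(ChronoField.MINUTE_OF_HOUR, 2).appendLiteral(':')
        .appendValue(ChronoField.SECOND_OF_MINUTE, 2)
        .toFormatter();
    String unsigned = plain.format(ldt);
    System.out.println(unsigned);

    // Assumed legacy pattern; the real legacy formatter may differ.
    SimpleDateFormat legacy = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    try {
      System.out.println(legacy.parse("+10000-01-01 00:00:00")); // likely rejected or mis-parsed
    } catch (ParseException e) {
      System.out.println("legacy formatter cannot parse the signed year: " + e);
    }
    System.out.println(legacy.parse(unsigned)); // expected to parse as year 10000
  }
}

[Seen this way, the one-line change from ts.toString() to ts.format(TIMESTAMP_FORMATTER) keeps the string handed to the legacy DateFormat in a shape that legacy parser can consume even for five-digit years.]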
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java
index 733964a3183..71c3304f842 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampsHive2Compatibility.java
@@ -79,6 +79,24 @@ class TestParquetTimestampsHive2Compatibility {
assertEquals(timestampString, ts.toString());
}
+ /**
+ * Tests that timestamps written using Hive2 APIs are read correctly by Hive4 APIs when legacy conversion is on.
+ */
+ @ParameterizedTest(name = "{0}")
+ @MethodSource("generateTimestamps")
+ void testWriteHive2ReadHive4UsingLegacyConversionWithZone(String timestampString) {
+ TimeZone original = TimeZone.getDefault();
+ try {
+ String zoneId = "US/Pacific";
+ TimeZone.setDefault(TimeZone.getTimeZone(zoneId));
+ NanoTime nt = writeHive2(timestampString);
+ Timestamp ts = readHive4(nt, zoneId, true);
+ assertEquals(timestampString, ts.toString());
+ } finally {
+ TimeZone.setDefault(original);
+ }
+ }
+
/**
* Tests that timestamps written using Hive4 APIs are read correctly by Hive4 APIs when legacy conversion is on.
*/
@@ -116,7 +134,7 @@ class TestParquetTimestampsHive2Compatibility {
}
private static Stream<String> generateTimestamps() {
- return Stream.generate(new Supplier<String>() {
+ return Stream.concat(Stream.generate(new Supplier<String>() {
int i = 0;
@Override
@@ -157,7 +175,7 @@ class TestParquetTimestampsHive2Compatibility {
// Exclude dates falling in the default Gregorian change date since legacy code does not handle that interval
// gracefully. It is expected that these do not work well when legacy APIs are in use.
.filter(s -> !s.startsWith("1582-10"))
- .limit(3000);
+ .limit(3), Stream.of("9999-12-31 23:59:59.999"));
}
private static int digits(int number) {
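[Editor's note, not part of the commit: a simplified, hypothetical stand-in for the amended generateTimestamps(). The body of the generating Supplier is elided in the diff above, so the generated values here are invented; only the Stream.concat shape mirrors the change, i.e. an explicit near-maximum timestamp is always appended after the generated, limited inputs so the upper year boundary is exercised regardless of what the generator produces.]

import java.util.stream.Stream;

public class BoundaryTimestampSketch {
  public static void main(String[] args) {
    // Invented generator: years 1, 2, 3 with a fixed date/time part.
    Stream<String> generated = Stream.iterate(1, i -> i + 1)
        .map(year -> String.format("%d-01-01 00:00:00", year))
        .limit(3);

    // The boundary case is concatenated after the generated stream,
    // matching the pattern in the hunk above.
    Stream.concat(generated, Stream.of("9999-12-31 23:59:59.999"))
        .forEach(System.out::println);
  }
}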