Repository: hive Updated Branches: refs/heads/master 1002e89b6 -> 54bba9cbf
HIVE-20792: Inserting timestamp with zones truncates the data (Jaume Marhuenda, reviewed by Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/54bba9cb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/54bba9cb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/54bba9cb Branch: refs/heads/master Commit: 54bba9cbfe991f08b295b2dd0874414770630ee7 Parents: 1002e89 Author: Jaume Marhuenda <[email protected]> Authored: Sun Oct 28 08:23:56 2018 -0700 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Sun Oct 28 08:24:22 2018 -0700 ---------------------------------------------------------------------- .../hadoop/hive/common/type/TimestampUtils.java | 23 +++++++++++++ .../test/queries/clientpositive/timestamptz_4.q | 8 +++++ .../results/clientpositive/timestamptz_4.q.out | 31 +++++++++++++++++ .../PrimitiveObjectInspectorUtils.java | 35 ++++---------------- 4 files changed, 69 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/54bba9cb/common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java b/common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java index ab60db1..f26f8ae 100644 --- a/common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/type/TimestampUtils.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import java.math.BigDecimal; import java.time.DateTimeException; +import java.time.format.DateTimeParseException; /** * Utilities for Timestamps and the relevant conversions. @@ -168,4 +169,26 @@ public class TimestampUtils { } } + private static final int DATE_LENGTH = "YYYY-MM-DD".length(); + + public static Timestamp stringToTimestamp(String s) { + s = s.trim(); + // Handle simpler cases directly avoiding exceptions + if (s.length() == DATE_LENGTH) { + // Its a date! + return Timestamp.ofEpochMilli(Date.valueOf(s).toEpochMilli()); + } + try { + return Timestamp.valueOf(s); + } catch (IllegalArgumentException eT) { + // Try zoned timestamp + try { + return Timestamp.valueOf( + TimestampTZUtil.parse(s).getZonedDateTime().toLocalDateTime().toString()); + } catch (IllegalArgumentException | DateTimeParseException eTZ) { + // Last attempt + return Timestamp.ofEpochMilli(Date.valueOf(s).toEpochMilli()); + } + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/54bba9cb/ql/src/test/queries/clientpositive/timestamptz_4.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/timestamptz_4.q b/ql/src/test/queries/clientpositive/timestamptz_4.q new file mode 100644 index 0000000..2bde967 --- /dev/null +++ b/ql/src/test/queries/clientpositive/timestamptz_4.q @@ -0,0 +1,8 @@ +set hive.fetch.task.conversion=more; + +create table tstz4(t timestamp); + +insert into tstz4 VALUES ('2013-06-03 02:01:00.30547 GMT+01:00'), ('2013-06-03 02:01:00.30547 America/Los_Angeles'), ('2013-06-03 02:01:00.30547+01:00'), ('2013-06-03 02:01:00 GMT+01:00'), ('2013-06-03 02:01:00+07:00'), ('2013-06-03 02:01:00 America/Los_Angeles'); + +select * from tstz4; + http://git-wip-us.apache.org/repos/asf/hive/blob/54bba9cb/ql/src/test/results/clientpositive/timestamptz_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/timestamptz_4.q.out b/ql/src/test/results/clientpositive/timestamptz_4.q.out new file mode 100644 index 0000000..c33a856 --- /dev/null +++ b/ql/src/test/results/clientpositive/timestamptz_4.q.out @@ -0,0 +1,31 @@ +PREHOOK: query: create table tstz4(t timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tstz4 +POSTHOOK: query: create table tstz4(t timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tstz4 +PREHOOK: query: insert into tstz4 VALUES ('2013-06-03 02:01:00.30547 GMT+01:00'), ('2013-06-03 02:01:00.30547 America/Los_Angeles'), ('2013-06-03 02:01:00.30547+01:00'), ('2013-06-03 02:01:00 GMT+01:00'), ('2013-06-03 02:01:00+07:00'), ('2013-06-03 02:01:00 America/Los_Angeles') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tstz4 +POSTHOOK: query: insert into tstz4 VALUES ('2013-06-03 02:01:00.30547 GMT+01:00'), ('2013-06-03 02:01:00.30547 America/Los_Angeles'), ('2013-06-03 02:01:00.30547+01:00'), ('2013-06-03 02:01:00 GMT+01:00'), ('2013-06-03 02:01:00+07:00'), ('2013-06-03 02:01:00 America/Los_Angeles') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tstz4 +POSTHOOK: Lineage: tstz4.t SCRIPT [] +PREHOOK: query: select * from tstz4 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstz4 +#### A masked pattern was here #### +POSTHOOK: query: select * from tstz4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstz4 +#### A masked pattern was here #### +2013-06-03 02:01:00.30547 +2013-06-03 02:01:00.30547 +2013-06-03 02:01:00.30547 +2013-06-03 02:01:00 +2013-06-03 02:01:00 +2013-06-03 02:01:00 http://git-wip-us.apache.org/repos/asf/hive/blob/54bba9cb/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index 5624315..3886b20 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -23,7 +23,6 @@ import java.io.DataOutput; import java.io.IOException; import java.nio.charset.CharacterCodingException; import java.nio.charset.StandardCharsets; -import java.time.DateTimeException; import java.time.ZoneId; import java.util.HashMap; import java.util.Map; @@ -1255,40 +1254,18 @@ public final class PrimitiveObjectInspectorUtils { return result; } - private final static int DATE_LENGTH = "YYYY-MM-DD".length(); private final static int TS_LENGTH = "yyyy-mm-dd hh:mm:ss".length(); + private final static int DATE_LENGTH = "YYYY-MM-DD".length(); public static Timestamp getTimestampFromString(String s) { - Timestamp result = null; s = s.trim(); s = trimNanoTimestamp(s); - // Handle simpler cases directly avoiding exceptions try { - if (s.length() == DATE_LENGTH) { - // Its a date! - return Timestamp.ofEpochMilli(Date.valueOf(s).toEpochMilli()); - } else if (isValidTimeStamp(s)) { - return Timestamp.valueOf(s); - } - // If a timestamp does not have a space, then it is likely zoned time. - if (s.contains("+") || (s.length() > DATE_LENGTH && s.charAt(DATE_LENGTH) == '-')) { - // Timestamp with timezone - // Let's try to parse it as timestamp with time zone and transform - try { - result = Timestamp.valueOf(TimestampTZUtil.parse(s).getZonedDateTime() - .toLocalDateTime().toString()); - } catch (DateTimeException e2) { - // Do nothing - } - } else { - // Last attempt - result = Timestamp.ofEpochMilli(Date.valueOf(s).toEpochMilli()); - } + return TimestampUtils.stringToTimestamp(s); } catch (IllegalArgumentException e) { - // Do nothing + return null; } - return result; } private static String trimNanoTimestamp(String s) { @@ -1296,8 +1273,10 @@ public final class PrimitiveObjectInspectorUtils { // Throw away extra if more than 9 decimal places int periodIdx = s.indexOf("."); if (periodIdx != -1) { - int secondSpace = firstSpace < 0 ? -1 : s.indexOf(' ', firstSpace + 1); - int maxLength = secondSpace == -1 ? s.length() : secondSpace; + int secondSpaceOrPlus = firstSpace < 0 ? -1 : s.indexOf(' ', firstSpace + 1); + secondSpaceOrPlus = firstSpace < 0 || secondSpaceOrPlus != -1 ? + secondSpaceOrPlus : s.indexOf('+', firstSpace + 1); + int maxLength = secondSpaceOrPlus == -1 ? s.length() : secondSpaceOrPlus; if (maxLength - periodIdx > 9) { s = s.substring(0, periodIdx + 10).concat(s.substring(maxLength, s.length())); }
