This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 5519bb37fb5 HIVE-26658: INT64 Parquet timestamps cannot be mapped to
most Hive numeric types (Stamatis Zampetakis reviewed by Chris Nauroth, Steve
Carlin, Ayush Saxena)
5519bb37fb5 is described below
commit 5519bb37fb5189004804435f09ca9227bd6d9d6b
Author: Stamatis Zampetakis <[email protected]>
AuthorDate: Mon Oct 24 11:50:42 2022 +0200
HIVE-26658: INT64 Parquet timestamps cannot be mapped to most Hive numeric
types (Stamatis Zampetakis reviewed by Chris Nauroth, Steve Carlin, Ayush
Saxena)
Closes #3698
---
.../hive/ql/io/parquet/convert/ETypeConverter.java | 53 +++----
.../ql/io/parquet/convert/TestETypeConverter.java | 85 ++++++++++-
.../parquet_int64_timestamp_to_numeric.q | 37 +++++
.../llap/parquet_int64_timestamp_to_numeric.q.out | 162 +++++++++++++++++++++
4 files changed, 295 insertions(+), 42 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 4c3ab70958e..91f19418356 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
@@ -448,6 +449,21 @@ public enum ETypeConverter {
}
}
};
+ case serdeConstants.TIMESTAMP_TYPE_NAME:
+ case serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME:
+ if (type.getLogicalTypeAnnotation() instanceof
TimestampLogicalTypeAnnotation) {
+ TimestampLogicalTypeAnnotation logicalType =
+ (TimestampLogicalTypeAnnotation)
type.getLogicalTypeAnnotation();
+ return new PrimitiveConverter() {
+ @Override
+ public void addLong(final long value) {
+ Timestamp timestamp =
+ ParquetTimestampUtils.getTimestamp(value,
logicalType.getUnit(), logicalType.isAdjustedToUTC());
+ parent.set(index, new TimestampWritableV2(timestamp));
+ }
+ };
+ }
+ throw new IllegalStateException("Cannot reliably convert INT64 value to timestamp without type annotation");
default:
return new PrimitiveConverter() {
@Override
@@ -743,40 +759,6 @@ public enum ETypeConverter {
};
}
},
- EINT64_TIMESTAMP_CONVERTER(TimestampWritableV2.class) {
- @Override
- PrimitiveConverter getConverter(final PrimitiveType type, final int index,
final ConverterParent parent,
- TypeInfo hiveTypeInfo) {
- if (hiveTypeInfo != null) {
- String typeName =
TypeInfoUtils.getBaseName(hiveTypeInfo.getTypeName());
- final long min = getMinValue(type, typeName, Long.MIN_VALUE);
- final long max = getMaxValue(typeName, Long.MAX_VALUE);
-
- switch (typeName) {
- case serdeConstants.BIGINT_TYPE_NAME:
- return new PrimitiveConverter() {
- @Override
- public void addLong(long value) {
- if ((value >= min) && (value <= max)) {
- parent.set(index, new LongWritable(value));
- } else {
- parent.set(index, null);
- }
- }
- };
- }
- }
- return new PrimitiveConverter() {
- @Override
- public void addLong(final long value) {
- TimestampLogicalTypeAnnotation logicalType =
(TimestampLogicalTypeAnnotation) type.getLogicalTypeAnnotation();
- Timestamp timestamp =
- ParquetTimestampUtils.getTimestamp(value, logicalType.getUnit(),
logicalType.isAdjustedToUTC());
- parent.set(index, new TimestampWritableV2(timestamp));
- }
- };
- }
- },
EDATE_CONVERTER(DateWritableV2.class) {
@Override
PrimitiveConverter getConverter(final PrimitiveType type, final int index,
final ConverterParent parent, TypeInfo hiveTypeInfo) {
@@ -833,7 +815,8 @@ public enum ETypeConverter {
@Override
public Optional<PrimitiveConverter>
visit(TimestampLogicalTypeAnnotation logicalTypeAnnotation) {
- return Optional.of(EINT64_TIMESTAMP_CONVERTER.getConverter(type,
index, parent, hiveTypeInfo));
+ TypeInfo info = hiveTypeInfo == null ?
TypeInfoFactory.timestampTypeInfo : hiveTypeInfo;
+ return Optional.of(EINT64_CONVERTER.getConverter(type, index,
parent, info));
}
});
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
index cf6444c9c04..3173d2db900 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.io.parquet.convert;
+import static
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getPrimitiveTypeInfo;
import static
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -27,6 +28,7 @@ import java.nio.ByteOrder;
import java.time.ZoneId;
import java.time.ZoneOffset;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.Timestamp;
import
org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter.BinaryConverter;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
@@ -39,6 +41,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
@@ -115,16 +118,84 @@ public class TestETypeConverter {
assertEquals(22, (int) doubleWritable.get());
}
+ @Test
+ public void testGetInt64TimestampConverterTinyIntHiveType() {
+ testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005",
"tinyint", 5);
+ }
+
+ @Test
+ public void testGetInt64TimestampConverterSmallIntHiveType() {
+ testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005",
"smallint", 5);
+ }
+
+ @Test
+ public void testGetInt64TimestampConverterIntHiveType() {
+ testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005",
"int", 5);
+ }
+
@Test
public void testGetInt64TimestampConverterBigIntHiveType() {
- Timestamp timestamp = Timestamp.valueOf("1998-10-03 09:58:31.231");
- long msTime = timestamp.toEpochMilli();
- // Need TimeStamp logicalType annotation here
+ testGetInt64TimestampConverterNumericHiveType("1998-10-03 09:58:31.231",
"bigint", 907408711231L);
+ }
+
+ @Test
+ public void testGetInt64TimestampConverterFloatHiveType() {
+ testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005",
"float", 5.0f);
+ }
+
+ @Test
+ public void testGetInt64TimestampConverterDoubleHiveType() {
+ testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005",
"double", 5.0d);
+ }
+
+ @Test
+ public void testGetInt64TimestampConverterDecimalHiveType() {
+ testGetInt64TimestampConverterNumericHiveType("1970-01-01 00:00:00.005",
"decimal(1,0)", HiveDecimal.create(5));
+ }
+
+ @Test
+ public void testGetInt64TimestampConverterNoHiveType() {
+ Timestamp ts = Timestamp.valueOf("2022-10-24 11:35:00.005");
PrimitiveType primitiveType = createInt64TimestampType(false,
TimeUnit.MILLIS);
- Writable writable =
getWritableFromPrimitiveConverter(createHiveTypeInfo("bigint"), primitiveType,
msTime);
- // Retrieve as BigInt
- LongWritable longWritable = (LongWritable) writable;
- assertEquals(msTime, longWritable.get());
+ Writable writable = getWritableFromPrimitiveConverter(null, primitiveType,
ts.toEpochMilli());
+ assertEquals("2022-10-24 11:35:00.005", ((TimestampWritableV2)
writable).getTimestamp().toString());
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void testGetInt64NoLogicalAnnotationTimestampHiveType() {
+ Timestamp ts = Timestamp.valueOf("2022-10-24 11:43:00.005");
+ PrimitiveType primitiveType =
Types.optional(PrimitiveTypeName.INT64).named("int64");
+ getWritableFromPrimitiveConverter(TypeInfoFactory.timestampTypeInfo,
primitiveType, ts.toEpochMilli());
+ }
+
+ private void testGetInt64TimestampConverterNumericHiveType(String timestamp,
String type, Object expected) {
+ Timestamp ts = Timestamp.valueOf(timestamp);
+ PrimitiveType primitiveType = createInt64TimestampType(false,
TimeUnit.MILLIS);
+ PrimitiveTypeInfo info = getPrimitiveTypeInfo(type);
+ Writable writable = getWritableFromPrimitiveConverter(info, primitiveType,
ts.toEpochMilli());
+ final Object actual;
+ switch (info.getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ actual = ((IntWritable) writable).get();
+ break;
+ case LONG:
+ actual = ((LongWritable) writable).get();
+ break;
+ case FLOAT:
+ actual = ((FloatWritable) writable).get();
+ break;
+ case DOUBLE:
+ actual = ((DoubleWritable) writable).get();
+ break;
+ case DECIMAL:
+ actual = ((HiveDecimalWritable) writable).getHiveDecimal();
+ break;
+ default:
+ throw new IllegalStateException(info.toString());
+ }
+ assertEquals(expected, actual);
}
@Test
diff --git
a/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_numeric.q
b/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_numeric.q
new file mode 100644
index 00000000000..24b3336028d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_int64_timestamp_to_numeric.q
@@ -0,0 +1,37 @@
+set hive.parquet.write.int64.timestamp=true;
+set hive.parquet.timestamp.time.unit=micros;
+CREATE TABLE hive_26658_table (ts TIMESTAMP) STORED AS PARQUET;
+
+INSERT INTO hive_26658_table VALUES ('2022-10-21 15:58:32');
+INSERT INTO hive_26658_table VALUES ('1970-01-01 00:00:00.000009');
+
+SELECT * FROM hive_26658_table;
+
+set metastore.disallow.incompatible.col.type.changes=false;
+ALTER TABLE hive_26658_table CHANGE ts ts TINYINT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts SMALLINT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts INT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts BIGINT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts DOUBLE;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts FLOAT;
+
+SELECT * FROM hive_26658_table;
+
+ALTER TABLE hive_26658_table CHANGE ts ts Decimal;
+
+SELECT * FROM hive_26658_table;
diff --git
a/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_numeric.q.out
b/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_numeric.q.out
new file mode 100644
index 00000000000..dc2be4032a4
--- /dev/null
+++
b/ql/src/test/results/clientpositive/llap/parquet_int64_timestamp_to_numeric.q.out
@@ -0,0 +1,162 @@
+PREHOOK: query: CREATE TABLE hive_26658_table (ts TIMESTAMP) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: CREATE TABLE hive_26658_table (ts TIMESTAMP) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: INSERT INTO hive_26658_table VALUES ('2022-10-21 15:58:32')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: INSERT INTO hive_26658_table VALUES ('2022-10-21 15:58:32')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@hive_26658_table
+POSTHOOK: Lineage: hive_26658_table.ts SCRIPT []
+PREHOOK: query: INSERT INTO hive_26658_table VALUES ('1970-01-01 00:00:00.000009')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: INSERT INTO hive_26658_table VALUES ('1970-01-01 00:00:00.000009')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@hive_26658_table
+POSTHOOK: Lineage: hive_26658_table.ts SCRIPT []
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+2022-10-21 15:58:32
+1970-01-01 00:00:00.000009
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts TINYINT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts TINYINT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+NULL
+9
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts SMALLINT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts SMALLINT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+NULL
+9
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts INT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts INT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+NULL
+9
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts BIGINT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts BIGINT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+1666367912000000
+9
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts DOUBLE
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts DOUBLE
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+1.666367912E15
+9.0
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts FLOAT
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts FLOAT
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+1.66636785E15
+9.0
+PREHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts Decimal
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@hive_26658_table
+PREHOOK: Output: default@hive_26658_table
+POSTHOOK: query: ALTER TABLE hive_26658_table CHANGE ts ts Decimal
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@hive_26658_table
+POSTHOOK: Output: default@hive_26658_table
+PREHOOK: query: SELECT * FROM hive_26658_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM hive_26658_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hive_26658_table
+#### A masked pattern was here ####
+NULL
+9