This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new d1696aab4 [hive] Fix Hive 3 timestamp precision conversion error (#2674)
d1696aab4 is described below
commit d1696aab4c78d7bd54e25bfba6d13be4ae031914
Author: yuzelin <[email protected]>
AuthorDate: Fri Jan 12 10:53:32 2024 +0800
[hive] Fix Hive 3 timestamp precision conversion error (#2674)
---
.../PaimonTimestampObjectInspector.java | 13 ++++--
.../org/apache/paimon/hive/HiveWriteITCase.java | 37 ++++++++++++-----
.../PaimonTimestampObjectInspector.java | 2 +-
.../paimon/hive/PaimonStorageHandlerITCase.java | 47 ++++++++++++++++------
4 files changed, 72 insertions(+), 27 deletions(-)
diff --git a/paimon-hive/paimon-hive-connector-3.1/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java b/paimon-hive/paimon-hive-connector-3.1/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java
index 4b96dbfb2..a70064e23 100644
--- a/paimon-hive/paimon-hive-connector-3.1/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java
+++ b/paimon-hive/paimon-hive-connector-3.1/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java
@@ -24,7 +24,7 @@ import
org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitive
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */
+/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. The
precision is maintained. */
public class PaimonTimestampObjectInspector extends
AbstractPrimitiveJavaObjectInspector
implements TimestampObjectInspector, WriteableObjectInspector {
@@ -34,9 +34,14 @@ public class PaimonTimestampObjectInspector extends
AbstractPrimitiveJavaObjectI
@Override
public Timestamp getPrimitiveJavaObject(Object o) {
- return o == null
- ? null
- : Timestamp.ofEpochMilli(((org.apache.paimon.data.Timestamp)
o).getMillisecond());
+ if (o == null) {
+ return null;
+ }
+
+ org.apache.paimon.data.Timestamp timestamp =
(org.apache.paimon.data.Timestamp) o;
+ long millis = timestamp.getMillisecond();
+ int nanos = (int) (millis % 1000 * 1_000_000) +
timestamp.getNanoOfMillisecond();
+ return Timestamp.ofEpochMilli(millis, nanos);
}
@Override
diff --git a/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/HiveWriteITCase.java b/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/HiveWriteITCase.java
index babbbe30f..1fd39dd94 100644
--- a/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/HiveWriteITCase.java
+++ b/paimon-hive/paimon-hive-connector-3.1/src/test/java/org/apache/paimon/hive/HiveWriteITCase.java
@@ -44,6 +44,8 @@ import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
+import javax.annotation.Nullable;
+
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@@ -91,7 +93,11 @@ public class HiveWriteITCase {
}
private String createAppendOnlyExternalTable(
- RowType rowType, List<String> partitionKeys, List<InternalRow>
data, String tableName)
+ RowType rowType,
+ List<String> partitionKeys,
+ List<InternalRow> data,
+ String tableName,
+ @Nullable CoreOptions.FileFormatType fileFormatType)
throws Exception {
String path = folder.newFolder().toURI().toString();
String tableNameNotNull =
@@ -100,7 +106,9 @@ public class HiveWriteITCase {
Options conf = new Options();
conf.set(CatalogOptions.WAREHOUSE, path);
conf.set(CoreOptions.BUCKET, 2);
- conf.set(CoreOptions.FILE_FORMAT, CoreOptions.FileFormatType.AVRO);
+ conf.set(
+ CoreOptions.FILE_FORMAT,
+ fileFormatType == null ? CoreOptions.FileFormatType.AVRO :
fileFormatType);
Identifier identifier = Identifier.create(DATABASE_NAME,
tableNameNotNull);
Table table =
FileStoreTestUtils.createFileStoreTable(
@@ -152,7 +160,8 @@ public class HiveWriteITCase {
new String[] {"pt", "a", "b", "c"}),
Collections.singletonList("pt"),
emptyData,
- "hive_test_table_output");
+ "hive_test_table_output",
+ null);
hiveShell.execute(
"insert into " + outputTableName + " values
(1,2,3,'Hello'),(4,5,6,'Fine')");
@@ -164,22 +173,30 @@ public class HiveWriteITCase {
public void testInsertTimestampAndDate() throws Exception {
List<InternalRow> emptyData = Collections.emptyList();
+ // test different precisions
+ int precision = ThreadLocalRandom.current().nextInt(10);
+ String fraction = precision == 0 ? "" : "." + "123456789".substring(0,
precision);
+
String outputTableName =
createAppendOnlyExternalTable(
RowType.of(
new DataType[] {
- DataTypes.INT(), DataTypes.TIMESTAMP(),
DataTypes.DATE()
+ DataTypes.INT(),
+ DataTypes.TIMESTAMP(precision),
+ DataTypes.DATE(),
},
new String[] {"pt", "a", "b"}),
Collections.singletonList("pt"),
emptyData,
- "hive_test_table_output");
+ "hive_test_table_output",
+ CoreOptions.FileFormatType.ORC);
hiveShell.execute(
- "insert into "
- + outputTableName
- + " values(1,'2023-01-13 20:00:01.123','2023-12-23')");
- List<String> select = hiveShell.executeQuery("select * from " +
outputTableName);
+ String.format(
+ "INSERT INTO %s VALUES (1, '2023-01-13 20:00:01%s',
'2023-12-23')",
+ outputTableName, fraction));
+
+ List<String> select = hiveShell.executeQuery("SELECT * FROM " +
outputTableName);
assertThat(select)
- .isEqualTo(Collections.singletonList("1\t2023-01-13
20:00:01.123\t2023-12-23"));
+ .containsExactly(String.format("1\t2023-01-13
20:00:01%s\t2023-12-23", fraction));
}
}
diff --git a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java
index 7f57b242b..c18eb7759 100644
--- a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java
+++ b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/objectinspector/PaimonTimestampObjectInspector.java
@@ -27,7 +27,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import java.time.LocalDateTime;
-/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. */
+/** {@link AbstractPrimitiveJavaObjectInspector} for TIMESTAMP type. The
precision is maintained. */
public class PaimonTimestampObjectInspector extends
AbstractPrimitiveJavaObjectInspector
implements TimestampObjectInspector, WriteableObjectInspector {
diff --git a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/PaimonStorageHandlerITCase.java b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/PaimonStorageHandlerITCase.java
index 16f6e191a..8ba1fe357 100644
--- a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/PaimonStorageHandlerITCase.java
+++ b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/PaimonStorageHandlerITCase.java
@@ -740,18 +740,20 @@ public class PaimonStorageHandlerITCase {
public void testDateAndTimestamp() throws Exception {
ThreadLocalRandom random = ThreadLocalRandom.current();
Options conf = getBasicConf();
- conf.set(
- CoreOptions.FILE_FORMAT,
+
+ CoreOptions.FileFormatType fileFormatType =
random.nextBoolean()
? CoreOptions.FileFormatType.ORC
- : CoreOptions.FileFormatType.PARQUET);
+ : CoreOptions.FileFormatType.PARQUET;
+ conf.set(CoreOptions.FILE_FORMAT, fileFormatType);
+
+ int precision = random.nextInt(10);
+
Table table =
FileStoreTestUtils.createFileStoreTable(
conf,
RowType.of(
- new DataType[] {
- DataTypes.DATE(),
DataTypes.TIMESTAMP(random.nextInt(10))
- },
+ new DataType[] {DataTypes.DATE(),
DataTypes.TIMESTAMP(precision)},
new String[] {"dt", "ts"}),
Collections.emptyList(),
Collections.emptyList());
@@ -772,32 +774,53 @@ public class PaimonStorageHandlerITCase {
GenericRow.of(
null,
Timestamp.fromLocalDateTime(
- LocalDateTime.of(2022, 6, 18, 8, 30, 0,
100_000_000))));
+ // to test different precisions
+ LocalDateTime.of(2022, 6, 18, 8, 30, 0,
123_456_789))));
commit.commit(2, write.prepareCommit(true, 2));
write.close();
commit.close();
createExternalTable();
+
assertThat(
hiveShell.executeQuery(
"SELECT * FROM `" + externalTable + "` WHERE
dt = '1971-01-11'"))
.containsExactly("1971-01-11\t2022-05-17 17:29:20.1");
assertThat(
hiveShell.executeQuery(
- "SELECT * FROM `"
- + externalTable
- + "` WHERE ts = '2022-05-17
17:29:20.1'"))
+ String.format(
+ // do not test '.123456789' because
the filter pushdown will
+ // cause wrong result
+ "SELECT * FROM `%s` WHERE ts =
'2022-05-17 17:29:20.1'",
+ externalTable)))
.containsExactly("1971-01-11\t2022-05-17 17:29:20.1");
+
assertThat(
hiveShell.executeQuery(
"SELECT * FROM `" + externalTable + "` WHERE
dt = '1971-01-12'"))
.containsExactly("1971-01-12\tNULL");
+
+ // validate '2022-06-18 08:30:00.123456789'
+ // the original precision is maintained, but the file format will
affect the result
+ // parquet stores timestamp with three forms
+ String fraction;
+ if (fileFormatType == CoreOptions.FileFormatType.ORC) {
+ fraction = ".123456789";
+ } else {
+ if (precision <= 3) {
+ fraction = ".123";
+ } else if (precision <= 6) {
+ fraction = ".123456";
+ } else {
+ fraction = ".123456789";
+ }
+ }
assertThat(
hiveShell.executeQuery(
"SELECT * FROM `"
+ externalTable
- + "` WHERE ts = '2022-06-18
08:30:00.1'"))
- .containsExactly("NULL\t2022-06-18 08:30:00.1");
+ + "` WHERE dt IS NULL and ts IS NOT
NULL"))
+ .containsExactly("NULL\t2022-06-18 08:30:00" + fraction);
}
@Test