This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 88d531157 [orc] Optimize ORC timestamp type read (#2333)
88d531157 is described below

commit 88d5311573d2d3aa5894f327a49f3d33ff260b51
Author: Jingsong Lee <[email protected]>
AuthorDate: Thu Nov 16 19:51:24 2023 -0800

    [orc] Optimize ORC timestamp type read (#2333)
---
 .../org/apache/paimon/utils/DateTimeUtils.java     |  4 ++
 .../org/apache/paimon/utils/DateTimeUtilsTest.java | 16 ++++++
 .../apache/paimon/flink/PreAggregationITCase.java  | 64 +++++++++++-----------
 .../orc/reader/OrcTimestampColumnVector.java       |  5 +-
 4 files changed, 54 insertions(+), 35 deletions(-)

diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/DateTimeUtils.java b/paimon-common/src/main/java/org/apache/paimon/utils/DateTimeUtils.java
index a088129af..ad48b2d14 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/DateTimeUtils.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/DateTimeUtils.java
@@ -115,6 +115,10 @@ public class DateTimeUtils {
         return (int) (ts % MILLIS_PER_DAY);
     }
 
+    public static Timestamp toInternal(long millis, int nanos) {
+        return Timestamp.fromEpochMillis(millis + LOCAL_TZ.getOffset(millis), nanos);
+    }
+
     public static int toInternal(LocalDate date) {
         return ymdToUnixDate(date.getYear(), date.getMonthValue(), date.getDayOfMonth());
     }
diff --git a/paimon-common/src/test/java/org/apache/paimon/utils/DateTimeUtilsTest.java b/paimon-common/src/test/java/org/apache/paimon/utils/DateTimeUtilsTest.java
index e5c8facdc..6c1376d29 100644
--- a/paimon-common/src/test/java/org/apache/paimon/utils/DateTimeUtilsTest.java
+++ b/paimon-common/src/test/java/org/apache/paimon/utils/DateTimeUtilsTest.java
@@ -18,6 +18,8 @@
 
 package org.apache.paimon.utils;
 
+import org.apache.paimon.data.Timestamp;
+
 import org.junit.jupiter.api.Test;
 
 import java.time.LocalDateTime;
@@ -42,4 +44,18 @@ public class DateTimeUtilsTest {
                     .isEqualTo(expectations[precision]);
         }
     }
+
+    @Test
+    public void testTimestamp() {
+        int nanos = 100;
+        java.sql.Timestamp timestamp = new java.sql.Timestamp(System.currentTimeMillis());
+        for (int i = 0; i < 2000; i++) {
+            timestamp = new java.sql.Timestamp(timestamp.getTime() + 60 * 1000);
+            timestamp.setNanos(nanos + timestamp.getNanos());
+
+            Timestamp t1 = Timestamp.fromSQLTimestamp(timestamp);
+            Timestamp t2 = DateTimeUtils.toInternal(timestamp.getTime(), nanos);
+            assertThat(t1).isEqualTo(t2);
+        }
+    }
 }
diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PreAggregationITCase.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PreAggregationITCase.java
index 9c10c345a..8eb0e68fe 100644
--- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PreAggregationITCase.java
+++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PreAggregationITCase.java
@@ -384,13 +384,13 @@ public class PreAggregationITCase {
                     "INSERT INTO T3 VALUES "
                             + "(1, 2, CAST(NULL AS INT), 1.01, CAST(-1 AS 
TINYINT), CAST(-1 AS SMALLINT), "
                             + "CAST(1000 AS BIGINT), 1.11, CAST(1.11 AS 
DOUBLE), CAST('2020-01-01' AS DATE), "
-                            + "CAST('0001-01-01 01:01:01' AS TIMESTAMP)),"
+                            + "CAST('2021-01-01 01:01:01' AS TIMESTAMP)),"
                             + "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS 
SMALLINT), "
                             + "CAST(100000 AS BIGINT), -1.11, CAST(1.21 AS 
DOUBLE), CAST('2020-01-02' AS DATE), "
-                            + "CAST('0002-01-01 01:01:01' AS TIMESTAMP)), "
+                            + "CAST('2022-01-01 01:01:01' AS TIMESTAMP)), "
                             + "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS 
SMALLINT), "
                             + "CAST(10000000 AS BIGINT), 0, CAST(-1.11 AS 
DOUBLE), CAST('2022-01-02' AS DATE), "
-                            + "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+                            + "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
             List<Row> result = batchSql("SELECT * FROM T3");
             assertThat(result)
                     .containsExactlyInAnyOrder(
@@ -405,7 +405,7 @@ public class PreAggregationITCase {
                                     (float) -1.11,
                                     -1.11,
                                     LocalDate.of(2020, 1, 1),
-                                    LocalDateTime.of(1, 1, 1, 1, 1, 1)));
+                                    LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
         }
 
         @Test
@@ -413,15 +413,15 @@ public class PreAggregationITCase {
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS 
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
-                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS 
SMALLINT), CAST(100000 AS BIGINT), "
-                            + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' 
AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+                            + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' 
AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 2, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS 
SMALLINT), CAST(10000000 AS BIGINT), "
-                            + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+                            + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
 
             List<Row> result = batchSql("SELECT * FROM T3");
             assertThat(result)
@@ -437,7 +437,7 @@ public class PreAggregationITCase {
                                     (float) -1.11,
                                     -1.11,
                                     LocalDate.of(2020, 1, 1),
-                                    LocalDateTime.of(1, 1, 1, 1, 1, 1)));
+                                    LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
         }
 
         @Test
@@ -449,29 +449,29 @@ public class PreAggregationITCase {
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS 
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
-                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS 
SMALLINT), CAST(100000 AS BIGINT), "
-                            + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' 
AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+                            + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' 
AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 2, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS 
SMALLINT), CAST(10000000 AS BIGINT), "
-                            + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+                            + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
 
             // key 1 3
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 3, CAST(NULL AS INT), 1.01, CAST(1 AS 
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
-                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 3, 6, 1.10, CAST(2 AS TINYINT), CAST(2 AS 
SMALLINT), CAST(100000 AS BIGINT), "
-                            + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' 
AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+                            + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' 
AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T3 VALUES "
                             + "(1, 3, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS 
SMALLINT), CAST(10000000 AS BIGINT), "
-                            + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+                            + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
 
             assertThat(batchSql("SELECT * FROM T3"))
                     .containsExactlyInAnyOrder(
@@ -486,7 +486,7 @@ public class PreAggregationITCase {
                                     (float) -1.11,
                                     -1.11,
                                     LocalDate.of(2020, 1, 1),
-                                    LocalDateTime.of(1, 1, 1, 1, 1, 1)),
+                                    LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
                             Row.of(
                                     1,
                                     3,
@@ -498,7 +498,7 @@ public class PreAggregationITCase {
                                     (float) -1.11,
                                     -1.11,
                                     LocalDate.of(2020, 1, 1),
-                                    LocalDateTime.of(1, 1, 1, 1, 1, 1)));
+                                    LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
         }
 
         @Test
@@ -545,13 +545,13 @@ public class PreAggregationITCase {
                     "INSERT INTO T2 VALUES "
                             + "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS 
TINYINT), CAST(-1 AS SMALLINT), "
                             + "CAST(1000 AS BIGINT), 1.11, CAST(1.11 AS 
DOUBLE), CAST('2020-01-01' AS DATE), "
-                            + "CAST('0001-01-01 01:01:01' AS TIMESTAMP)),"
+                            + "CAST('2021-01-01 01:01:01' AS TIMESTAMP)),"
                             + "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS 
SMALLINT), CAST(100000 AS BIGINT), "
                             + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02' 
AS DATE), "
-                            + "CAST('0002-01-01 01:01:01' AS TIMESTAMP)), "
+                            + "CAST('2022-01-01 01:01:01' AS TIMESTAMP)), "
                             + "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS 
SMALLINT), CAST(10000000 AS BIGINT), "
                             + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), "
-                            + "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+                            + "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
             List<Row> result = batchSql("SELECT * FROM T2");
             assertThat(result)
                     .containsExactlyInAnyOrder(
@@ -566,7 +566,7 @@ public class PreAggregationITCase {
                                     (float) 1.11,
                                     1.21,
                                     LocalDate.of(2022, 1, 2),
-                                    LocalDateTime.of(2, 1, 1, 2, 0, 0)));
+                                    LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
         }
 
         @Test
@@ -575,17 +575,17 @@ public class PreAggregationITCase {
                     "INSERT INTO T2 VALUES "
                             + "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS 
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
                             + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), "
-                            + "CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+                            + "CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T2 VALUES "
                             + "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS 
SMALLINT), CAST(100000 AS BIGINT), -1.11, "
                             + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS 
DATE), "
-                            + "CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+                            + "CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T2 VALUES "
                             + "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS 
SMALLINT), CAST(10000000 AS BIGINT), 0, "
                             + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), "
-                            + "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+                            + "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
 
             List<Row> result = batchSql("SELECT * FROM T2");
             assertThat(result)
@@ -601,7 +601,7 @@ public class PreAggregationITCase {
                                     (float) 1.11,
                                     1.21,
                                     LocalDate.of(2022, 1, 2),
-                                    LocalDateTime.of(2, 1, 1, 2, 0, 0)));
+                                    LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
         }
 
         @Test
@@ -613,29 +613,29 @@ public class PreAggregationITCase {
             batchSql(
                     "INSERT INTO T2 VALUES "
                             + "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS 
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
-                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T2 VALUES "
                             + "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS 
SMALLINT), CAST(100000 AS BIGINT), -1.11, "
-                            + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS 
DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+                            + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS 
DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T2 VALUES "
                             + "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS 
SMALLINT), CAST(10000000 AS BIGINT), 0, "
-                            + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+                            + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
 
             // key 1 3
             batchSql(
                     "INSERT INTO T2 VALUES "
                             + "(1, 3, CAST(NULL AS INT), 1.01, CAST(1 AS 
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
-                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+                            + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01' 
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T2 VALUES "
                             + "(1, 3, 6, 1.10, CAST(2 AS TINYINT), CAST(2 AS 
SMALLINT), CAST(100000 AS BIGINT), -1.11, "
-                            + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS 
DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+                            + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS 
DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
             batchSql(
                     "INSERT INTO T2 VALUES "
                             + "(1, 3, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS 
SMALLINT), CAST(10000000 AS BIGINT), 0, "
-                            + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+                            + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS 
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
 
             assertThat(batchSql("SELECT * FROM T2"))
                     .containsExactlyInAnyOrder(
@@ -650,7 +650,7 @@ public class PreAggregationITCase {
                                     (float) 1.11,
                                     1.21,
                                     LocalDate.of(2022, 1, 2),
-                                    LocalDateTime.of(2, 1, 1, 2, 0, 0)),
+                                    LocalDateTime.of(2022, 1, 1, 2, 0, 0)),
                             Row.of(
                                     1,
                                     3,
@@ -662,7 +662,7 @@ public class PreAggregationITCase {
                                     (float) 1.11,
                                     1.21,
                                     LocalDate.of(2022, 1, 2),
-                                    LocalDateTime.of(2, 1, 1, 2, 0, 0)));
+                                    LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
         }
 
         @Test
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java
index 1da7232fc..dd8ac08f2 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java
@@ -19,6 +19,7 @@
 package org.apache.paimon.format.orc.reader;
 
 import org.apache.paimon.data.Timestamp;
+import org.apache.paimon.utils.DateTimeUtils;
 
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -40,8 +41,6 @@ public class OrcTimestampColumnVector extends AbstractOrcColumnVector
     @Override
     public Timestamp getTimestamp(int i, int precision) {
         int index = vector.isRepeating ? 0 : i;
-        java.sql.Timestamp timestamp = new java.sql.Timestamp(vector.time[index]);
-        timestamp.setNanos(vector.nanos[index]);
-        return Timestamp.fromSQLTimestamp(timestamp);
+        return DateTimeUtils.toInternal(vector.time[index], vector.nanos[index] % 1_000_000);
     }
 }

Reply via email to