This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 88d531157 [orc] Optimize ORC timestamp type read (#2333)
88d531157 is described below
commit 88d5311573d2d3aa5894f327a49f3d33ff260b51
Author: Jingsong Lee <[email protected]>
AuthorDate: Thu Nov 16 19:51:24 2023 -0800
[orc] Optimize ORC timestamp type read (#2333)
---
.../org/apache/paimon/utils/DateTimeUtils.java | 4 ++
.../org/apache/paimon/utils/DateTimeUtilsTest.java | 16 ++++++
.../apache/paimon/flink/PreAggregationITCase.java | 64 +++++++++++-----------
.../orc/reader/OrcTimestampColumnVector.java | 5 +-
4 files changed, 54 insertions(+), 35 deletions(-)
diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/DateTimeUtils.java b/paimon-common/src/main/java/org/apache/paimon/utils/DateTimeUtils.java
index a088129af..ad48b2d14 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/DateTimeUtils.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/DateTimeUtils.java
@@ -115,6 +115,10 @@ public class DateTimeUtils {
return (int) (ts % MILLIS_PER_DAY);
}
+ public static Timestamp toInternal(long millis, int nanos) {
+ return Timestamp.fromEpochMillis(millis + LOCAL_TZ.getOffset(millis), nanos);
+ }
+
public static int toInternal(LocalDate date) {
return ymdToUnixDate(date.getYear(), date.getMonthValue(), date.getDayOfMonth());
}
diff --git a/paimon-common/src/test/java/org/apache/paimon/utils/DateTimeUtilsTest.java b/paimon-common/src/test/java/org/apache/paimon/utils/DateTimeUtilsTest.java
index e5c8facdc..6c1376d29 100644
--- a/paimon-common/src/test/java/org/apache/paimon/utils/DateTimeUtilsTest.java
+++ b/paimon-common/src/test/java/org/apache/paimon/utils/DateTimeUtilsTest.java
@@ -18,6 +18,8 @@
package org.apache.paimon.utils;
+import org.apache.paimon.data.Timestamp;
+
import org.junit.jupiter.api.Test;
import java.time.LocalDateTime;
@@ -42,4 +44,18 @@ public class DateTimeUtilsTest {
.isEqualTo(expectations[precision]);
}
}
+
+ @Test
+ public void testTimestamp() {
+ int nanos = 100;
+ java.sql.Timestamp timestamp = new java.sql.Timestamp(System.currentTimeMillis());
+ for (int i = 0; i < 2000; i++) {
+ timestamp = new java.sql.Timestamp(timestamp.getTime() + 60 * 1000);
+ timestamp.setNanos(nanos + timestamp.getNanos());
+
+ Timestamp t1 = Timestamp.fromSQLTimestamp(timestamp);
+ Timestamp t2 = DateTimeUtils.toInternal(timestamp.getTime(), nanos);
+ assertThat(t1).isEqualTo(t2);
+ }
+ }
}
diff --git a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PreAggregationITCase.java b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PreAggregationITCase.java
index 9c10c345a..8eb0e68fe 100644
--- a/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PreAggregationITCase.java
+++ b/paimon-flink/paimon-flink-common/src/test/java/org/apache/paimon/flink/PreAggregationITCase.java
@@ -384,13 +384,13 @@ public class PreAggregationITCase {
"INSERT INTO T3 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(-1 AS
TINYINT), CAST(-1 AS SMALLINT), "
+ "CAST(1000 AS BIGINT), 1.11, CAST(1.11 AS
DOUBLE), CAST('2020-01-01' AS DATE), "
- + "CAST('0001-01-01 01:01:01' AS TIMESTAMP)),"
+ + "CAST('2021-01-01 01:01:01' AS TIMESTAMP)),"
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS
SMALLINT), "
+ "CAST(100000 AS BIGINT), -1.11, CAST(1.21 AS
DOUBLE), CAST('2020-01-02' AS DATE), "
- + "CAST('0002-01-01 01:01:01' AS TIMESTAMP)), "
+ + "CAST('2022-01-01 01:01:01' AS TIMESTAMP)), "
+ "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS
SMALLINT), "
+ "CAST(10000000 AS BIGINT), 0, CAST(-1.11 AS
DOUBLE), CAST('2022-01-02' AS DATE), "
- + "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ + "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
List<Row> result = batchSql("SELECT * FROM T3");
assertThat(result)
.containsExactlyInAnyOrder(
@@ -405,7 +405,7 @@ public class PreAggregationITCase {
(float) -1.11,
-1.11,
LocalDate.of(2020, 1, 1),
- LocalDateTime.of(1, 1, 1, 1, 1, 1)));
+ LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
}
@Test
@@ -413,15 +413,15 @@ public class PreAggregationITCase {
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
- + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS
SMALLINT), CAST(100000 AS BIGINT), "
- + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02'
AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02'
AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS
SMALLINT), CAST(10000000 AS BIGINT), "
- + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
List<Row> result = batchSql("SELECT * FROM T3");
assertThat(result)
@@ -437,7 +437,7 @@ public class PreAggregationITCase {
(float) -1.11,
-1.11,
LocalDate.of(2020, 1, 1),
- LocalDateTime.of(1, 1, 1, 1, 1, 1)));
+ LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
}
@Test
@@ -449,29 +449,29 @@ public class PreAggregationITCase {
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
- + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS
SMALLINT), CAST(100000 AS BIGINT), "
- + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02'
AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02'
AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 2, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS
SMALLINT), CAST(10000000 AS BIGINT), "
- + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
// key 1 3
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 3, CAST(NULL AS INT), 1.01, CAST(1 AS
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
- + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 3, 6, 1.10, CAST(2 AS TINYINT), CAST(2 AS
SMALLINT), CAST(100000 AS BIGINT), "
- + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02'
AS DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ + "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02'
AS DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T3 VALUES "
+ "(1, 3, 3, 10.00, CAST(-1 AS TINYINT), CAST(1 AS
SMALLINT), CAST(10000000 AS BIGINT), "
- + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ + "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
assertThat(batchSql("SELECT * FROM T3"))
.containsExactlyInAnyOrder(
@@ -486,7 +486,7 @@ public class PreAggregationITCase {
(float) -1.11,
-1.11,
LocalDate.of(2020, 1, 1),
- LocalDateTime.of(1, 1, 1, 1, 1, 1)),
+ LocalDateTime.of(2021, 1, 1, 1, 1, 1)),
Row.of(
1,
3,
@@ -498,7 +498,7 @@ public class PreAggregationITCase {
(float) -1.11,
-1.11,
LocalDate.of(2020, 1, 1),
- LocalDateTime.of(1, 1, 1, 1, 1, 1)));
+ LocalDateTime.of(2021, 1, 1, 1, 1, 1)));
}
@Test
@@ -545,13 +545,13 @@ public class PreAggregationITCase {
"INSERT INTO T2 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS
TINYINT), CAST(-1 AS SMALLINT), "
+ "CAST(1000 AS BIGINT), 1.11, CAST(1.11 AS
DOUBLE), CAST('2020-01-01' AS DATE), "
- + "CAST('0001-01-01 01:01:01' AS TIMESTAMP)),"
+ + "CAST('2021-01-01 01:01:01' AS TIMESTAMP)),"
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS
SMALLINT), CAST(100000 AS BIGINT), "
+ "-1.11, CAST(1.21 AS DOUBLE), CAST('2020-01-02'
AS DATE), "
- + "CAST('0002-01-01 01:01:01' AS TIMESTAMP)), "
+ + "CAST('2022-01-01 01:01:01' AS TIMESTAMP)), "
+ "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS
SMALLINT), CAST(10000000 AS BIGINT), "
+ "0, CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), "
- + "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ + "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
List<Row> result = batchSql("SELECT * FROM T2");
assertThat(result)
.containsExactlyInAnyOrder(
@@ -566,7 +566,7 @@ public class PreAggregationITCase {
(float) 1.11,
1.21,
LocalDate.of(2022, 1, 2),
- LocalDateTime.of(2, 1, 1, 2, 0, 0)));
+ LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
}
@Test
@@ -575,17 +575,17 @@ public class PreAggregationITCase {
"INSERT INTO T2 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
+ "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), "
- + "CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ + "CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS
SMALLINT), CAST(100000 AS BIGINT), -1.11, "
+ "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS
DATE), "
- + "CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ + "CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS
SMALLINT), CAST(10000000 AS BIGINT), 0, "
+ "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), "
- + "CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ + "CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
List<Row> result = batchSql("SELECT * FROM T2");
assertThat(result)
@@ -601,7 +601,7 @@ public class PreAggregationITCase {
(float) 1.11,
1.21,
LocalDate.of(2022, 1, 2),
- LocalDateTime.of(2, 1, 1, 2, 0, 0)));
+ LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
}
@Test
@@ -613,29 +613,29 @@ public class PreAggregationITCase {
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, CAST(NULL AS INT), 1.01, CAST(1 AS
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
- + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, 2, 1.10, CAST(2 AS TINYINT), CAST(2 AS
SMALLINT), CAST(100000 AS BIGINT), -1.11, "
- + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS
DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS
DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 2, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS
SMALLINT), CAST(10000000 AS BIGINT), 0, "
- + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
// key 1 3
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 3, CAST(NULL AS INT), 1.01, CAST(1 AS
TINYINT), CAST(-1 AS SMALLINT), CAST(1000 AS BIGINT), "
- + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('0001-01-01 01:01:01' AS TIMESTAMP))");
+ + "1.11, CAST(1.11 AS DOUBLE), CAST('2020-01-01'
AS DATE), CAST('2021-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 3, 6, 1.10, CAST(2 AS TINYINT), CAST(2 AS
SMALLINT), CAST(100000 AS BIGINT), -1.11, "
- + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS
DATE), CAST('0002-01-01 01:01:01' AS TIMESTAMP))");
+ + "CAST(1.21 AS DOUBLE), CAST('2020-01-02' AS
DATE), CAST('2022-01-01 01:01:01' AS TIMESTAMP))");
batchSql(
"INSERT INTO T2 VALUES "
+ "(1, 3, 3, 10.00, CAST(1 AS TINYINT), CAST(1 AS
SMALLINT), CAST(10000000 AS BIGINT), 0, "
- + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('0002-01-01 02:00:00' AS TIMESTAMP))");
+ + "CAST(-1.11 AS DOUBLE), CAST('2022-01-02' AS
DATE), CAST('2022-01-01 02:00:00' AS TIMESTAMP))");
assertThat(batchSql("SELECT * FROM T2"))
.containsExactlyInAnyOrder(
@@ -650,7 +650,7 @@ public class PreAggregationITCase {
(float) 1.11,
1.21,
LocalDate.of(2022, 1, 2),
- LocalDateTime.of(2, 1, 1, 2, 0, 0)),
+ LocalDateTime.of(2022, 1, 1, 2, 0, 0)),
Row.of(
1,
3,
@@ -662,7 +662,7 @@ public class PreAggregationITCase {
(float) 1.11,
1.21,
LocalDate.of(2022, 1, 2),
- LocalDateTime.of(2, 1, 1, 2, 0, 0)));
+ LocalDateTime.of(2022, 1, 1, 2, 0, 0)));
}
@Test
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java
index 1da7232fc..dd8ac08f2 100644
--- a/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java
+++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/reader/OrcTimestampColumnVector.java
@@ -19,6 +19,7 @@
package org.apache.paimon.format.orc.reader;
import org.apache.paimon.data.Timestamp;
+import org.apache.paimon.utils.DateTimeUtils;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
@@ -40,8 +41,6 @@ public class OrcTimestampColumnVector extends AbstractOrcColumnVector
@Override
public Timestamp getTimestamp(int i, int precision) {
int index = vector.isRepeating ? 0 : i;
- java.sql.Timestamp timestamp = new java.sql.Timestamp(vector.time[index]);
- timestamp.setNanos(vector.nanos[index]);
- return Timestamp.fromSQLTimestamp(timestamp);
+ return DateTimeUtils.toInternal(vector.time[index], vector.nanos[index] % 1_000_000);
}
}