This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3beb92b3b00 [bugfix](iceberg)fix datetime conversion error and data path error (#35708)
3beb92b3b00 is described below
commit 3beb92b3b001d8ce3596a6f2d286aa4e5184153f
Author: wuwenchi <[email protected]>
AuthorDate: Sat Jun 1 00:41:56 2024 +0800
[bugfix](iceberg)fix datetime conversion error and data path error (#35708)
## Proposed changes
Issue #31442
1. The unit of the seventh parameter of `ZonedDateTime.of` is nanoseconds, so the microsecond value must be multiplied by 1000 (see the sketch below).
2. When writing to a non-partitioned Iceberg table, the data path contains an extra slash.
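Below is a minimal, self-contained Java sketch of the two behaviours described above. It is not the Doris code itself; the names `IcebergFixSketch`, `epochMicros`, and `joinPath` are illustrative only. It shows that the seventh argument of `ZonedDateTime.of` is nano-of-second (so a microsecond fraction is scaled by 1000), and that the path separator should only be inserted when a partition path is present.

```java
import java.sql.Timestamp;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;

public class IcebergFixSketch {
    // Point 1: ZonedDateTime.of takes nano-of-second as its 7th argument,
    // so a microsecond fraction must be scaled by 1000 before the call.
    static long epochMicros(int micros) {
        ZonedDateTime zdt = ZonedDateTime.of(2020, 12, 13, 12, 13, 14,
                micros * 1000, ZoneOffset.UTC);
        Timestamp ts = Timestamp.from(zdt.toInstant());
        // getTime() has millisecond precision; recover the sub-millisecond
        // microseconds from the nano-of-second field.
        return ts.getTime() * 1000 + ts.getNanos() / 1000 % 1000;
    }

    // Point 2 (illustrative only; the real fix is in the C++ writer below):
    // skip the separator when the partition path is empty, otherwise a
    // non-partitioned table gets an extra trailing "/".
    static String joinPath(String outputPath, String partitionPath) {
        return partitionPath.isEmpty() ? outputPath
                                       : outputPath + "/" + partitionPath;
    }

    public static void main(String[] args) {
        System.out.println(epochMicros(123456) % 1_000_000); // 123456
        System.out.println(joinPath("s3://bucket/tbl/data", "")); // no extra slash
    }
}
```

Combining the millisecond `getTime()` value with the sub-millisecond remainder of `getNanos()` is the same approach the new `getUnixTimestampWithMicroseconds` method in the diff below uses to recover full microsecond precision.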
---
.../sink/writer/iceberg/viceberg_table_writer.cpp | 17 +++-
.../org/apache/doris/analysis/DateLiteral.java | 19 +++-
.../doris/datasource/iceberg/IcebergUtils.java | 3 +-
.../org/apache/doris/analysis/DateLiteralTest.java | 15 +++
.../iceberg/test_iceberg_filter.out | 54 +++++++++++
.../iceberg/test_iceberg_filter.groovy | 102 +++++++++++++++++++++
6 files changed, 201 insertions(+), 9 deletions(-)
diff --git a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp
index 12115c869aa..2703330406c 100644
--- a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp
+++ b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp
@@ -349,10 +349,19 @@ std::shared_ptr<VIcebergPartitionWriter> VIcebergTableWriter::_create_partition_
}
const std::string& output_path = iceberg_table_sink.output_path;
- auto write_path = fmt::format("{}/{}", output_path, partition_path);
- auto original_write_path =
- fmt::format("{}/{}", iceberg_table_sink.original_output_path, partition_path);
- auto target_path = fmt::format("{}/{}", output_path, partition_path);
+ std::string write_path;
+ std::string original_write_path;
+ std::string target_path;
+ if (partition_path.empty()) {
+ original_write_path = iceberg_table_sink.original_output_path;
+ target_path = output_path;
+ write_path = output_path;
+ } else {
+ original_write_path =
+ fmt::format("{}/{}", iceberg_table_sink.original_output_path, partition_path);
+ target_path = fmt::format("{}/{}", output_path, partition_path);
+ write_path = fmt::format("{}/{}", output_path, partition_path);
+ }
VIcebergPartitionWriter::WriteInfo write_info = {
std::move(write_path), std::move(original_write_path), std::move(target_path),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
index 2c0b5a6e885..0a2d21c426e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DateLiteral.java
@@ -967,12 +967,25 @@ public class DateLiteral extends LiteralExpr {
}
public long unixTimestamp(TimeZone timeZone) {
- ZonedDateTime zonedDateTime = ZonedDateTime.of((int) year, (int) month, (int) day, (int) hour,
- (int) minute, (int) second, (int) microsecond, ZoneId.of(timeZone.getID()));
- Timestamp timestamp = Timestamp.from(zonedDateTime.toInstant());
+ Timestamp timestamp = getTimestamp(timeZone);
return timestamp.getTime();
}
+ private Timestamp getTimestamp(TimeZone timeZone) {
+ ZonedDateTime zonedDateTime = ZonedDateTime.of((int) year, (int) month, (int) day, (int) hour,
+ (int) minute, (int) second, (int) microsecond * 1000, ZoneId.of(timeZone.getID()));
+ return Timestamp.from(zonedDateTime.toInstant());
+ }
+
+ public long getUnixTimestampWithMillisecond(TimeZone timeZone) {
+ return unixTimestamp(timeZone);
+ }
+
+ public long getUnixTimestampWithMicroseconds(TimeZone timeZone) {
+ Timestamp timestamp = getTimestamp(timeZone);
+ return timestamp.getTime() * 1000 + timestamp.getNanos() / 1000 % 1000;
+ }
+
public static boolean hasTimePart(String format) {
return format.chars().anyMatch(c -> TIME_PART_SET.contains((char) c));
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
index 9c57fc8e940..ffe22b5c006 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java
@@ -84,7 +84,6 @@ public class IcebergUtils {
return 0;
}
};
- static long MILLIS_TO_NANO_TIME = 1000;
// https://iceberg.apache.org/spec/#schemas-and-data-types
// All time and timestamp values are stored with microsecond precision
private static final int ICEBERG_DATETIME_SCALE_MS = 6;
@@ -320,7 +319,7 @@ public class IcebergUtils {
case DATE:
return dateLiteral.getStringValue();
case TIMESTAMP:
- return dateLiteral.unixTimestamp(TimeUtils.getTimeZone()) * MILLIS_TO_NANO_TIME;
+ return dateLiteral.getUnixTimestampWithMicroseconds(TimeUtils.getTimeZone());
default:
return null;
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
index 556c8a52457..a6e864f89da 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/DateLiteralTest.java
@@ -26,6 +26,9 @@ import org.apache.doris.common.jmockit.Deencapsulation;
import org.junit.Assert;
import org.junit.Test;
+import java.time.ZoneOffset;
+import java.util.TimeZone;
+
public class DateLiteralTest {
@Test
@@ -414,4 +417,16 @@ public class DateLiteralTest {
}
Assert.assertFalse(hasException);
}
+
+ @Test
+ public void testUnixTimestampWithMilliMicroSecond() throws AnalysisException {
+ String s = "2020-12-13 12:13:14.123456";
+ Type type = Type.DATETIMEV2;
+ DateLiteral literal = new DateLiteral(s, type);
+ long l = literal.getUnixTimestampWithMillisecond(TimeZone.getTimeZone(ZoneOffset.UTC));
+ Assert.assertEquals(123, l % 1000);
+
+ long l2 = literal.getUnixTimestampWithMicroseconds(TimeZone.getTimeZone(ZoneOffset.UTC));
+ Assert.assertEquals(123456, l2 % 1000000);
+ }
}
diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_filter.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_filter.out
new file mode 100644
index 00000000000..bf8c13cde46
--- /dev/null
+++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_filter.out
@@ -0,0 +1,54 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !qt01 --
+1 2024-05-30T20:34:56
+2 2024-05-30T20:34:56.100
+3 2024-05-30T20:34:56.120
+4 2024-05-30T20:34:56.123
+5 2024-05-30T20:34:56.123400
+6 2024-05-30T20:34:56.123450
+7 2024-05-30T20:34:56.123456
+
+-- !qt02 --
+1 2024-05-30T20:34:56
+
+-- !qt03 --
+2 2024-05-30T20:34:56.100
+
+-- !qt04 --
+2 2024-05-30T20:34:56.100
+
+-- !qt05 --
+2 2024-05-30T20:34:56.100
+
+-- !qt06 --
+4 2024-05-30T20:34:56.123
+
+-- !qt07 --
+4 2024-05-30T20:34:56.123
+
+-- !qt08 --
+5 2024-05-30T20:34:56.123400
+
+-- !qt09 --
+7 2024-05-30T20:34:56.123456
+
+-- !qt10 --
+1 2024-05-30T20:34:56
+2 2024-05-30T20:34:56.100
+
+-- !qt11 --
+4 2024-05-30T20:34:56.123
+5 2024-05-30T20:34:56.123400
+6 2024-05-30T20:34:56.123450
+7 2024-05-30T20:34:56.123456
+
+-- !qt12 --
+1 2024-05-30T20:34:56
+2 2024-05-30T20:34:56.100
+
+-- !qt13 --
+4 2024-05-30T20:34:56.123
+5 2024-05-30T20:34:56.123400
+6 2024-05-30T20:34:56.123450
+7 2024-05-30T20:34:56.123456
+
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_filter.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_filter.groovy
new file mode 100644
index 00000000000..7eca9da94d1
--- /dev/null
+++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_filter.groovy
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_filter",
"p0,external,doris,external_docker,external_docker_doris") {
+ String enabled = context.config.otherConfigs.get("enableIcebergTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ try {
+ String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+ String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String catalog_name = "test_iceberg_filter"
+
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='rest',
+ 'uri' = 'http://${externalEnvIp}:${rest_port}',
+ "s3.access_key" = "admin",
+ "s3.secret_key" = "password",
+ "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+ "s3.region" = "us-east-1"
+ );"""
+
+ sql """ switch ${catalog_name} """
+ sql """ create database if not exists ${catalog_name} """
+ sql """ use ${catalog_name} """
+
+ String tb_ts_filter = "tb_ts_filter";
+ sql """ drop table if exists ${tb_ts_filter} """
+ sql """ create table ${tb_ts_filter} (id int, ts datetime)"""
+ sql """ insert into ${tb_ts_filter} values (1, '2024-05-30
20:34:56') """
+ sql """ insert into ${tb_ts_filter} values (2, '2024-05-30
20:34:56.1') """
+ sql """ insert into ${tb_ts_filter} values (3, '2024-05-30
20:34:56.12') """
+ sql """ insert into ${tb_ts_filter} values (4, '2024-05-30
20:34:56.123') """
+ sql """ insert into ${tb_ts_filter} values (5, '2024-05-30
20:34:56.1234') """
+ sql """ insert into ${tb_ts_filter} values (6, '2024-05-30
20:34:56.12345') """
+ sql """ insert into ${tb_ts_filter} values (7, '2024-05-30
20:34:56.123456') """
+
+ qt_qt01 """ select * from ${tb_ts_filter} order by id """
+ qt_qt02 """ select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56' order by id """
+ qt_qt03 """ select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56.1' order by id """
+ qt_qt04 """ select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56.10' order by id """
+ qt_qt05 """ select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56.100' order by id """
+ qt_qt06 """ select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56.123' order by id """
+ qt_qt07 """ select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56.1230' order by id """
+ qt_qt08 """ select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56.123400' order by id """
+ qt_qt09 """ select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56.123456' order by id """
+
+ qt_qt10 """ select * from ${tb_ts_filter} where ts < '2024-05-30 20:34:56.12' order by id """
+ qt_qt11 """ select * from ${tb_ts_filter} where ts > '2024-05-30 20:34:56.12' order by id """
+ qt_qt12 """ select * from ${tb_ts_filter} where ts < '2024-05-30 20:34:56.1200' order by id """
+ qt_qt13 """ select * from ${tb_ts_filter} where ts > '2024-05-30 20:34:56.1200' order by id """
+
+ // TODO support filter
+ // explain {
+ // sql("select * from ${tb_ts_filter} where ts < '2024-05-30 20:34:56'")
+ // contains "inputSplitNum=0"
+ // }
+ // explain {
+ // sql("select * from ${tb_ts_filter} where ts < '2024-05-30 20:34:56.12'")
+ // contains "inputSplitNum=1"
+ // }
+ // explain {
+ // sql("select * from ${tb_ts_filter} where ts > '2024-05-30 20:34:56.1234'")
+ // contains "inputSplitNum=2"
+ // }
+ // explain {
+ // sql("select * from ${tb_ts_filter} where ts > '2024-05-30 20:34:56.0'")
+ // contains "inputSplitNum=1"
+ // }
+ // explain {
+ // sql("select * from ${tb_ts_filter} where ts = '2024-05-30 20:34:56.123456'")
+ // contains "inputSplitNum=1"
+ // }
+ // explain {
+ // sql("select * from ${tb_ts_filter} where ts < '2024-05-30 20:34:56.123456'")
+ // contains "inputSplitNum=5"
+ // }
+ // explain {
+ // sql("select * from ${tb_ts_filter} where ts > '2024-05-30 20:34:56.123456'")
+ // contains "inputSplitNum=0"
+ // }
+
+ } finally {
+ }
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]