This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 18a22217c57 branch-4.0: [improvement](timezone) Set the default value of the timezone in streamload to global time_zone #56365 (#56635)
18a22217c57 is described below
commit 18a22217c573d9b89a747480ff075d29e4cad170
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Sep 30 08:53:12 2025 +0800
branch-4.0: [improvement](timezone) Set the default value of the timezone in streamload to global time_zone #56365 (#56635)
Cherry-picked from #56365
Co-authored-by: Refrain <[email protected]>
---
be/src/http/action/stream_load.cpp | 2 +-
.../doris/nereids/load/NereidsStreamLoadTask.java | 4 +
.../test_global_timezone_streamload2.csv | 2 +
.../data/datatype_p0/datetimev2/test_tz_load.out | 25 +++
.../datatype_p0/datetimev2/test_tz_load.groovy | 229 +++++++++++++++++++++
5 files changed, 261 insertions(+), 1 deletion(-)
diff --git a/be/src/http/action/stream_load.cpp b/be/src/http/action/stream_load.cpp
index 2526030fc7d..59641d4a934 100644
--- a/be/src/http/action/stream_load.cpp
+++ b/be/src/http/action/stream_load.cpp
@@ -502,7 +502,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req,
}
request.__set_strictMode(strictMode);
}
- // timezone first. if not, try time_zone
+ // timezone first. if not, try system time_zone
if (!http_req->header(HTTP_TIMEZONE).empty()) {
request.__set_timezone(http_req->header(HTTP_TIMEZONE));
} else if (!http_req->header(HTTP_TIME_ZONE).empty()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java
index 6b3fa1ec3ee..a0cfa57b19e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java
@@ -27,6 +27,7 @@ import org.apache.doris.load.loadv2.LoadTask;
import org.apache.doris.nereids.analyzer.UnboundSlot;
import org.apache.doris.nereids.trees.expressions.BinaryOperator;
import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.qe.ConnectContext;
import org.apache.doris.task.LoadTaskInfo;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TFileFormatType;
@@ -431,8 +432,11 @@ public class NereidsStreamLoadTask implements NereidsLoadTaskInfo {
if (request.isSetStrictMode()) {
strictMode = request.isStrictMode();
}
+ // global time_zone if not set
if (request.isSetTimezone()) {
timezone = TimeUtils.checkTimeZoneValidAndStandardize(request.getTimezone());
+ } else if (ConnectContext.get() != null) {
+ timezone = ConnectContext.get().getSessionVariable().getTimeZone();
}
if (request.isSetExecMemLimit()) {
execMemLimit = request.getExecMemLimit();
diff --git a/regression-test/data/datatype_p0/datetimev2/test_global_timezone_streamload2.csv b/regression-test/data/datatype_p0/datetimev2/test_global_timezone_streamload2.csv
new file mode 100644
index 00000000000..5057cd60841
--- /dev/null
+++ b/regression-test/data/datatype_p0/datetimev2/test_global_timezone_streamload2.csv
@@ -0,0 +1,2 @@
+1,2024-04-11T16:00:13+08:00,2024-04-11T16:00:13+08:00
+2,2024-04-11T06:00:13+08:00,2024-04-11T06:00:13+08:00
\ No newline at end of file
diff --git a/regression-test/data/datatype_p0/datetimev2/test_tz_load.out b/regression-test/data/datatype_p0/datetimev2/test_tz_load.out
new file mode 100644
index 00000000000..7193239cb5d
--- /dev/null
+++ b/regression-test/data/datatype_p0/datetimev2/test_tz_load.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !global_offset --
+1 2024-04-11T08:00:13 2024-04-11
+1 2024-04-11T08:00:13 2024-04-11
+1 2024-04-11T08:00:13 2024-04-11
+2 2024-04-10T22:00:13 2024-04-11
+2 2024-04-10T22:00:13 2024-04-11
+2 2024-04-10T22:00:13 2024-04-11
+
+-- !global_offset --
+1 2024-04-11T16:00:13 2024-04-11
+1 2024-04-11T16:00:13 2024-04-11
+1 2024-04-11T16:00:13 2024-04-11
+2 2024-04-11T06:00:13 2024-04-11
+2 2024-04-11T06:00:13 2024-04-11
+2 2024-04-11T06:00:13 2024-04-11
+
+-- !global_offset --
+1 2024-04-11T08:00:13 2024-04-11
+1 2024-04-11T08:00:13 2024-04-11
+1 2024-04-11T08:00:13 2024-04-11
+2 2024-04-10T22:00:13 2024-04-11
+2 2024-04-10T22:00:13 2024-04-11
+2 2024-04-10T22:00:13 2024-04-11
+
diff --git a/regression-test/suites/datatype_p0/datetimev2/test_tz_load.groovy b/regression-test/suites/datatype_p0/datetimev2/test_tz_load.groovy
new file mode 100644
index 00000000000..49ec18b6541
--- /dev/null
+++ b/regression-test/suites/datatype_p0/datetimev2/test_tz_load.groovy
@@ -0,0 +1,229 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_tz_load", "nonConcurrent") {
+ def table1 = "global_timezone_test"
+ def s3BucketName = getS3BucketName()
+ def s3Endpoint = getS3Endpoint()
+ def s3Region = getS3Region()
+ def ak = getS3AK()
+ def sk = getS3SK()
+
+ sql "drop table if exists ${table1}"
+
+ sql "SET GLOBAL time_zone = 'Asia/Shanghai'"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${table1} (
+ `id` int NULL,
+ `dt_datetime` datetime NULL,
+ `dt_date` date NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 3
+ PROPERTIES (
+ "replication_num" = "1"
+ )
+ """
+
+ // case1 stream load set time_zone = UTC
+ // stream load set timezone = UTC
+ // broker load set timezone = UTC
+ // insert into does not support setting timezone
+ // same as case3
+ /*
+ 1 2024-04-11T08:00:13 2024-04-11
+ 1 2024-04-11T08:00:13 2024-04-11
+ 1 2024-04-11T08:00:13 2024-04-11
+ 2 2024-04-10T22:00:13 2024-04-11
+ 2 2024-04-10T22:00:13 2024-04-11
+ 2 2024-04-10T22:00:13 2024-04-11
+ */
+ streamLoad {
+ table "${table1}"
+ set 'column_separator', ','
+ set 'timezone', 'UTC'
+ file "test_global_timezone_streamload2.csv"
+ time 20000
+ }
+ streamLoad {
+ table "${table1}"
+ set 'column_separator', ','
+ set 'time_zone', 'UTC'
+ file "test_global_timezone_streamload2.csv"
+ time 20000
+ }
+ def label1 = "s3_load_default_" + UUID.randomUUID().toString().replaceAll("-", "")
+ sql """
+ LOAD LABEL ${label1} (
+ DATA INFILE("s3://${s3BucketName}/load/test_global_timezone_streamload.csv")
+ INTO TABLE ${table1}
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (id, dt_datetime, dt_date)
+ )
+ WITH S3 (
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}"
+ )
+ PROPERTIES (
+ "timezone" = "UTC"
+ );
+ """
+ def max_try_time1 = 60000
+ while (max_try_time1 > 0) {
+ def result = sql "select * from ${table1}"
+ if (result.size() == 6) {
+ break;
+ }
+ Thread.sleep(1000)
+ max_try_time1 -= 1000
+ if (max_try_time1 <= 0) {
+ throw new Exception("Load job timeout")
+ }
+ }
+ sql "sync"
+ qt_global_offset "select * from ${table1} order by id"
+ sql "truncate table ${table1}"
+
+ // case2 stream load not set timezone
+ // broker load not set timezone
+ // insert into not set timezone
+ /*
+ 1 2024-04-11T16:00:13 2024-04-11
+ 2 2024-04-11T06:00:13 2024-04-11
+ */
+ streamLoad {
+ table "${table1}"
+ set 'column_separator', ','
+ file "test_global_timezone_streamload2.csv"
+ time 20000
+ }
+
+ def label2 = "s3_load_no_timezone_" + UUID.randomUUID().toString().replaceAll("-", "")
+ sql """
+ LOAD LABEL ${label2} (
+ DATA INFILE("s3://${s3BucketName}/load/test_global_timezone_streamload.csv")
+ INTO TABLE ${table1}
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (id, dt_datetime, dt_date)
+ )
+ WITH S3 (
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}"
+ );
+ """
+
+ sql """
+ INSERT INTO ${table1}(id, dt_datetime, dt_date)
+ SELECT
+ CAST(split_part(c1, ',', 1) AS INT) AS id,
+ CAST(split_part(c1, ',', 2) AS DATETIME) AS dt_datetime,
+ CAST(split_part(c1, ',', 3) AS DATE) AS dt_date
+ FROM S3 (
+ "uri" = "s3://${s3BucketName}/load/test_global_timezone_streamload.csv",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}",
+ "format" = "csv"
+ );
+ """
+ def max_try_time2 = 60000
+ while (max_try_time2 > 0) {
+ def result = sql "select * from ${table1}"
+ if (result.size() == 6) {
+ break;
+ }
+ Thread.sleep(1000)
+ max_try_time2 -= 1000
+ if (max_try_time2 <= 0) {
+ throw new Exception("Load job timeout")
+ }
+ }
+ sql "sync"
+ qt_global_offset "select * from ${table1} order by id"
+ sql "truncate table ${table1}"
+
+ // case3 not set timezone but default is UTC
+ // same as case1
+ /*
+ 1 2024-04-11T08:00:13 2024-04-11
+ 2 2024-04-10T22:00:13 2024-04-11
+ */
+ sql "SET GLOBAL time_zone = 'UTC'"
+ streamLoad {
+ table "${table1}"
+ set 'column_separator', ','
+ file "test_global_timezone_streamload2.csv"
+ time 20000
+ }
+
+ def label3 = "s3_load_utc_" + UUID.randomUUID().toString().replaceAll("-", "")
+ sql """
+ LOAD LABEL ${label3} (
+ DATA INFILE("s3://${s3BucketName}/load/test_global_timezone_streamload.csv")
+ INTO TABLE ${table1}
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (id, dt_datetime, dt_date)
+ )
+ WITH S3 (
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}"
+ )
+ PROPERTIES (
+ "timezone" = "UTC"
+ );
+ """
+ sql """
+ INSERT INTO ${table1}(id, dt_datetime, dt_date)
+ SELECT
+ CAST(split_part(c1, ',', 1) AS INT) AS id,
+ CAST(split_part(c1, ',', 2) AS DATETIME) AS dt_datetime,
+ CAST(split_part(c1, ',', 3) AS DATE) AS dt_date
+ FROM S3 (
+ "uri" = "s3://${s3BucketName}/load/test_global_timezone_streamload.csv",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}",
+ "format" = "csv"
+ );
+ """
+ def max_try_time3 = 60000
+ while (max_try_time3 > 0) {
+ def result = sql "select * from ${table1}"
+ if (result.size() == 6) {
+ break;
+ }
+ Thread.sleep(1000)
+ max_try_time3 -= 1000
+ if (max_try_time3 <= 0) {
+ throw new Exception("Load job timeout")
+ }
+ }
+ sql "sync"
+ qt_global_offset "select * from ${table1} order by id"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]