This is an automated email from the ASF dual-hosted git repository.

zclllyybb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 87316004891 [fix](be) Fix timestamptz group_array state serde (#63827)
87316004891 is described below

commit 87316004891ab8d32f107b353c48bc2b65625425
Author: Mryange <[email protected]>
AuthorDate: Fri May 29 10:08:17 2026 +0800

    [fix](be) Fix timestamptz group_array state serde (#63827)
    
    Fix collect_list/group_array on nested TIMESTAMPTZ values when complex
    aggregate state is serialized through JSON. This keeps the existing
    state format for compatibility, provides a UTC timezone during serde,
    and adds regression coverage for the nested group_array case.
---
 .../exprs/aggregate/aggregate_function_collect.h   |  7 ++++
 .../timestamptz/test_timestamptz_agg_functions.out |  3 ++
 .../test_timestamptz_agg_functions.groovy          | 37 ++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/be/src/exprs/aggregate/aggregate_function_collect.h b/be/src/exprs/aggregate/aggregate_function_collect.h
index 7eef3cff85e..ef8bb0437f7 100644
--- a/be/src/exprs/aggregate/aggregate_function_collect.h
+++ b/be/src/exprs/aggregate/aggregate_function_collect.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <cctz/time_zone.h>
 #include <glog/logging.h>
 
 #include <cstddef>
@@ -342,6 +343,10 @@ struct AggregateFunctionCollectListData<T, HasLimit> {
         buf.write_binary(size);
 
         DataTypeSerDe::FormatOptions opt;
+        auto timezone = cctz::utc_time_zone();
+        opt.timezone = &timezone;
+        // TODO: Refactor this aggregate state serialization to avoid
+        // round-tripping through a human-readable string format.
         auto tmp_str = ColumnString::create();
         VectorBufferWriter tmp_buf(*tmp_str.get());
 
@@ -367,6 +372,8 @@ struct AggregateFunctionCollectListData<T, HasLimit> {
 
         StringRef s;
         DataTypeSerDe::FormatOptions opt;
+        auto timezone = cctz::utc_time_zone();
+        opt.timezone = &timezone;
         for (size_t i = 0; i < size; i++) {
             buf.read_binary(s);
             Slice slice(s.data, s.size);
diff --git a/regression-test/data/datatype_p0/timestamptz/test_timestamptz_agg_functions.out b/regression-test/data/datatype_p0/timestamptz/test_timestamptz_agg_functions.out
index 850cbe14a98..f7ff2eb36d0 100644
--- a/regression-test/data/datatype_p0/timestamptz/test_timestamptz_agg_functions.out
+++ b/regression-test/data/datatype_p0/timestamptz/test_timestamptz_agg_functions.out
@@ -11,3 +11,6 @@ true
 -- !group_array_union --
 3
 
+-- !group_array_nested_timestamptz --
+[["2024-01-01 00:00:00.000000+00:00", "2024-01-01 00:00:00.000000+00:00", "2024-01-02 00:00:00.000000+00:00"], ["2024-01-01 00:00:00.000000+00:00", "2024-01-02 00:00:00.000000+00:00", "2024-01-03 00:00:00.000000+00:00"]]
+
diff --git a/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_agg_functions.groovy b/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_agg_functions.groovy
index 89126b5a284..e5bf945225e 100644
--- a/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_agg_functions.groovy
+++ b/regression-test/suites/datatype_p0/timestamptz/test_timestamptz_agg_functions.groovy
@@ -56,4 +56,41 @@ suite("test_timestamptz_agg_functions", "datatype_p0") {
     qt_group_array_union "SELECT size(group_array_union(arr)) FROM test_tz_agg"
 
     sql "DROP TABLE IF EXISTS test_tz_agg"
+
+    sql "DROP TABLE IF EXISTS tz_group_array_crash"
+    sql """
+        CREATE TABLE tz_group_array_crash (
+            grp INT,
+            arr ARRAY<TIMESTAMPTZ(6)>
+        )
+        DUPLICATE KEY(grp)
+        DISTRIBUTED BY HASH(grp) BUCKETS 1
+        PROPERTIES('replication_num' = '1')
+    """
+
+    sql """
+        INSERT INTO tz_group_array_crash VALUES
+        (
+            1,
+            ARRAY(
+                CAST('2024-01-01 00:00:00 +00:00' AS TIMESTAMPTZ(6)),
+                CAST('2024-01-01 08:00:00 +08:00' AS TIMESTAMPTZ(6)),
+                CAST('2024-01-02 00:00:00 +00:00' AS TIMESTAMPTZ(6))
+            )
+        ),
+        (
+            1,
+            ARRAY(
+                CAST('2024-01-01 00:00:00 +00:00' AS TIMESTAMPTZ(6)),
+                CAST('2024-01-02 08:00:00 +08:00' AS TIMESTAMPTZ(6)),
+                CAST('2024-01-03 00:00:00 +00:00' AS TIMESTAMPTZ(6))
+            )
+        )
+    """
+
+    qt_group_array_nested_timestamptz """
+        SELECT CAST(array_sort(group_array(arr)) AS STRING)
+        FROM tz_group_array_crash
+        GROUP BY grp
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to