This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 079996228f7 branch-4.0: [feature](jsonb) json type support group by
and distinct #57679 (#58101)
079996228f7 is described below
commit 079996228f724ab0a584f8285d659739df977e6d
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 18 16:26:41 2025 +0800
branch-4.0: [feature](jsonb) json type support group by and distinct #57679
(#58101)
Cherry-picked from #57679
Co-authored-by: Mryange <[email protected]>
---
be/src/vec/common/hash_table/hash_key_type.h | 3 +-
.../json/json_group_by_and_distinct.out | 25 +++++
.../json/json_group_by_and_distinct.groovy | 112 +++++++++++++++++++++
3 files changed, 139 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/common/hash_table/hash_key_type.h
b/be/src/vec/common/hash_table/hash_key_type.h
index fe370059742..52d264371cb 100644
--- a/be/src/vec/common/hash_table/hash_key_type.h
+++ b/be/src/vec/common/hash_table/hash_key_type.h
@@ -103,7 +103,8 @@ inline HashKeyType get_hash_key_type(const
std::vector<vectorized::DataTypePtr>&
auto t = remove_nullable(data_types[0]);
// serialized cannot be used in the case of single column, because the
join operator will have some processing of column nullable, resulting in
incorrect serialized results.
if (!t->have_maximum_size_of_value()) {
- if (is_string_type(t->get_primitive_type()) || t->get_primitive_type()
== TYPE_ARRAY) {
+ if (is_string_type(t->get_primitive_type()) || t->get_primitive_type()
== TYPE_ARRAY ||
+ t->get_primitive_type() == TYPE_JSONB) {
return HashKeyType::string_key;
}
throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type,
type={}", t->get_name());
diff --git
a/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
b/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
new file mode 100644
index 00000000000..e5f6b0a29f1
--- /dev/null
+++ b/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !order --
+{"a":2,"b":2} 3
+{"a":1,"b":2} 2
+{"a":1,"b":3} 1
+
+-- !order --
+{"a":1,"b":2}
+{"a":1,"b":3}
+{"a":2,"b":2}
+
+-- !order --
+{"a":1,"b":2}
+{"b":2,"a":1}
+
+-- !order --
+{"a":1,"b":2} 2
+
+-- !order --
+123
+123
+
+-- !order --
+123 2
+
diff --git
a/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
b/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
new file mode 100644
index 00000000000..8ba0196e6a7
--- /dev/null
+++ b/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_json_group_by_and_distinct", "p0") {
+
+
+ sql """
+ drop table if exists test_jsonb_groupby;
+ """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_jsonb_groupby (
+ `id` INT ,
+ `j` jsonb
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ );
+ """
+
+
+ sql """
+ insert into test_jsonb_groupby values (1, '{"a":1, "b":2}'), (2,
'{"a":1, "b":3}'), (3, '{"a":2, "b":2}') , (4, '{"a":2, "b":2}') , (5, '{"a":1,
"b":2}') , (6, '{"a":2, "b":2}') ;
+ """
+
+ qt_order"""
+ select j, count(*) as cnt from test_jsonb_groupby group by j order by
cnt desc, cast(j as string);
+ """
+
+ qt_order"""
+ select distinct j from test_jsonb_groupby order by cast(j as string);
+ """
+
+
+ sql """
+ drop table if exists test_jsonb_obj;
+ """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS test_jsonb_obj (
+ `id` INT ,
+ `j` jsonb
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ );
+ """
+
+ sql """
+ insert into test_jsonb_obj values (1,'{"a":1, "b":2}'), (2,'{"b":2,
"a":1}');
+ """
+
+ qt_order"""
+ select j from test_jsonb_obj group by j order by cast(j as string);
+ """
+
+ qt_order"""
+ select SORT_JSON_OBJECT_KEYS(j), count(*) from test_jsonb_obj group by
SORT_JSON_OBJECT_KEYS(j) order by cast(SORT_JSON_OBJECT_KEYS(j) as string);
+ """
+
+
+
+
+ sql """
+ drop table if exists test_jsonb_number;
+ """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS test_jsonb_number (
+ `id` INT ,
+ `j` jsonb
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ );
+ """
+
+
+ sql """
+ insert into test_jsonb_number values (1,to_json( cast(123 as
bigint))), (2,to_json(cast(123 as tinyint)));
+ """
+
+ qt_order"""
+ select j from test_jsonb_number group by j order by cast(j as string);
+ """
+
+ qt_order"""
+ select NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j), count(*) from
test_jsonb_number group by NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j) order by
cast(NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j) as string);
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]