(doris) branch branch-4.0 updated: branch-4.0: [feature](jsonb) json type support group by and distinct #57679 (#58101)

yiguolei Tue, 18 Nov 2025 00:27:31 -0800

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 079996228f7 branch-4.0: [feature](jsonb) json type support group by and distinct #57679 (#58101)
079996228f7 is described below

commit 079996228f724ab0a584f8285d659739df977e6d
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 18 16:26:41 2025 +0800

    branch-4.0: [feature](jsonb) json type support group by and distinct #57679 (#58101)
    
    Cherry-picked from #57679
    
    Co-authored-by: Mryange <[email protected]>
---
 be/src/vec/common/hash_table/hash_key_type.h       |   3 +-
 .../json/json_group_by_and_distinct.out            |  25 +++++
 .../json/json_group_by_and_distinct.groovy         | 112 +++++++++++++++++++++
 3 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/common/hash_table/hash_key_type.h b/be/src/vec/common/hash_table/hash_key_type.h
index fe370059742..52d264371cb 100644
--- a/be/src/vec/common/hash_table/hash_key_type.h
+++ b/be/src/vec/common/hash_table/hash_key_type.h
@@ -103,7 +103,8 @@ inline HashKeyType get_hash_key_type(const std::vector<vectorized::DataTypePtr>&
     auto t = remove_nullable(data_types[0]);
     // serialized cannot be used in the case of single column, because the join operator will have some processing of column nullable, resulting in incorrect serialized results.
     if (!t->have_maximum_size_of_value()) {
-        if (is_string_type(t->get_primitive_type()) || t->get_primitive_type() == TYPE_ARRAY) {
+        if (is_string_type(t->get_primitive_type()) || t->get_primitive_type() == TYPE_ARRAY ||
+            t->get_primitive_type() == TYPE_JSONB) {
             return HashKeyType::string_key;
         }
         throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, type={}", t->get_name());
diff --git a/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out b/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
new file mode 100644
index 00000000000..e5f6b0a29f1
--- /dev/null
+++ b/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !order --
+{"a":2,"b":2}  3
+{"a":1,"b":2}  2
+{"a":1,"b":3}  1
+
+-- !order --
+{"a":1,"b":2}
+{"a":1,"b":3}
+{"a":2,"b":2}
+
+-- !order --
+{"a":1,"b":2}
+{"b":2,"a":1}
+
+-- !order --
+{"a":1,"b":2}  2
+
+-- !order --
+123
+123
+
+-- !order --
+123    2
+
diff --git a/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy b/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
new file mode 100644
index 00000000000..8ba0196e6a7
--- /dev/null
+++ b/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_json_group_by_and_distinct", "p0") {
+    
+
+    sql """
+        drop table if exists test_jsonb_groupby;
+    """
+    sql """
+    CREATE TABLE IF NOT EXISTS test_jsonb_groupby (
+              `id` INT ,
+              `j` jsonb
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+    );
+    """
+
+
+    sql """
+        insert into test_jsonb_groupby values (1, '{"a":1, "b":2}'), (2, '{"a":1, "b":3}'), (3, '{"a":2, "b":2}') , (4, '{"a":2, "b":2}') , (5, '{"a":1, "b":2}') , (6, '{"a":2, "b":2}') ;
+    """
+
+    qt_order"""
+        select j, count(*) as cnt from test_jsonb_groupby group by j order by cnt desc, cast(j as string);
+    """
+
+    qt_order"""
+        select distinct j from test_jsonb_groupby order by cast(j as string);
+    """
+
+
+    sql """
+        drop table if exists test_jsonb_obj;
+    """
+
+    sql """
+         CREATE TABLE IF NOT EXISTS test_jsonb_obj (
+              `id` INT ,
+              `j` jsonb
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+        );
+    """
+
+    sql """
+        insert into test_jsonb_obj values (1,'{"a":1, "b":2}'), (2,'{"b":2, "a":1}');
+    """
+
+    qt_order"""
+        select j from test_jsonb_obj group by j order by cast(j as string);
+    """
+
+    qt_order"""
+        select SORT_JSON_OBJECT_KEYS(j), count(*) from test_jsonb_obj group by SORT_JSON_OBJECT_KEYS(j) order by cast(SORT_JSON_OBJECT_KEYS(j) as string);
+    """
+
+
+
+
+    sql """
+        drop table if exists test_jsonb_number;
+    """
+
+    sql """
+         CREATE TABLE IF NOT EXISTS test_jsonb_number (
+              `id` INT ,
+              `j` jsonb
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+        );
+    """
+
+
+    sql """
+        insert into test_jsonb_number values (1,to_json( cast(123 as bigint))), (2,to_json(cast(123 as tinyint)));
+    """
+
+    qt_order"""
+        select j from test_jsonb_number group by j order by cast(j as string);
+    """
+
+    qt_order"""
+        select NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j), count(*) from test_jsonb_number group by NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j) order by cast(NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j) as string);
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch branch-4.0 updated: branch-4.0: [feature](jsonb) json type support group by and distinct #57679 (#58101)

Reply via email to