This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch m/doris-25672
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 92520ac7975650c0e251d7395ecde45f809478ad
Author: lihangyu <[email protected]>
AuthorDate: Thu May 21 15:53:38 2026 +0800

    [fix](fe) Reject COUNT DISTINCT on variant arguments
    
    ### What problem does this PR solve?
    
    Issue Number: close #25672
    
    Related PR: None
    
    Problem Summary: COUNT(DISTINCT variant_subcolumn) could reach BE hash key
    selection and fail with a vague INTERNAL_ERROR when the argument was VARIANT.
    This commit rejects VARIANT arguments during FE aggregate analysis and adds a
    clearer BE fallback error for hash-key paths that the FE check does not catch.
    
    ### Release note
    
    COUNT(DISTINCT ...) on VARIANT arguments now reports a clear
    unsupported-type error instead of a BE internal error. Cast VARIANT
    expressions to STRING or another supported scalar type before using
    COUNT DISTINCT.
    
    ### Check List (For Author)
    
    - Test: Unit Test
        - `./run-fe-ut.sh --run org.apache.doris.nereids.trees.expressions.functions.agg.CountTest`
        - Regression test added but not run because the new worktree does not
          have a built output cluster.
    - Behavior changed: Yes (COUNT DISTINCT on VARIANT now fails during
      analysis with a clearer error instead of a BE INTERNAL_ERROR)
    - Does this need documentation: No
---
 be/src/exec/common/hash_table/hash_key_type.h      |  8 +++-
 .../trees/expressions/functions/agg/Count.java     | 17 ++++++--
 .../functions/agg/MultiDistinctCount.java          |  6 +++
 .../trees/expressions/functions/agg/CountTest.java | 45 ++++++++++++++++++++++
 .../variant_p0/test_variant_count_distinct.groovy  | 41 ++++++++++++++++++++
 5 files changed, 113 insertions(+), 4 deletions(-)

diff --git a/be/src/exec/common/hash_table/hash_key_type.h 
b/be/src/exec/common/hash_table/hash_key_type.h
index 313df63f3d3..58bd6574986 100644
--- a/be/src/exec/common/hash_table/hash_key_type.h
+++ b/be/src/exec/common/hash_table/hash_key_type.h
@@ -118,6 +118,12 @@ inline HashKeyType get_hash_key_type(const 
std::vector<DataTypePtr>& data_types)
             t->get_primitive_type() == TYPE_JSONB) {
             return HashKeyType::string_key;
         }
+        if (t->get_primitive_type() == TYPE_VARIANT) {
+            throw Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
+                            "VARIANT type is not supported as a hash key. Cast 
the VARIANT "
+                            "expression to STRING or another supported scalar 
type before using "
+                            "it in DISTINCT, GROUP BY, JOIN, or other hash 
operations.");
+        }
         throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, 
type={}", t->get_name());
     }
 
@@ -140,4 +146,4 @@ inline HashKeyType get_hash_key_type(const 
std::vector<DataTypePtr>& data_types)
     }
 }
 
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
index 8f486bfc2ef..0ecf4350485 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
@@ -92,13 +92,24 @@ public class Count extends NotNullableAggregateFunction
     public void checkLegalityAfterRewrite() {
         // after rewrite, count(distinct bitmap_column) should be rewritten to 
bitmap_union_count(bitmap_column)
         for (Expression argument : getArguments()) {
-            if (distinct && (argument.getDataType().isComplexType()
-                    || argument.getDataType().isObjectType() || 
argument.getDataType().isJsonType())) {
-                throw new AnalysisException("COUNT DISTINCT could not process 
type " + this.toSql());
+            if (distinct) {
+                checkDistinctArgument(argument, this.toSql());
             }
         }
     }
 
+    static void checkDistinctArgument(Expression argument, String functionSql) 
{
+        DataType argumentType = argument.getDataType();
+        if (argumentType.isVariantType()) {
+            throw new AnalysisException("COUNT DISTINCT does not support 
VARIANT argument in " + functionSql
+                    + ". Cast the VARIANT expression to STRING or another 
supported scalar type before using "
+                    + "COUNT DISTINCT.");
+        }
+        if (argumentType.isComplexType() || argumentType.isObjectType() || 
argumentType.isJsonType()) {
+            throw new AnalysisException("COUNT DISTINCT could not process type 
" + functionSql);
+        }
+    }
+
     public boolean isStar() {
         return isStar;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
index 9e71a3eb647..3bf6a5d24f2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
@@ -57,6 +57,9 @@ public class MultiDistinctCount extends 
NotNullableAggregateFunction
         if (super.children().size() > 1) {
             throw new AnalysisException("MultiDistinctCount's children size 
must be 1");
         }
+        for (Expression argument : super.children()) {
+            Count.checkDistinctArgument(argument, "COUNT DISTINCT " + 
argument.toSql());
+        }
     }
 
     /** constructor for withChildren and reuse signature */
@@ -67,6 +70,9 @@ public class MultiDistinctCount extends 
NotNullableAggregateFunction
     @Override
     public MultiDistinctCount withDistinctAndChildren(boolean distinct, 
List<Expression> children) {
         Preconditions.checkArgument(children.size() == 1, 
"MultiDistinctCount's children size must be 1");
+        for (Expression argument : children) {
+            Count.checkDistinctArgument(argument, "COUNT DISTINCT " + 
argument.toSql());
+        }
         return new MultiDistinctCount(getFunctionParams(false, children));
     }
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
new file mode 100644
index 00000000000..7ee6ce90eb6
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.agg;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.types.VariantType;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+class CountTest {
+    @Test
+    void testCountDistinctRejectsVariant() {
+        Count count = new Count(true, SlotReference.of("v", 
VariantType.INSTANCE));
+
+        AnalysisException exception = 
Assertions.assertThrows(AnalysisException.class,
+                count::checkLegalityAfterRewrite);
+        Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT 
does not support VARIANT argument"));
+        Assertions.assertTrue(exception.getMessage().contains("Cast the 
VARIANT expression"));
+    }
+
+    @Test
+    void testMultiDistinctCountRejectsVariant() {
+        AnalysisException exception = 
Assertions.assertThrows(AnalysisException.class,
+                () -> new MultiDistinctCount(SlotReference.of("v", 
VariantType.INSTANCE)));
+        Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT 
does not support VARIANT argument"));
+        Assertions.assertTrue(exception.getMessage().contains("Cast the 
VARIANT expression"));
+    }
+}
diff --git 
a/regression-test/suites/variant_p0/test_variant_count_distinct.groovy 
b/regression-test/suites/variant_p0/test_variant_count_distinct.groovy
new file mode 100644
index 00000000000..cad535f433c
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_count_distinct.groovy
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_variant_count_distinct") {
+    sql "DROP TABLE IF EXISTS test_variant_count_distinct_array_subcolumn"
+
+    sql """
+        CREATE TABLE test_variant_count_distinct_array_subcolumn (
+            id INT,
+            v VARIANT
+        ) DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1")
+    """
+
+    sql """
+        INSERT INTO test_variant_count_distinct_array_subcolumn VALUES
+        (1, '{"arr":[1,2,3]}'),
+        (2, '{"arr":[4,5]}'),
+        (3, '{"arr":[1,2,3]}')
+    """
+
+    test {
+        sql "SELECT COUNT(DISTINCT v['arr']) FROM 
test_variant_count_distinct_array_subcolumn"
+        exception "COUNT DISTINCT does not support VARIANT argument"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to