This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch m/doris-25672 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 92520ac7975650c0e251d7395ecde45f809478ad Author: lihangyu <[email protected]> AuthorDate: Thu May 21 15:53:38 2026 +0800 [fix](fe) Reject COUNT DISTINCT on variant arguments ### What problem does this PR solve? Issue Number: close #25672 Related PR: None Problem Summary: COUNT(DISTINCT variant_subcolumn) could reach BE hash key selection and fail with a vague INTERNAL_ERROR when the argument was VARIANT. Reject VARIANT arguments during FE aggregate analysis and keep a clearer BE fallback for uncaught hash-key paths. ### Release note COUNT(DISTINCT ...) on VARIANT arguments now reports a clear unsupported-type error instead of a BE internal error. Cast VARIANT expressions to STRING or another supported scalar type before using COUNT DISTINCT. ### Check List (For Author) - Test: Unit Test - `./run-fe-ut.sh --run org.apache.doris.nereids.trees.expressions.functions.agg.CountTest` - Regression test added but not run because the new worktree does not have a built output cluster. - Behavior changed: Yes (COUNT DISTINCT on VARIANT now fails during analysis with a clearer error instead of a BE INTERNAL_ERROR) - Does this need documentation: No --- be/src/exec/common/hash_table/hash_key_type.h | 8 +++- .../trees/expressions/functions/agg/Count.java | 17 ++++++-- .../functions/agg/MultiDistinctCount.java | 6 +++ .../trees/expressions/functions/agg/CountTest.java | 45 ++++++++++++++++++++++ .../variant_p0/test_variant_count_distinct.groovy | 41 ++++++++++++++++++++ 5 files changed, 113 insertions(+), 4 deletions(-) diff --git a/be/src/exec/common/hash_table/hash_key_type.h b/be/src/exec/common/hash_table/hash_key_type.h index 313df63f3d3..58bd6574986 100644 --- a/be/src/exec/common/hash_table/hash_key_type.h +++ b/be/src/exec/common/hash_table/hash_key_type.h @@ -118,6 +118,12 @@ inline HashKeyType get_hash_key_type(const std::vector<DataTypePtr>& data_types) t->get_primitive_type() == TYPE_JSONB) { return HashKeyType::string_key; } + if (t->get_primitive_type() == 
TYPE_VARIANT) { + throw Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "VARIANT type is not supported as a hash key. Cast the VARIANT " + "expression to STRING or another supported scalar type before using " + "it in DISTINCT, GROUP BY, JOIN, or other hash operations."); + } throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, type={}", t->get_name()); } @@ -140,4 +146,4 @@ inline HashKeyType get_hash_key_type(const std::vector<DataTypePtr>& data_types) } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java index 8f486bfc2ef..0ecf4350485 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java @@ -92,13 +92,24 @@ public class Count extends NotNullableAggregateFunction public void checkLegalityAfterRewrite() { // after rewrite, count(distinct bitmap_column) should be rewritten to bitmap_union_count(bitmap_column) for (Expression argument : getArguments()) { - if (distinct && (argument.getDataType().isComplexType() - || argument.getDataType().isObjectType() || argument.getDataType().isJsonType())) { - throw new AnalysisException("COUNT DISTINCT could not process type " + this.toSql()); + if (distinct) { + checkDistinctArgument(argument, this.toSql()); } } } + static void checkDistinctArgument(Expression argument, String functionSql) { + DataType argumentType = argument.getDataType(); + if (argumentType.isVariantType()) { + throw new AnalysisException("COUNT DISTINCT does not support VARIANT argument in " + functionSql + + ". 
Cast the VARIANT expression to STRING or another supported scalar type before using " + + "COUNT DISTINCT."); + } + if (argumentType.isComplexType() || argumentType.isObjectType() || argumentType.isJsonType()) { + throw new AnalysisException("COUNT DISTINCT could not process type " + functionSql); + } + } + public boolean isStar() { return isStar; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java index 9e71a3eb647..3bf6a5d24f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java @@ -57,6 +57,9 @@ public class MultiDistinctCount extends NotNullableAggregateFunction if (super.children().size() > 1) { throw new AnalysisException("MultiDistinctCount's children size must be 1"); } + for (Expression argument : super.children()) { + Count.checkDistinctArgument(argument, "COUNT DISTINCT " + argument.toSql()); + } } /** constructor for withChildren and reuse signature */ @@ -67,6 +70,9 @@ public class MultiDistinctCount extends NotNullableAggregateFunction @Override public MultiDistinctCount withDistinctAndChildren(boolean distinct, List<Expression> children) { Preconditions.checkArgument(children.size() == 1, "MultiDistinctCount's children size must be 1"); + for (Expression argument : children) { + Count.checkDistinctArgument(argument, "COUNT DISTINCT " + argument.toSql()); + } return new MultiDistinctCount(getFunctionParams(false, children)); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java new file mode 100644 index 00000000000..7ee6ce90eb6 --- /dev/null +++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.agg; + +import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.types.VariantType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class CountTest { + @Test + void testCountDistinctRejectsVariant() { + Count count = new Count(true, SlotReference.of("v", VariantType.INSTANCE)); + + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, + count::checkLegalityAfterRewrite); + Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT does not support VARIANT argument")); + Assertions.assertTrue(exception.getMessage().contains("Cast the VARIANT expression")); + } + + @Test + void testMultiDistinctCountRejectsVariant() { + AnalysisException exception = Assertions.assertThrows(AnalysisException.class, + () -> new MultiDistinctCount(SlotReference.of("v", VariantType.INSTANCE))); + 
Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT does not support VARIANT argument")); + Assertions.assertTrue(exception.getMessage().contains("Cast the VARIANT expression")); + } +} diff --git a/regression-test/suites/variant_p0/test_variant_count_distinct.groovy b/regression-test/suites/variant_p0/test_variant_count_distinct.groovy new file mode 100644 index 00000000000..cad535f433c --- /dev/null +++ b/regression-test/suites/variant_p0/test_variant_count_distinct.groovy @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_variant_count_distinct") { + sql "DROP TABLE IF EXISTS test_variant_count_distinct_array_subcolumn" + + sql """ + CREATE TABLE test_variant_count_distinct_array_subcolumn ( + id INT, + v VARIANT + ) DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES("replication_num" = "1") + """ + + sql """ + INSERT INTO test_variant_count_distinct_array_subcolumn VALUES + (1, '{"arr":[1,2,3]}'), + (2, '{"arr":[4,5]}'), + (3, '{"arr":[1,2,3]}') + """ + + test { + sql "SELECT COUNT(DISTINCT v['arr']) FROM test_variant_count_distinct_array_subcolumn" + exception "COUNT DISTINCT does not support VARIANT argument" + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
