This is an automated email from the ASF dual-hosted git repository.

zclllyybb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new aa68e4bd9e7 [Enhancement](udf) Reject bitmap, hll, and quantile_state 
in udf create (#63849)
aa68e4bd9e7 is described below

commit aa68e4bd9e74dc84eaeb3860058ba818e2081abb
Author: linrrarity <[email protected]>
AuthorDate: Fri May 29 11:34:39 2026 +0800

    [Enhancement](udf) Reject bitmap, hll, and quantile_state in udf create 
(#63849)
    
    Problem Summary:
    
    UDF creation currently allows `BITMAP`, `HLL`, and `QUANTILE_STATE` in
    function signatures, but these object types are not exposed to
    Java/Python UDF runtimes as first-class values. They are effectively
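    bridged as opaque bytes and are marked as unsupported in the
    [data type mapping documentation](https://doris.apache.org/docs/dev/query-data/udf/python-user-defined-function#data-type-mapping).
    This commit rejects these types, including when they are nested inside
    ARRAY, MAP, or STRUCT types, when a Java or Python UDF is created.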
---
 .../plans/commands/CreateFunctionCommand.java      |  33 ++++
 .../apache/doris/catalog/CreateFunctionTest.java   |  35 ++++
 .../test_pythonudaf_object_types_inline.groovy     | 184 +++++++++++++++++++++
 .../test_pythonudf_object_types_inline.groovy      | 105 ++++++++++++
 .../test_pythonudtf_object_types_inline.groovy     | 105 ++++++++++++
 5 files changed, 462 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
index bc5edcbb59b..4a367bb8079 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
@@ -37,6 +37,7 @@ import org.apache.doris.catalog.MapType;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.ScalarFunction;
 import org.apache.doris.catalog.ScalarType;
+import org.apache.doris.catalog.StructField;
 import org.apache.doris.catalog.StructType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
@@ -346,6 +347,7 @@ public class CreateFunctionCommand extends Command 
implements ForwardWithSync {
         }
         if (binaryType == Function.BinaryType.JAVA_UDF) {
             FunctionUtil.checkEnableJavaUdf();
+            checkUdfSupportedTypes();
             if (!isAggregate && !isTableFunction) {
                 volatility = analyzeVolatility();
             }
@@ -363,6 +365,7 @@ public class CreateFunctionCommand extends Command 
implements ForwardWithSync {
             extractExpirationTime();
         } else if (binaryType == Function.BinaryType.PYTHON_UDF) {
             FunctionUtil.checkEnablePythonUdf();
+            checkUdfSupportedTypes();
             if (!isAggregate && !isTableFunction) {
                 volatility = analyzeVolatility();
             }
@@ -418,6 +421,36 @@ public class CreateFunctionCommand extends Command 
implements ForwardWithSync {
         return runtimeVersionString != null && 
PYTHON_VERSION_PATTERN.matcher(runtimeVersionString).matches();
     }
 
+    private void checkUdfSupportedTypes() throws AnalysisException {
+        Type[] argTypes = argsDef.getArgTypes();
+        for (int i = 0; i < argTypes.length; i++) {
+            checkUdfSupportedType(argTypes[i], "argument " + (i + 1));
+        }
+        checkUdfSupportedType(returnType.toCatalogDataType(), "return");
+        if (intermediateType != null) {
+            checkUdfSupportedType(intermediateType.toCatalogDataType(), 
"intermediate");
+        }
+    }
+
+    private void checkUdfSupportedType(Type type, String typePosition) throws 
AnalysisException {
+        // Reject bitmap/hll/quantile_state type
+        if (type.isObjectStored()) {
+            throw new AnalysisException(String.format(
+                    "%s does not support %s type %s", binaryType, 
typePosition, type.toSql()));
+        }
+
+        if (type.isArrayType()) {
+            checkUdfSupportedType(((ArrayType) type).getItemType(), 
typePosition + " element");
+        } else if (type.isMapType()) {
+            checkUdfSupportedType(((MapType) type).getKeyType(), typePosition 
+ " key");
+            checkUdfSupportedType(((MapType) type).getValueType(), 
typePosition + " value");
+        } else if (type.isStructType()) {
+            for (StructField field : ((StructType) type).getFields()) {
+                checkUdfSupportedType(field.getType(), typePosition + " field 
" + field.getName());
+            }
+        }
+    }
+
     private Boolean parseBooleanFromProperties(String propertyString) throws 
AnalysisException {
         String valueOfString = properties.get(propertyString);
         if (valueOfString == null) {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java
index 426a45074b8..e6741b9e54c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java
@@ -130,6 +130,35 @@ public class CreateFunctionTest {
         Assert.assertEquals(FunctionVolatility.VOLATILE, findFunction(db, 
"py_default").getVolatility());
     }
 
+    @Test
+    public void testCreatePythonFunctionRejectsObjectTypes() throws Exception {
+        ConnectContext ctx = UtFrameUtils.createDefaultCtx();
+        createDatabase(ctx, "create database py_obj_type_db;");
+        dorisAssert = new DorisAssert(ctx);
+        dorisAssert.useDatabase("py_obj_type_db");
+
+        assertCreateFunctionAnalysisException(ctx, "create function 
py_obj_type_db.py_bitmap_arg(bitmap) returns int "
+                + "properties('type'='PYTHON_UDF', 'symbol'='evaluate', 
'runtime_version'='3.10.2');",
+                "PYTHON_UDF does not support argument 1 type bitmap");
+        assertCreateFunctionAnalysisException(ctx, "create function 
py_obj_type_db.j_bitmap_arg(bitmap) returns int "
+                + "properties('type'='JAVA_UDF', 'symbol'='evaluate');",
+                "JAVA_UDF does not support argument 1 type bitmap");
+        assertCreateFunctionAnalysisException(ctx, "create function 
py_obj_type_db.py_hll_ret(int) returns hll "
+                + "properties('type'='PYTHON_UDF', 'symbol'='evaluate', 
'runtime_version'='3.10.2');",
+                "PYTHON_UDF does not support return type hll");
+        assertCreateFunctionAnalysisException(ctx, "create aggregate function 
py_obj_type_db.py_quantile_arg"
+                + "(quantile_state) returns int 
properties('type'='PYTHON_UDF', 'symbol'='Agg', "
+                + "'runtime_version'='3.10.2');",
+                "PYTHON_UDF does not support argument 1 type quantile_state");
+        assertCreateFunctionAnalysisException(ctx, "create aggregate function 
py_obj_type_db.j_quantile_arg"
+                + "(quantile_state) returns int properties('type'='JAVA_UDF', 
'symbol'='Agg');",
+                "JAVA_UDF does not support argument 1 type quantile_state");
+        assertCreateFunctionAnalysisException(ctx, "create tables function 
py_obj_type_db.py_bitmap_table(int) "
+                + "returns array<bitmap> properties('type'='PYTHON_UDF', 
'symbol'='evaluate', "
+                + "'runtime_version'='3.10.2');",
+                "ARRAY unsupported sub-type: bitmap");
+    }
+
     @Test
     public void testCreateGlobalFunction() throws Exception {
         ConnectContext ctx = UtFrameUtils.createDefaultCtx();
@@ -215,6 +244,12 @@ public class CreateFunctionTest {
         }
     }
 
+    private void assertCreateFunctionAnalysisException(ConnectContext ctx, 
String sql, String message) {
+        Exception exception = Assert.assertThrows(Exception.class, () -> 
createFunction(sql, ctx));
+        Assert.assertTrue("Expected error to contain: " + message + ", actual: 
" + exception.getMessage(),
+                exception.getMessage().contains(message));
+    }
+
     private boolean containsIgnoreCase(String str, String sub) {
         return str.toLowerCase().contains(sub.toLowerCase());
     }
diff --git 
a/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy
 
b/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy
new file mode 100644
index 00000000000..d37f5d33074
--- /dev/null
+++ 
b/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy
@@ -0,0 +1,184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudaf_object_types_inline") {
+    def runtime_version = getPythonUdfRuntimeVersion()
+
+    test {
+        sql """
+        CREATE AGGREGATE FUNCTION py_obj_udaf_bitmap_arg(bitmap)
+        RETURNS BIGINT
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "Agg",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+class Agg:
+    def __init__(self):
+        self.sum = 0
+    def accumulate(self, v):
+        pass
+    def merge(self, other):
+        pass
+    def finish(self):
+        return self.sum
+    @property
+    def aggregate_state(self):
+        return self.sum
+\$\$;
+        """
+        exception "does not support argument 1 type bitmap"
+    }
+
+    test {
+        sql """
+        CREATE AGGREGATE FUNCTION py_obj_udaf_hll_ret(int)
+        RETURNS HLL
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "Agg",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+class Agg:
+    def __init__(self):
+        self.state = None
+    def accumulate(self, v):
+        pass
+    def merge(self, other):
+        pass
+    def finish(self):
+        return self.state
+    @property
+    def aggregate_state(self):
+        return self.state
+\$\$;
+        """
+        exception "does not support return type hll"
+    }
+
+    test {
+        sql """
+        CREATE AGGREGATE FUNCTION py_obj_udaf_quantile_state(quantile_state)
+        RETURNS BIGINT
+        INTERMEDIATE BIGINT
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "Agg",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+class Agg:
+    def __init__(self):
+        self.state = 0
+    def accumulate(self, v):
+        pass
+    def merge(self, other):
+        pass
+    def finish(self):
+        return self.state
+    @property
+    def aggregate_state(self):
+        return self.state
+\$\$;
+        """
+        exception "does not support argument 1 type quantile_state"
+    }
+
+    test {
+        sql """
+        CREATE AGGREGATE FUNCTION py_obj_udaf_bitmap_intermediate(int)
+        RETURNS BIGINT
+        INTERMEDIATE BITMAP
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "Agg",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+class Agg:
+    def __init__(self):
+        self.state = 0
+    def accumulate(self, v):
+        pass
+    def merge(self, other):
+        pass
+    def finish(self):
+        return self.state
+    @property
+    def aggregate_state(self):
+        return self.state
+\$\$;
+        """
+        exception "does not support intermediate type bitmap"
+    }
+
+    test {
+        sql """
+        CREATE AGGREGATE FUNCTION py_obj_udaf_array_bitmap(int)
+        RETURNS ARRAY<BITMAP>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "Agg",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+class Agg:
+    def __init__(self):
+        self.state = None
+    def accumulate(self, v):
+        pass
+    def merge(self, other):
+        pass
+    def finish(self):
+        return self.state
+    @property
+    def aggregate_state(self):
+        return self.state
+\$\$;
+        """
+        exception "ARRAY unsupported sub-type: bitmap"
+    }
+
+    test {
+        sql """
+        CREATE AGGREGATE FUNCTION py_obj_udaf_struct_bitmap(int)
+        RETURNS STRUCT<plain:INT, nested:MAP<INT, ARRAY<HLL>>>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "Agg",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+class Agg:
+    def __init__(self):
+        self.state = None
+    def accumulate(self, v):
+        pass
+    def merge(self, other):
+        pass
+    def finish(self):
+        return self.state
+    @property
+    def aggregate_state(self):
+        return self.state
+\$\$;
+        """
+        exception "ARRAY unsupported sub-type: hll"
+    }
+}
diff --git 
a/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy 
b/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy
new file mode 100644
index 00000000000..b141e6d503e
--- /dev/null
+++ 
b/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_object_types_inline") {
+    def runtime_version = getPythonUdfRuntimeVersion()
+
+    test {
+        sql """
+        CREATE FUNCTION py_obj_udf_bitmap_arg(bitmap)
+        RETURNS INT
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    return 1
+\$\$;
+        """
+        exception "does not support argument 1 type bitmap"
+    }
+
+    test {
+        sql """
+        CREATE FUNCTION py_obj_udf_hll_ret(int)
+        RETURNS HLL
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    return None
+\$\$;
+        """
+        exception "does not support return type hll"
+    }
+
+    test {
+        sql """
+        CREATE FUNCTION py_obj_udf_array_bitmap(array<int>)
+        RETURNS ARRAY<BITMAP>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    return None
+\$\$;
+        """
+        exception "ARRAY unsupported sub-type: bitmap"
+    }
+
+    test {
+        sql """
+        CREATE FUNCTION py_obj_udf_map_bitmap(map<int, bitmap>)
+        RETURNS INT
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    return 1
+\$\$;
+        """
+        exception "MAP unsupported sub-type: bitmap"
+    }
+
+    test {
+        sql """
+        CREATE FUNCTION py_obj_udf_struct_bitmap(INT)
+        RETURNS STRUCT<plain:INT, nested:ARRAY<STRUCT<b:BITMAP>>>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    return None
+\$\$;
+        """
+        exception "STRUCT unsupported sub-type: bitmap"
+    }
+}
diff --git 
a/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy
 
b/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy
new file mode 100644
index 00000000000..0d4259a6e6e
--- /dev/null
+++ 
b/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudtf_object_types_inline") {
+    def runtime_version = getPythonUdfRuntimeVersion()
+
+    test {
+        sql """
+        CREATE TABLES FUNCTION py_obj_udtf_bitmap_arg(bitmap)
+        RETURNS ARRAY<STRUCT<value:INT>>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    yield (1,)
+\$\$;
+        """
+        exception "does not support argument 1 type bitmap"
+    }
+
+    test {
+        sql """
+        CREATE TABLES FUNCTION py_obj_udtf_hll_ret(int)
+        RETURNS ARRAY<HLL>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    yield (1,)
+\$\$;
+        """
+        exception "ARRAY unsupported sub-type: hll"
+    }
+
+    test {
+        sql """
+        CREATE TABLES FUNCTION py_obj_udtf_quantile_state(quantile_state)
+        RETURNS ARRAY<STRUCT<value:INT>>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    yield (1,)
+\$\$;
+        """
+        exception "does not support argument 1 type quantile_state"
+    }
+
+    test {
+        sql """
+        CREATE TABLES FUNCTION py_obj_udtf_array_bitmap(array<bitmap>)
+        RETURNS ARRAY<STRUCT<value:INT>>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    yield (1,)
+\$\$;
+        """
+        exception "ARRAY unsupported sub-type: bitmap"
+    }
+
+    test {
+        sql """
+        CREATE TABLES FUNCTION py_obj_udtf_struct_bitmap(int)
+        RETURNS ARRAY<STRUCT<plain:INT, nested:MAP<INT, ARRAY<BITMAP>>>>
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}"
+        )
+        AS \$\$
+def evaluate(v):
+    yield (1,)
+\$\$;
+        """
+        exception "ARRAY unsupported sub-type: bitmap"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to