This is an automated email from the ASF dual-hosted git repository.

zclllyybb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d073c953e8a [Fix](pyudf) Convert nested map value correctly (#63907)
d073c953e8a is described below

commit d073c953e8a44abfadbcde8b16cc8d1903c42fd7
Author: linrrarity <[email protected]>
AuthorDate: Tue Jun 2 11:04:08 2026 +0800

    [Fix](pyudf) Convert nested map value correctly (#63907)
    
    Problem Summary:
    
    Fix Python UDF nested complex type conversion when `MAP` appears inside
    `ARRAY`, `STRUCT`, or vectorized inputs.
    
    Previously, Python UDF argument conversion mostly relied on PyArrow's
    default conversions (`Scalar.as_py()`, `Array.to_pylist()`,
    `Array.to_pandas()`). Those APIs convert a top-level Arrow `MAP` into
    Python-friendly values in some paths, but nested `MAP` values are
    exposed as list-of-tuples. For example, `ARRAY<MAP<STRING, INT>>` could
    arrive in Python as `[[('a', 1)]]` instead of `[{'a': 1}]`. This made
    user UDF code see nested maps as `list` instead of `dict`.
    
    This PR introduces a recursive Arrow-value conversion helper and applies
    it consistently across Python UDF argument conversion paths. The helper
    manually reconstructs Python values according to the Arrow type:
    - `MAP` -> `dict`
    - `LIST` / `LARGE_LIST` -> `list`
    - `STRUCT` -> `dict`
    
    before:
    ```sql
    CREATE FUNCTION py_deep_nested_debug(ARRAY<MAP<STRING, ARRAY<INT>>> )
    RETURNS STRING
    PROPERTIES (
        "type" = "PYTHON_UDF",
        "symbol" = "evaluate",
        "runtime_version" = "3.12.11",
        "always_nullable" = "true"
    )
    AS $$
    def evaluate(arr):
        if arr is None:
            return 'None'
        return 'outer_type={}, outer_repr={}'.format(type(arr).__name__, repr(arr))
    $$;
    
    SELECT py_deep_nested_debug([{'a': [1, 2], 'b': [3]}, {'c': [4, 5, 6]}]);
    +-------------------------------------------------------------------------------+
    | py_deep_nested_debug([{'a': [1, 2], 'b': [3]}, {'c': [4, 5, 6]}])             |
    +-------------------------------------------------------------------------------+
    | outer_type=list, outer_repr=[[('a', [1, 2]), ('b', [3])], [('c', [4, 5, 6])]] |
    +-------------------------------------------------------------------------------+
    ```
    
    now:
    ```text
    SELECT py_deep_nested_debug([{'a': [1, 2], 'b': [3]}, {'c': [4, 5, 6]}]);
    +-------------------------------------------------------------------------+
    | py_deep_nested_debug([{'a': [1, 2], 'b': [3]}, {'c': [4, 5, 6]}])       |
    +-------------------------------------------------------------------------+
    | outer_type=list, outer_repr=[{'a': [1, 2], 'b': [3]}, {'c': [4, 5, 6]}] |
    +-------------------------------------------------------------------------+
    ```
---
 be/src/udf/python/python_server.py                 |  85 +++-
 .../test_pythonudf_nested_complex_type.out         |  20 +
 .../test_pythonudf_nested_complex_type.groovy      | 445 +++++++++++++++++++++
 3 files changed, 541 insertions(+), 9 deletions(-)

diff --git a/be/src/udf/python/python_server.py 
b/be/src/udf/python/python_server.py
index ecdbef691b9..c0683149ea2 100644
--- a/be/src/udf/python/python_server.py
+++ b/be/src/udf/python/python_server.py
@@ -252,11 +252,6 @@ def convert_arrow_field_to_python(field, 
column_metadata=None):
     if field is None:
         return None
 
-    if pa.types.is_map(field.type):
-        # pyarrow.lib.MapScalar's as_py() returns a list of tuples, convert to 
dict
-        list_of_tuples = field.as_py()
-        return dict(list_of_tuples) if list_of_tuples is not None else None
-    
     # Check if we should apply special IP type conversion based on metadata
     if column_metadata:
         # Arrow metadata keys can be either bytes or str depending on how they 
were created
@@ -300,8 +295,64 @@ def convert_arrow_field_to_python(field, 
column_metadata=None):
                         )
                         return value
                 return None
-    
-    return field.as_py()
+
+    return convert_arrow_value_to_python(field.as_py(), field.type)
+
+
+def convert_arrow_value_to_python(value, arrow_type):
+    """
+    Recursively convert Arrow nested values to Doris Python UDF values.
+
+    PyArrow exposes MapScalar.as_py() as a list of key/value tuples. If the 
map is
+    nested under ARRAY or STRUCT, the top-level scalar is no longer MapScalar, 
so
+    field.as_py() alone would leak list-of-tuples to user UDF code.
+    """
+    if value is None:
+        return None
+
+    if pa.types.is_map(arrow_type):
+        key_type = arrow_type.key_type
+        item_type = arrow_type.item_type
+        return {
+            convert_arrow_value_to_python(k, key_type): 
convert_arrow_value_to_python(
+                v, item_type
+            )
+            for k, v in value
+        }
+
+    if pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type):
+        element_type = arrow_type.value_type
+        return [convert_arrow_value_to_python(v, element_type) for v in value]
+
+    if pa.types.is_struct(arrow_type):
+        return {
+            arrow_type[i].name: convert_arrow_value_to_python(
+                value.get(arrow_type[i].name), arrow_type[i].type
+            )
+            for i in range(len(arrow_type))
+        }
+
+    return value
+
+
+def needs_nested_python_normalization(arrow_type):
+    """
+    Return True when Arrow default Python conversion can leak nested MAP 
values as
+    list-of-tuples and therefore needs recursive normalization.
+    """
+    if pa.types.is_map(arrow_type):
+        return True
+
+    if pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type):
+        return needs_nested_python_normalization(arrow_type.value_type)
+
+    if pa.types.is_struct(arrow_type):
+        return any(
+            needs_nested_python_normalization(arrow_type[i].type)
+            for i in range(len(arrow_type))
+        )
+
+    return False
 
 
 def convert_python_to_arrow_value(value, output_type=None):
@@ -563,8 +614,24 @@ class AdaptivePythonUDF:
         Convert a pa.Array to an instance of the specified VectorType.
         """
         if vec_type == VectorType.LIST:
-            return arrow_array.to_pylist()
+            values = arrow_array.to_pylist()
+            if not needs_nested_python_normalization(arrow_array.type):
+                return values
+            return [
+                convert_arrow_value_to_python(value, arrow_array.type)
+                for value in values
+            ]
         elif vec_type == VectorType.PANDAS_SERIES:
+            if needs_nested_python_normalization(arrow_array.type):
+                # Some pyarrow builds cannot materialize nested map-containing 
arrays
+                # through to_pandas() (for example list<map<...>>). Normalize 
through
+                # Python objects first, then build an object Series explicitly.
+                values = arrow_array.to_pylist()
+                converted = [
+                    convert_arrow_value_to_python(value, arrow_array.type)
+                    for value in values
+                ]
+                return pd.Series(converted, dtype=object)
             return arrow_array.to_pandas()
         else:
             raise ValueError(f"Unsupported vector type: {vec_type}")
@@ -665,7 +732,7 @@ class AdaptivePythonUDF:
                 # instead of converting to list
                 pylist = arrow_col.to_pylist()
                 if len(pylist) > 0:
-                    converted = pylist[0]
+                    converted = convert_arrow_value_to_python(pylist[0], 
arrow_col.type)
                     logging.info(
                         "Converted %s to scalar (first value): %s",
                         param.name,
diff --git 
a/regression-test/data/pythonudf_p0/test_pythonudf_nested_complex_type.out 
b/regression-test/data/pythonudf_p0/test_pythonudf_nested_complex_type.out
new file mode 100644
index 00000000000..4fe7cb82953
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_nested_complex_type.out
@@ -0,0 +1,20 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !scalar_constant_nested_complex --
+[{a:1,b:2},{c:3}]|{left:[{x:10}],right:[{y:20},{z:30}]}|(const,[{s:7}],{empty:[],nums:[1,2]})|{const_key:(constTag,[{cm:11}])}
+[]|{empty:[]}|(empty,[],{none:NULL})|{empty:(emptyTag,[])}
+NULL|NULL|NULL|NULL
+
+-- !vector_list_nested_complex --
+[{a:1,b:2},{c:3}]|{left:[{x:10}],right:[{y:20},{z:30}]}|(row1,[{s:7},{t:8}],{empty:[],nums:[1,2]})|{first:(tagA,[{m:1},{n:2}]),second:(tagB,[])}
+[]|{empty:[]}|(row2,[],{none:NULL})|{empty:(tagEmpty,[])}
+NULL|NULL|NULL|NULL
+
+-- !vector_series_nested_complex --
+[{a:1,b:2},{c:3}]|{left:[{x:10}],right:[{y:20},{z:30}]}|(row1,[{s:7},{t:8}],{empty:[],nums:[1,2]})|{first:(tagA,[{m:1},{n:2}]),second:(tagB,[])}
+[]|{empty:[]}|(row2,[],{none:NULL})|{empty:(tagEmpty,[])}
+NULL|NULL|NULL|NULL
+
+-- !vector_mixed_scalar_nested_complex --
+1|{first:(tagA,[{m:1},{n:2}]),second:(tagB,[])}
+2|{first:(tagA,[{m:1},{n:2}]),second:(tagB,[])}
+3|{first:(tagA,[{m:1},{n:2}]),second:(tagB,[])}
diff --git 
a/regression-test/suites/pythonudf_p0/test_pythonudf_nested_complex_type.groovy 
b/regression-test/suites/pythonudf_p0/test_pythonudf_nested_complex_type.groovy
new file mode 100644
index 00000000000..ca626292375
--- /dev/null
+++ 
b/regression-test/suites/pythonudf_p0/test_pythonudf_nested_complex_type.groovy
@@ -0,0 +1,445 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_nested_complex_type") {
+    def runtime_version = getPythonUdfRuntimeVersion()
+
+    sql """ DROP TABLE IF EXISTS test_pythonudf_nested_complex_type; """
+    sql """
+        CREATE TABLE test_pythonudf_nested_complex_type (
+            id INT,
+            array_map ARRAY<MAP<STRING, INT>>,
+            map_array_map MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+            struct_nested STRUCT<
+                label: STRING,
+                maps: ARRAY<MAP<STRING, INT>>,
+                attrs: MAP<STRING, ARRAY<INT>>
+            >,
+            map_struct_nested MAP<STRING, STRUCT<
+                tag: STRING,
+                metrics: ARRAY<MAP<STRING, INT>>
+            >>
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1");
+    """
+
+    sql """
+        INSERT INTO test_pythonudf_nested_complex_type VALUES
+        (
+            1,
+            [{'a': 1, 'b': 2}, {'c': 3}],
+            {'left': [{'x': 10}], 'right': [{'y': 20}, {'z': 30}]},
+            {'row1', [{'s': 7}, {'t': 8}], {'nums': [1, 2], 'empty': []}},
+            {'first': {'tagA', [{'m': 1}, {'n': 2}]}, 'second': {'tagB', []}}
+        ),
+        (
+            2,
+            [],
+            {'empty': []},
+            {'row2', [], {'none': NULL}},
+            {'empty': {'tagEmpty', []}}
+        ),
+        (
+            3,
+            NULL,
+            NULL,
+            NULL,
+            NULL
+        );
+    """
+
+    sql """
+        DROP FUNCTION IF EXISTS py_nested_complex_scalar(
+            ARRAY<MAP<STRING, INT>>,
+            MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+            STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs: 
MAP<STRING, ARRAY<INT>>>,
+            MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>
+        );
+    """
+    sql """
+        CREATE FUNCTION py_nested_complex_scalar(
+            ARRAY<MAP<STRING, INT>>,
+            MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+            STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs: 
MAP<STRING, ARRAY<INT>>>,
+            MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>
+        )
+        RETURNS STRING
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}",
+            "always_nullable" = "true"
+        )
+        AS \$\$
+def format_map(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP:' + type(m).__name__
+    return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+    if arr is None:
+        return 'NULL'
+    return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_array_map(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP_ARRAY_MAP:' + type(m).__name__
+    return '{' + ','.join(f'{k}:{format_array_map(m[k])}' for k in sorted(m)) 
+ '}'
+
+def format_attrs(attrs):
+    if attrs is None:
+        return 'NULL'
+    if not isinstance(attrs, dict):
+        return 'BAD_ATTRS:' + type(attrs).__name__
+    parts = []
+    for key in sorted(attrs):
+        val = attrs[key]
+        if val is None:
+            parts.append(f'{key}:NULL')
+        else:
+            parts.append(f'{key}:[' + ','.join(str(x) for x in val) + ']')
+    return '{' + ','.join(parts) + '}'
+
+def format_struct(s):
+    if s is None:
+        return 'NULL'
+    if not isinstance(s, dict):
+        return 'BAD_STRUCT:' + type(s).__name__
+    return '(' + str(s.get('label')) + ',' + format_array_map(s.get('maps')) + 
',' + format_attrs(s.get('attrs')) + ')'
+
+def format_map_struct_nested(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP_STRUCT:' + type(m).__name__
+    parts = []
+    for key in sorted(m):
+        val = m[key]
+        if val is None:
+            parts.append(f'{key}:NULL')
+        elif not isinstance(val, dict):
+            parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+        else:
+            parts.append(f'{key}:(' + str(val.get('tag')) + ',' + 
format_array_map(val.get('metrics')) + ')')
+    return '{' + ','.join(parts) + '}'
+
+def evaluate(array_map, map_array_map, struct_nested, map_struct_nested):
+    return '|'.join([
+        format_array_map(array_map),
+        format_map_array_map(map_array_map),
+        format_struct(struct_nested),
+        format_map_struct_nested(map_struct_nested),
+    ])
+\$\$;
+    """
+
+    qt_scalar_constant_nested_complex """
+        SELECT result
+        FROM (
+            SELECT 1 AS ord, py_nested_complex_scalar(
+                CAST([{'a': 1, 'b': 2}, {'c': 3}] AS ARRAY<MAP<STRING, INT>>),
+                CAST({'left': [{'x': 10}], 'right': [{'y': 20}, {'z': 30}]} AS 
MAP<STRING, ARRAY<MAP<STRING, INT>>>),
+                CAST({'const', [{'s': 7}], {'nums': [1, 2], 'empty': []}} AS 
STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs: MAP<STRING, 
ARRAY<INT>>>),
+                CAST({'const_key': {'constTag', [{'cm': 11}]}} AS MAP<STRING, 
STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>)
+            ) AS result
+            UNION ALL
+            SELECT 2 AS ord, py_nested_complex_scalar(
+                CAST([] AS ARRAY<MAP<STRING, INT>>),
+                CAST({'empty': []} AS MAP<STRING, ARRAY<MAP<STRING, INT>>>),
+                CAST({'empty', [], {'none': NULL}} AS STRUCT<label: STRING, 
maps: ARRAY<MAP<STRING, INT>>, attrs: MAP<STRING, ARRAY<INT>>>),
+                CAST({'empty': {'emptyTag', []}} AS MAP<STRING, STRUCT<tag: 
STRING, metrics: ARRAY<MAP<STRING, INT>>>>)
+            ) AS result
+            UNION ALL
+            SELECT 3 AS ord, py_nested_complex_scalar(
+                CAST(NULL AS ARRAY<MAP<STRING, INT>>),
+                CAST(NULL AS MAP<STRING, ARRAY<MAP<STRING, INT>>>),
+                CAST(NULL AS STRUCT<label: STRING, maps: ARRAY<MAP<STRING, 
INT>>, attrs: MAP<STRING, ARRAY<INT>>>),
+                CAST(NULL AS MAP<STRING, STRUCT<tag: STRING, metrics: 
ARRAY<MAP<STRING, INT>>>>)
+            ) AS result
+        ) ordered_result
+        ORDER BY ord;
+    """
+
+    sql """
+        DROP FUNCTION IF EXISTS py_nested_complex_vector_list(
+            ARRAY<MAP<STRING, INT>>,
+            MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+            STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs: 
MAP<STRING, ARRAY<INT>>>,
+            MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>
+        );
+    """
+    sql """
+        CREATE FUNCTION py_nested_complex_vector_list(
+            ARRAY<MAP<STRING, INT>>,
+            MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+            STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs: 
MAP<STRING, ARRAY<INT>>>,
+            MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>
+        )
+        RETURNS STRING
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}",
+            "always_nullable" = "true"
+        )
+        AS \$\$
+def format_map(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP:' + type(m).__name__
+    return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+    if arr is None:
+        return 'NULL'
+    return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_array_map(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP_ARRAY_MAP:' + type(m).__name__
+    return '{' + ','.join(f'{k}:{format_array_map(m[k])}' for k in sorted(m)) 
+ '}'
+
+def format_attrs(attrs):
+    if attrs is None:
+        return 'NULL'
+    if not isinstance(attrs, dict):
+        return 'BAD_ATTRS:' + type(attrs).__name__
+    parts = []
+    for key in sorted(attrs):
+        val = attrs[key]
+        if val is None:
+            parts.append(f'{key}:NULL')
+        else:
+            parts.append(f'{key}:[' + ','.join(str(x) for x in val) + ']')
+    return '{' + ','.join(parts) + '}'
+
+def format_struct(s):
+    if s is None:
+        return 'NULL'
+    if not isinstance(s, dict):
+        return 'BAD_STRUCT:' + type(s).__name__
+    return '(' + str(s.get('label')) + ',' + format_array_map(s.get('maps')) + 
',' + format_attrs(s.get('attrs')) + ')'
+
+def format_map_struct_nested(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP_STRUCT:' + type(m).__name__
+    parts = []
+    for key in sorted(m):
+        val = m[key]
+        if val is None:
+            parts.append(f'{key}:NULL')
+        elif not isinstance(val, dict):
+            parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+        else:
+            parts.append(f'{key}:(' + str(val.get('tag')) + ',' + 
format_array_map(val.get('metrics')) + ')')
+    return '{' + ','.join(parts) + '}'
+
+def evaluate(array_maps: list, map_array_maps: list, struct_nesteds: list, 
map_struct_nesteds: list):
+    result = []
+    for array_map, map_array_map, struct_nested, map_struct_nested in 
zip(array_maps, map_array_maps, struct_nesteds, map_struct_nesteds):
+        result.append('|'.join([
+            format_array_map(array_map),
+            format_map_array_map(map_array_map),
+            format_struct(struct_nested),
+            format_map_struct_nested(map_struct_nested),
+        ]))
+    return result
+\$\$;
+    """
+
+    qt_vector_list_nested_complex """
+        SELECT py_nested_complex_vector_list(array_map, map_array_map, 
struct_nested, map_struct_nested) AS result
+        FROM test_pythonudf_nested_complex_type
+        ORDER BY id;
+    """
+
+    sql """
+        DROP FUNCTION IF EXISTS py_nested_complex_vector_series(
+            ARRAY<MAP<STRING, INT>>,
+            MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+            STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs: 
MAP<STRING, ARRAY<INT>>>,
+            MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>
+        );
+    """
+    sql """
+        CREATE FUNCTION py_nested_complex_vector_series(
+            ARRAY<MAP<STRING, INT>>,
+            MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+            STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs: 
MAP<STRING, ARRAY<INT>>>,
+            MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>
+        )
+        RETURNS STRING
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}",
+            "always_nullable" = "true"
+        )
+        AS \$\$
+import pandas as pd
+
+def format_map(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP:' + type(m).__name__
+    return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+    if arr is None:
+        return 'NULL'
+    return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_array_map(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP_ARRAY_MAP:' + type(m).__name__
+    return '{' + ','.join(f'{k}:{format_array_map(m[k])}' for k in sorted(m)) 
+ '}'
+
+def format_attrs(attrs):
+    if attrs is None:
+        return 'NULL'
+    if not isinstance(attrs, dict):
+        return 'BAD_ATTRS:' + type(attrs).__name__
+    parts = []
+    for key in sorted(attrs):
+        val = attrs[key]
+        if val is None:
+            parts.append(f'{key}:NULL')
+        else:
+            parts.append(f'{key}:[' + ','.join(str(x) for x in val) + ']')
+    return '{' + ','.join(parts) + '}'
+
+def format_struct(s):
+    if s is None:
+        return 'NULL'
+    if not isinstance(s, dict):
+        return 'BAD_STRUCT:' + type(s).__name__
+    return '(' + str(s.get('label')) + ',' + format_array_map(s.get('maps')) + 
',' + format_attrs(s.get('attrs')) + ')'
+
+def format_map_struct_nested(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP_STRUCT:' + type(m).__name__
+    parts = []
+    for key in sorted(m):
+        val = m[key]
+        if val is None:
+            parts.append(f'{key}:NULL')
+        elif not isinstance(val, dict):
+            parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+        else:
+            parts.append(f'{key}:(' + str(val.get('tag')) + ',' + 
format_array_map(val.get('metrics')) + ')')
+    return '{' + ','.join(parts) + '}'
+
+def evaluate(array_maps: pd.Series, map_array_maps: pd.Series, struct_nesteds: 
pd.Series, map_struct_nesteds: pd.Series) -> pd.Series:
+    if not all(isinstance(arg, pd.Series) for arg in [array_maps, 
map_array_maps, struct_nesteds, map_struct_nesteds]):
+        return pd.Series(['BAD_VECTOR_ARGS'] * len(array_maps))
+    result = []
+    for array_map, map_array_map, struct_nested, map_struct_nested in 
zip(array_maps, map_array_maps, struct_nesteds, map_struct_nesteds):
+        result.append('|'.join([
+            format_array_map(array_map),
+            format_map_array_map(map_array_map),
+            format_struct(struct_nested),
+            format_map_struct_nested(map_struct_nested),
+        ]))
+    return pd.Series(result)
+\$\$;
+    """
+
+    qt_vector_series_nested_complex """
+        SELECT py_nested_complex_vector_series(array_map, map_array_map, 
struct_nested, map_struct_nested) AS result
+        FROM test_pythonudf_nested_complex_type
+        ORDER BY id;
+    """
+
+    sql """
+        DROP FUNCTION IF EXISTS py_nested_complex_vector_mixed(
+            INT,
+            MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>
+        );
+    """
+    sql """
+        CREATE FUNCTION py_nested_complex_vector_mixed(
+            INT,
+            MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>
+        )
+        RETURNS STRING
+        PROPERTIES (
+            "type" = "PYTHON_UDF",
+            "symbol" = "evaluate",
+            "runtime_version" = "${runtime_version}",
+            "always_nullable" = "true"
+        )
+        AS \$\$
+import pandas as pd
+
+def format_map(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MAP:' + type(m).__name__
+    return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+    if arr is None:
+        return 'NULL'
+    return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_struct_nested(m):
+    if m is None:
+        return 'NULL'
+    if not isinstance(m, dict):
+        return 'BAD_MIXED_SCALAR:' + type(m).__name__
+    parts = []
+    for key in sorted(m):
+        val = m[key]
+        if val is None:
+            parts.append(f'{key}:NULL')
+        elif not isinstance(val, dict):
+            parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+        else:
+            parts.append(f'{key}:(' + str(val.get('tag')) + ',' + 
format_array_map(val.get('metrics')) + ')')
+    return '{' + ','.join(parts) + '}'
+
+def evaluate(ids: pd.Series, mixed_map_struct_nested) -> pd.Series:
+    if not isinstance(ids, pd.Series):
+        return pd.Series(['BAD_VECTOR_ARG'])
+    formatted = format_map_struct_nested(mixed_map_struct_nested)
+    return pd.Series([str(id_value) + '|' + formatted for id_value in ids])
+\$\$;
+    """
+
+    qt_vector_mixed_scalar_nested_complex """
+        SELECT py_nested_complex_vector_mixed(id, map_struct_nested) AS result
+        FROM test_pythonudf_nested_complex_type
+        ORDER BY id;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to