This is an automated email from the ASF dual-hosted git repository.

hello-stephen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b653831c9fc [fix](function) deduplicate map keys after string-to-map cast (#63713)
b653831c9fc is described below

commit b653831c9fc7ad6a182b4bdfdc028c0134448c59
Author: Mryange <[email protected]>
AuthorDate: Fri May 29 10:42:08 2026 +0800

    [fix](function) deduplicate map keys after string-to-map cast (#63713)
    
    ### What problem does this PR solve?
    
    
    Problem Summary:
    Casting a JSON string with duplicated object keys to MAP kept all
    duplicated entries because the string-to-complex cast path returned the
    generic wrapper directly and skipped ColumnMap::deduplicate_keys(). This
    made string-to-map casts inconsistent with MAP constructor semantics,
    where the last value wins.
    
    Reproduction SQL:
    
    ```sql
    SELECT CAST('{"a":1,"a":2}' AS MAP<STRING,INT>);
    SELECT size(CAST('{"a":1,"a":2}' AS MAP<STRING,INT>));
    SELECT element_at(CAST('{"a":1,"a":2}' AS MAP<STRING,INT>), 'a');
    
    SELECT CAST('{"outer":{"a":1,"a":2}}' AS MAP<STRING, MAP<STRING, INT>>);
    SELECT element_at(element_at(CAST('{"outer":{"a":1,"a":2}}' AS MAP<STRING, MAP<STRING, INT>>), 'outer'), 'a');
    
    SELECT map('a',1,'a',2);
    SELECT size(map('a',1,'a',2));
    SELECT element_at(map('a',1,'a',2), 'a');
    ```
    
    Before this fix:
    
    ```text
    {"a":1, "a":2}
    2
    1
    
    {"outer":{"a":1, "a":2}}
    1
    
    {"a":2}
    1
    2
    ```
    
    After this fix:
    
    ```text
    {"a":2}
    1
    2
    
    {"outer":{"a":2}}
    2
    
    {"a":2}
    1
    2
    ```
---
 be/src/exprs/function/cast/cast_to_map.h           | 48 +++++++++++++++++++---
 .../basic-elements/data-types/map-md.out           | 15 +++++++
 .../basic-elements/data-types/map-md.groovy        | 10 +++++
 3 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/be/src/exprs/function/cast/cast_to_map.h 
b/be/src/exprs/function/cast/cast_to_map.h
index abfec1baa92..44a7576a6cc 100644
--- a/be/src/exprs/function/cast/cast_to_map.h
+++ b/be/src/exprs/function/cast/cast_to_map.h
@@ -21,15 +21,53 @@
 #include "exprs/function/cast/cast_base.h"
 
 namespace doris::CastWrapper {
+
+inline Status deduplicate_map_keys_in_result(Block& block, uint32_t result) {
+    auto result_column_name = block.get_by_position(result).column->get_name();
+    auto mutable_result_column = 
IColumn::mutate(std::move(block.get_by_position(result).column));
+
+    if (auto* nullable_column = 
check_and_get_column<ColumnNullable>(*mutable_result_column)) {
+        auto nested_column = 
IColumn::mutate(nullable_column->get_nested_column_ptr());
+        auto* map_column = check_and_get_column<ColumnMap>(*nested_column);
+        if (!map_column) {
+            return Status::RuntimeError("Illegal column {} for function CAST 
AS MAP",
+                                        result_column_name);
+        }
+
+        RETURN_IF_ERROR(map_column->deduplicate_keys(true));
+        ColumnPtr nested_column_ptr = std::move(nested_column);
+        nullable_column->change_nested_column(nested_column_ptr);
+    } else {
+        auto* map_column = 
check_and_get_column<ColumnMap>(*mutable_result_column);
+        if (!map_column) {
+            return Status::RuntimeError("Illegal column {} for function CAST 
AS MAP",
+                                        result_column_name);
+        }
+
+        RETURN_IF_ERROR(map_column->deduplicate_keys(true));
+    }
+
+    block.get_by_position(result).column = std::move(mutable_result_column);
+    return Status::OK();
+}
+
+inline WrapperType wrap_string_to_map_wrapper(WrapperType wrapper) {
+    return [wrapper = std::move(wrapper)](FunctionContext* context, Block& 
block,
+                                          const ColumnNumbers& arguments, 
uint32_t result,
+                                          size_t input_rows_count,
+                                          const NullMap::value_type* null_map 
= nullptr) {
+        RETURN_IF_ERROR(wrapper(context, block, arguments, result, 
input_rows_count, null_map));
+        return deduplicate_map_keys_in_result(block, result);
+    };
+}
+
 //TODO(Amory) . Need support more cast for key , value for map
 WrapperType create_map_wrapper(FunctionContext* context, const DataTypePtr& 
from_type,
                                const DataTypeMap& to_type) {
     if (is_string_type(from_type->get_primitive_type())) {
-        if (context->enable_strict_mode()) {
-            return cast_from_string_to_complex_type_strict_mode;
-        } else {
-            return cast_from_string_to_complex_type;
-        }
+        auto wrapper = context->enable_strict_mode() ? 
cast_from_string_to_complex_type_strict_mode
+                                                     : 
cast_from_string_to_complex_type;
+        return wrap_string_to_map_wrapper(wrapper);
     }
     const auto* from = check_and_get_data_type<DataTypeMap>(from_type.get());
     if (!from) {
diff --git 
a/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out 
b/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out
index a9030f76745..db9b0c74427 100644
--- a/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out
+++ b/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out
@@ -77,6 +77,21 @@
 -- !sql --
 {"key1":1, "key2":2}
 
+-- !sql --
+{"a":2}
+
+-- !sql --
+1
+
+-- !sql --
+2
+
+-- !sql --
+{"outer":{"a":2}}
+
+-- !sql --
+2
+
 -- !sql --
 {"key1":1, "key2":2}   2
 
diff --git 
a/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy 
b/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy
index a48f63efa00..4c9e6b4771a 100644
--- 
a/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy
+++ 
b/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy
@@ -159,6 +159,16 @@ suite("map-md", "p0") {
 
     qt_sql """ SELECT CAST('{"key1":1,"key2":2}' AS MAP<STRING, INT>) """
 
+    qt_sql """ SELECT CAST('{"a":1,"a":2}' AS MAP<STRING, INT>) """
+
+    qt_sql """ SELECT size(CAST('{"a":1,"a":2}' AS MAP<STRING, INT>)) """
+
+    qt_sql """ SELECT element_at(CAST('{"a":1,"a":2}' AS MAP<STRING, INT>), 
'a') """
+
+    qt_sql """ SELECT CAST('{"outer":{"a":1,"a":2}}' AS MAP<STRING, 
MAP<STRING, INT>>) """
+
+    qt_sql """ SELECT element_at(element_at(CAST('{"outer":{"a":1,"a":2}}' AS 
MAP<STRING, MAP<STRING, INT>>), 'outer'), 'a') """
+
     sql """ DROP TABLE IF EXISTS ${tableName}; """
     sql """
         CREATE TABLE IF NOT EXISTS ${tableName} (


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to