This is an automated email from the ASF dual-hosted git repository.
hello-stephen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b653831c9fc [fix](function) deduplicate map keys after string-to-map
cast (#63713)
b653831c9fc is described below
commit b653831c9fc7ad6a182b4bdfdc028c0134448c59
Author: Mryange <[email protected]>
AuthorDate: Fri May 29 10:42:08 2026 +0800
[fix](function) deduplicate map keys after string-to-map cast (#63713)
### What problem does this PR solve?
Problem Summary:
Casting a JSON string that contains duplicate object keys to MAP kept
every duplicate entry, because the string-to-complex cast path returned
the generic wrapper directly and never called
ColumnMap::deduplicate_keys(). This made string-to-map casts inconsistent
with the MAP constructor, where the last value for a duplicated key wins.
Reproduction SQL:
```sql
SELECT CAST('{"a":1,"a":2}' AS MAP<STRING,INT>);
SELECT size(CAST('{"a":1,"a":2}' AS MAP<STRING,INT>));
SELECT element_at(CAST('{"a":1,"a":2}' AS MAP<STRING,INT>), 'a');
SELECT CAST('{"outer":{"a":1,"a":2}}' AS MAP<STRING, MAP<STRING, INT>>);
SELECT element_at(element_at(CAST('{"outer":{"a":1,"a":2}}' AS MAP<STRING,
MAP<STRING, INT>>), 'outer'), 'a');
SELECT map('a',1,'a',2);
SELECT size(map('a',1,'a',2));
SELECT element_at(map('a',1,'a',2), 'a');
```
Before this fix:
```text
{"a":1, "a":2}
2
1
{"outer":{"a":1, "a":2}}
1
{"a":2}
1
2
```
After this fix:
```text
{"a":2}
1
2
{"outer":{"a":2}}
2
{"a":2}
1
2
```
---
be/src/exprs/function/cast/cast_to_map.h | 48 +++++++++++++++++++---
.../basic-elements/data-types/map-md.out | 15 +++++++
.../basic-elements/data-types/map-md.groovy | 10 +++++
3 files changed, 68 insertions(+), 5 deletions(-)
diff --git a/be/src/exprs/function/cast/cast_to_map.h
b/be/src/exprs/function/cast/cast_to_map.h
index abfec1baa92..44a7576a6cc 100644
--- a/be/src/exprs/function/cast/cast_to_map.h
+++ b/be/src/exprs/function/cast/cast_to_map.h
@@ -21,15 +21,53 @@
#include "exprs/function/cast/cast_base.h"
namespace doris::CastWrapper {
+
+inline Status deduplicate_map_keys_in_result(Block& block, uint32_t result) {
+ auto result_column_name = block.get_by_position(result).column->get_name();
+ auto mutable_result_column =
IColumn::mutate(std::move(block.get_by_position(result).column));
+
+ if (auto* nullable_column =
check_and_get_column<ColumnNullable>(*mutable_result_column)) {
+ auto nested_column =
IColumn::mutate(nullable_column->get_nested_column_ptr());
+ auto* map_column = check_and_get_column<ColumnMap>(*nested_column);
+ if (!map_column) {
+ return Status::RuntimeError("Illegal column {} for function CAST
AS MAP",
+ result_column_name);
+ }
+
+ RETURN_IF_ERROR(map_column->deduplicate_keys(true));
+ ColumnPtr nested_column_ptr = std::move(nested_column);
+ nullable_column->change_nested_column(nested_column_ptr);
+ } else {
+ auto* map_column =
check_and_get_column<ColumnMap>(*mutable_result_column);
+ if (!map_column) {
+ return Status::RuntimeError("Illegal column {} for function CAST
AS MAP",
+ result_column_name);
+ }
+
+ RETURN_IF_ERROR(map_column->deduplicate_keys(true));
+ }
+
+ block.get_by_position(result).column = std::move(mutable_result_column);
+ return Status::OK();
+}
+
+inline WrapperType wrap_string_to_map_wrapper(WrapperType wrapper) {
+ return [wrapper = std::move(wrapper)](FunctionContext* context, Block&
block,
+ const ColumnNumbers& arguments,
uint32_t result,
+ size_t input_rows_count,
+ const NullMap::value_type* null_map
= nullptr) {
+ RETURN_IF_ERROR(wrapper(context, block, arguments, result,
input_rows_count, null_map));
+ return deduplicate_map_keys_in_result(block, result);
+ };
+}
+
//TODO(Amory) . Need support more cast for key , value for map
WrapperType create_map_wrapper(FunctionContext* context, const DataTypePtr&
from_type,
const DataTypeMap& to_type) {
if (is_string_type(from_type->get_primitive_type())) {
- if (context->enable_strict_mode()) {
- return cast_from_string_to_complex_type_strict_mode;
- } else {
- return cast_from_string_to_complex_type;
- }
+ auto wrapper = context->enable_strict_mode() ?
cast_from_string_to_complex_type_strict_mode
+ :
cast_from_string_to_complex_type;
+ return wrap_string_to_map_wrapper(wrapper);
}
const auto* from = check_and_get_data_type<DataTypeMap>(from_type.get());
if (!from) {
diff --git
a/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out
b/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out
index a9030f76745..db9b0c74427 100644
--- a/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out
+++ b/regression-test/data/doc/sql-manual/basic-elements/data-types/map-md.out
@@ -77,6 +77,21 @@
-- !sql --
{"key1":1, "key2":2}
+-- !sql --
+{"a":2}
+
+-- !sql --
+1
+
+-- !sql --
+2
+
+-- !sql --
+{"outer":{"a":2}}
+
+-- !sql --
+2
+
-- !sql --
{"key1":1, "key2":2} 2
diff --git
a/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy
b/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy
index a48f63efa00..4c9e6b4771a 100644
---
a/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy
+++
b/regression-test/suites/doc/sql-manual/basic-elements/data-types/map-md.groovy
@@ -159,6 +159,16 @@ suite("map-md", "p0") {
qt_sql """ SELECT CAST('{"key1":1,"key2":2}' AS MAP<STRING, INT>) """
+ qt_sql """ SELECT CAST('{"a":1,"a":2}' AS MAP<STRING, INT>) """
+
+ qt_sql """ SELECT size(CAST('{"a":1,"a":2}' AS MAP<STRING, INT>)) """
+
+ qt_sql """ SELECT element_at(CAST('{"a":1,"a":2}' AS MAP<STRING, INT>),
'a') """
+
+ qt_sql """ SELECT CAST('{"outer":{"a":1,"a":2}}' AS MAP<STRING,
MAP<STRING, INT>>) """
+
+ qt_sql """ SELECT element_at(element_at(CAST('{"outer":{"a":1,"a":2}}' AS
MAP<STRING, MAP<STRING, INT>>), 'outer'), 'a') """
+
sql """ DROP TABLE IF EXISTS ${tableName}; """
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]