This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 98119b95dfb [fix](variant) return raw string for element_at on scalar-string variant (#64103)
98119b95dfb is described below

commit 98119b95dfbcd62556961b9b41337a8c4d777681
Author: Chenyang Sun <[email protected]>
AuthorDate: Mon Jun 8 10:15:31 2026 +0800

    [fix](variant) return raw string for element_at on scalar-string variant (#64103)
---
 be/src/exprs/function/function_variant_element.cpp |  9 +++++
 .../function/function_variant_element_test.cpp     | 38 ++++++++++++++++++++++
 .../data/variant_p0/sql/select_from_value.out      |  2 +-
 .../suites/variant_p0/element_function.groovy      | 28 ++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/be/src/exprs/function/function_variant_element.cpp 
b/be/src/exprs/function/function_variant_element.cpp
index 4736342f4ee..c2d984885de 100644
--- a/be/src/exprs/function/function_variant_element.cpp
+++ b/be/src/exprs/function/function_variant_element.cpp
@@ -388,6 +388,15 @@ private:
             }
             break;
         }
+        case simdjson::ondemand::json_type::string: {
+            // Extract the raw (unescaped) string value rather than its JSON
+            // representation. simdjson::to_json_string would keep the surrounding
+            // double quotes (e.g. "2026-05-20"), which leaks into the result and
+            // makes scalar-string variants inconsistent with structured ones.
+            std::string_view value_str = value.get_string().value();
+            column->insert_data(value_str.data(), value_str.length());
+            break;
+        }
         default: {
             auto value_str = simdjson::to_json_string(value).value();
             column->insert_data(value_str.data(), value_str.length());
diff --git a/be/test/exprs/function/function_variant_element_test.cpp 
b/be/test/exprs/function/function_variant_element_test.cpp
index 1a8d6985167..7db931af14b 100644
--- a/be/test/exprs/function/function_variant_element_test.cpp
+++ b/be/test/exprs/function/function_variant_element_test.cpp
@@ -61,4 +61,42 @@ TEST(function_variant_element_test, 
extract_from_sparse_column) {
     EXPECT_EQ(result_string, "{\"age\":\"John\",\"name\":\"John\"}");
 }
 
+// CIR-20498: extracting a string property from a scalar-string-root variant
+// (the shape produced by `cast(text as variant)`) must return the raw string,
+// not its JSON token with surrounding double quotes.
+TEST(function_variant_element_test, extract_string_from_scalar_root) {
+    auto variant_column = ColumnVariant::create(0 /*max_subcolumns_count*/, 
false);
+    auto root_column = ColumnString::create();
+    std::string doc = R"({"wsn":"SRFSPXFDVY","uploadTimeValue":"2026-05-20 
18:40:02","n":49.98})";
+    root_column->insert_data(doc.data(), doc.size());
+    variant_column->create_root(std::make_shared<DataTypeString>(), 
std::move(root_column));
+    variant_column->set_num_rows(1);
+    ASSERT_TRUE(variant_column->is_scalar_variant());
+
+    DataTypeSerDe::FormatOptions options;
+    auto tz = cctz::utc_time_zone();
+    options.timezone = &tz;
+
+    auto extract = [&](const std::string& key) {
+        ColumnPtr index_inner = ColumnString::create();
+        assert_cast<ColumnString*>(index_inner->assert_mutable().get())
+                ->insert_data(key.data(), key.size());
+        ColumnPtr index_column = ColumnConst::create(index_inner, 1);
+        ColumnPtr result;
+        auto status =
+                FunctionVariantElement::get_element_column(*variant_column, 
index_column, &result);
+        EXPECT_TRUE(status.ok());
+        std::string out;
+        assert_cast<const ColumnVariant&>(*result.get())
+                .serialize_one_row_to_string(0, &out, options);
+        return out;
+    };
+
+    // string values: no surrounding quotes
+    EXPECT_EQ(extract("wsn"), "SRFSPXFDVY");
+    EXPECT_EQ(extract("uploadTimeValue"), "2026-05-20 18:40:02");
+    // non-string scalars keep their JSON representation
+    EXPECT_EQ(extract("n"), "49.98");
+}
+
 } // namespace doris
diff --git a/regression-test/data/variant_p0/sql/select_from_value.out 
b/regression-test/data/variant_p0/sql/select_from_value.out
index ef562a658e9..1fe3c49651b 100644
--- a/regression-test/data/variant_p0/sql/select_from_value.out
+++ b/regression-test/data/variant_p0/sql/select_from_value.out
@@ -1,4 +1,4 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
 -- !select_from_value --
-"b"
+b
 
diff --git a/regression-test/suites/variant_p0/element_function.groovy 
b/regression-test/suites/variant_p0/element_function.groovy
index 7b5e55ea53b..fb183db9690 100644
--- a/regression-test/suites/variant_p0/element_function.groovy
+++ b/regression-test/suites/variant_p0/element_function.groovy
@@ -29,4 +29,32 @@ suite("regression_test_variant_element_at", "p0")  {
 
     sql """insert into element_fn_test values (1, '{"arr1" : [1, 2, 3]}', 
'{"arr2" : [4, 5, 6]}')"""
     qt_sql """select array_first((x,y) -> (x - y) < 0, cast(v['arr1'] as 
array<int>), cast(v1['arr2'] as array<int>)) from element_fn_test"""
+
+    // CIR-20498: extracting a string property from a scalar-string variant
+    // (e.g. `cast(text as variant)['key']`) must not leak the surrounding JSON
+    // double quotes. The root of such a variant is a raw JSON string, so the
+    // extraction goes through the simdjson document path; a string value must be
+    // returned unescaped, consistently with the structured-subcolumn path.
+    def scalar = sql """select 
cast('{"wsn":"SRFSPXFDVY","uploadTimeValue":"2026-05-20 18:40:02"}' as 
variant)['wsn']"""
+    assertEquals("SRFSPXFDVY", scalar[0][0])
+
+    def sub = sql """select substring(cast('{"uploadTimeValue":"2026-05-20 
18:40:02"}' as variant)['uploadTimeValue'], 1, 10)"""
+    assertEquals("2026-05-20", sub[0][0])
+
+    // values containing escaped characters must be unescaped, not kept as raw JSON tokens
+    def escaped = sql """select cast('{"k":"a\\\\"b"}' as variant)['k']"""
+    assertEquals("a\"b", escaped[0][0])
+
+    // non-string scalars keep their existing JSON representation
+    def num = sql """select cast('{"n":49.98}' as variant)['n']"""
+    assertEquals("49.98", num[0][0])
+
+    // array / object values must keep their JSON text representation (no unquoting):
+    // only the top-level string scalar is unquoted; quotes nested inside JSON are
+    // part of the value and must be preserved.
+    def arr = sql """select cast('{"a":[1,2,3]}' as variant)['a']"""
+    assertEquals("[1,2,3]", arr[0][0])
+
+    def obj = sql """select cast('{"o":{"name":"john"}}' as variant)['o']"""
+    assertEquals('{"name":"john"}', obj[0][0])
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to