This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 2588d2ce11b branch-3.1: [regression-test](Variant) fix and add escaped chars cases #52657 (#52664)
2588d2ce11b is described below

commit 2588d2ce11b19ece8b7057397819c72805de1995
Author: lihangyu <[email protected]>
AuthorDate: Wed Jul 2 19:24:50 2025 +0800

    branch-3.1: [regression-test](Variant) fix and add escaped chars cases #52657 (#52664)
    
    cherry-pick from #52657
---
 be/src/vec/columns/column_object.cpp               |  13 ++--
 be/src/vec/columns/column_object.h                 |   3 +-
 regression-test/data/variant_p0/escaped_chars.out  | Bin 0 -> 1458 bytes
 .../data/variant_p0/predefine/delete_update.out    | Bin 931 -> 951 bytes
 .../suites/variant_p0/escaped_chars.groovy         |  71 +++++++++++++++++++++
 5 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index 79380b413f0..0a83f5cd298 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1313,7 +1313,8 @@ const ColumnObject::Subcolumn* 
ColumnObject::get_subcolumn(const PathInData& key
     return &node->data;
 }
 
-size_t ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& 
output) const {
+size_t ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& 
output,
+                                                    
DataTypeSerDe::FormatOptions opt) const {
     if (least_common_type.get_base_type_id() == TypeIndex::Nothing) {
         output.write(DataTypeSerDe::NULL_IN_COMPLEX_TYPE.data(),
                      DataTypeSerDe::NULL_IN_COMPLEX_TYPE.size());
@@ -1328,7 +1329,6 @@ size_t 
ColumnObject::Subcolumn::serialize_text_json(size_t n, BufferWritable& ou
     }
 
     ind -= num_of_defaults_in_prefix;
-    DataTypeSerDe::FormatOptions opt;
     for (size_t i = 0; i < data.size(); ++i) {
         const auto& part = data[i];
         const auto& part_type_serde = data_serdes[i];
@@ -1497,7 +1497,6 @@ void ColumnObject::serialize_one_row_to_string(int row, 
std::string* output) con
         // TODO preallocate memory
         serialize_one_row_to_json_format(row, write_buffer, nullptr);
     }
-
     write_buffer.commit();
     auto str_ref = tmp_col->get_data_at(0);
     *output = std::string(str_ref.data, str_ref.size);
@@ -1739,14 +1738,14 @@ void 
ColumnObject::serialize_one_row_to_json_format(int64_t row_num, BufferWrita
         // Serialize value of current path.
         if (auto subcolumn_it = subcolumn_path_map.find(path);
             subcolumn_it != subcolumn_path_map.end()) {
-            subcolumn_it->second.serialize_text_json(row_num, output);
+            subcolumn_it->second.serialize_text_json(row_num, output, 
{.escape_char = '\\'});
         } else {
             // To serialize value stored in shared data we should first 
deserialize it from binary format.
             Subcolumn tmp_subcolumn(0, true);
             const auto& data = ColumnObject::deserialize_from_sparse_column(
                     sparse_data_values, index_in_sparse_data_values++);
             tmp_subcolumn.insert(data.first, data.second);
-            tmp_subcolumn.serialize_text_json(0, output);
+            tmp_subcolumn.serialize_text_json(0, output, {.escape_char = 
'\\'});
         }
     }
 
@@ -1755,10 +1754,6 @@ void 
ColumnObject::serialize_one_row_to_json_format(int64_t row_num, BufferWrita
         writeChar('}', output);
     }
     writeChar('}', output);
-#ifndef NDEBUG
-    // check if it is a valid json
-#endif
-    return;
 }
 
 size_t ColumnObject::Subcolumn::get_non_null_value_size() const {
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index 69e9d08d62c..5551a10659a 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -144,7 +144,8 @@ public:
 
         size_t get_non_null_value_size() const;
 
-        size_t serialize_text_json(size_t n, BufferWritable& output) const;
+        size_t serialize_text_json(size_t n, BufferWritable& output,
+                                   DataTypeSerDe::FormatOptions opt = {}) 
const;
 
         const DataTypeSerDeSPtr& get_least_common_type_serde() const {
             return least_common_type.get_serde();
diff --git a/regression-test/data/variant_p0/escaped_chars.out 
b/regression-test/data/variant_p0/escaped_chars.out
new file mode 100644
index 00000000000..5a8d9f410c5
Binary files /dev/null and b/regression-test/data/variant_p0/escaped_chars.out 
differ
diff --git a/regression-test/data/variant_p0/predefine/delete_update.out 
b/regression-test/data/variant_p0/predefine/delete_update.out
index 3fab0479cac..2bcfda75ba3 100644
Binary files a/regression-test/data/variant_p0/predefine/delete_update.out and 
b/regression-test/data/variant_p0/predefine/delete_update.out differ
diff --git a/regression-test/suites/variant_p0/escaped_chars.groovy 
b/regression-test/suites/variant_p0/escaped_chars.groovy
new file mode 100644
index 00000000000..8e5a32f6803
--- /dev/null
+++ b/regression-test/suites/variant_p0/escaped_chars.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("regression_test_variant_escaped_chars", "p0"){
+    def tableName = "variant_escape_chars"
+
+    sql """ DROP TABLE IF EXISTS variant_escape_chars """
+
+    sql """
+        CREATE TABLE IF NOT EXISTS variant_escape_chars (
+            `id` INT,
+            `description` VARIANT 
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'This is a test table with escape characters in description'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+            "replication_num" = "1"
+        );
+    """
+
+    sql """
+        INSERT INTO variant_escape_chars VALUES
+        (1, '{"a" : 123, "b" : "test with escape \\\\" characters"}'),
+        (2, '{"a" : 456, "b" : "another test with escape \\\\\\\\ 
characters"}'),
+        (3, '{"a" : 789, "b" : "test with single quote \\\' characters"}'),
+        (4, '{"a" : 101112, "b" : "test with newline \\\\n characters"}'),
+        (5, '{"a" : 131415, "b" : "test with tab \\\\t characters"}'),
+        (6, '{"a" : 161718, "b" : "test with backslash \\\\b characters"}');
+    """
+
+    // test json value with escaped characters
+    qt_select """ SELECT * FROM variant_escape_chars ORDER BY id """
+    qt_select """ SELECT description['b'] FROM variant_escape_chars ORDER BY 
id """
+    qt_select """ SELECT CAST(description['b'] AS TEXT) FROM 
variant_escape_chars ORDER BY id """
+
+    sql """
+        drop table if exists t01;
+        create table t01(id int, b json, c json, d variant, e variant) 
properties ("replication_num" = "1");
+        insert into t01 values (1, '{"c_json":{"a":"a\\\\nb"}}', '{"c_json": 
{"quote":"\\\\"Helvetica tofu try-hard gluten-free gentrify leggings.\\\\" - 
Remington Trantow"}}', '{"c_json": {"quote":"\\\\"Helvetica tofu try-hard 
gluten-free gentrify leggings.\\\\" - Remington Trantow"}}', 
'{"c_json":{"a":"a\\\\nb"}}');
+    """
+    qt_select """ SELECT * FROM t01 """
+    qt_select """select json_extract(b, "\$.c_json"), e["c_json"] from t01;"""
+
+    // test json keys with escaped characters, FIXED in 3.1.0
+    sql "truncate table variant_escape_chars"
+    sql """
+        INSERT INTO variant_escape_chars VALUES
+        (1, '{"test with escape \\\\" characters" : 123}'),
+        (2, '{"another test with escape \\\\\\\\ characters" : 123}'),
+        (3, '{"test with single quote \\\' characters" : 123}'),
+        (4, '{"test with newline \\\\n characters":123}'),
+        (5, '{"test with tab \\\\t characters" : 123}'),
+        (6, '{"test with backslash \\\\b characters" : 123}');
+    """
+    qt_select """ SELECT * FROM variant_escape_chars ORDER BY id """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to