This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 3cd7b888682 [Fix](Variant) fix variant with empty key (#35671)
3cd7b888682 is described below

commit 3cd7b8886823f0cac761f1bae2d15c81ea7865bd
Author: lihangyu <[email protected]>
AuthorDate: Thu May 30 19:55:25 2024 +0800

    [Fix](Variant) fix variant with empty key (#35671)
    
    In some scenarios an empty key will cause a crash like:
    
    ```
    *** tablet *** SIGSEGV unknown detail explain (@0x0) received by PID 1527747 (TID 1544788 OR 0x7f3302988700) from PID 0; stack trace: ***
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk2/lihangyu/doris/be/src/common/signal_handler.h:429
     1# 0x00007F4880A12B50 in /lib64/libc.so.6
     2# doris::vectorized::PathInDataBuilder::append(std::basic_string_view<char, std::char_traits<char> >, bool) at /mnt/disk2/lihangyu/doris/be/src/vec/json/path_in_data.cpp:193
     3# doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::traverseObject(doris::vectorized::SimdJSONParser::Object const&, doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::ParseContext&) at /mnt/disk2/lihangyu/doris/be/src/vec/json/json_parser.cpp:121
     4# doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::traverse(doris::vectorized::SimdJSONParser::Element const&, doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::ParseContext&) at /mnt/disk2/lihangyu/doris/be/src/vec/json/json_parser.cpp:95
     5# doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::parse(char const*, unsigned long) at /mnt/disk2/lihangyu/doris/be/src/vec/json/json_parser.cpp:81
    ```
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 be/src/vec/json/parse2column.cpp                   |  6 +++++-
 be/src/vec/json/path_in_data.cpp                   | 10 ++++------
 regression-test/data/variant_p0/column_name.out    | 17 +++++++++++++++-
 .../suites/variant_p0/column_name.groovy           | 23 +++++++++++++++++++++-
 4 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp
index 0f61e24dad7..a154ad14333 100644
--- a/be/src/vec/json/parse2column.cpp
+++ b/be/src/vec/json/parse2column.cpp
@@ -163,7 +163,11 @@ void parse_json_to_variant(IColumn& column, const char* 
src, size_t length,
             throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to 
find sub column {}",
                                    paths[i].get_path());
         }
-        DCHECK_EQ(subcolumn->size(), old_num_rows);
+        if (subcolumn->size() != old_num_rows) {
+            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+                                   "subcolumn {} size missmatched, may 
contains duplicated entry",
+                                   paths[i].get_path());
+        }
         subcolumn->insert(std::move(values[i]), std::move(field_info));
     }
     // /// Insert default values to missed subcolumns.
diff --git a/be/src/vec/json/path_in_data.cpp b/be/src/vec/json/path_in_data.cpp
index 4b3692f4776..30c9ff21768 100644
--- a/be/src/vec/json/path_in_data.cpp
+++ b/be/src/vec/json/path_in_data.cpp
@@ -188,13 +188,11 @@ PathInDataBuilder& 
PathInDataBuilder::append(std::string_view key, bool is_array
     if (parts.empty()) {
         current_anonymous_array_level += is_array;
     }
-    if (!key.empty()) {
-        if (!parts.empty()) {
-            parts.back().is_nested = is_array;
-        }
-        parts.emplace_back(key, false, current_anonymous_array_level);
-        current_anonymous_array_level = 0;
+    if (!parts.empty()) {
+        parts.back().is_nested = is_array;
     }
+    parts.emplace_back(key, false, current_anonymous_array_level);
+    current_anonymous_array_level = 0;
     return *this;
 }
 PathInDataBuilder& PathInDataBuilder::append(const PathInData::Parts& path, 
bool is_array) {
diff --git a/regression-test/data/variant_p0/column_name.out 
b/regression-test/data/variant_p0/column_name.out
index 2942c8d53bc..7e1f23d4ac8 100644
--- a/regression-test/data/variant_p0/column_name.out
+++ b/regression-test/data/variant_p0/column_name.out
@@ -3,7 +3,7 @@
 中文     unicode
 
 -- !sql --
-""
+
 
 -- !sql --
 \N
@@ -32,3 +32,18 @@ UPPER CASE   lower case
 -- !sql --
 1      {"tag_key1":123456}
 
+-- !sql --
+\N
+\N
+\N
+\N
+\N
+""
+1234566
+16
+8888888
+"UPPER CASE"
+"dkdkdkdkdkd"
+"ooaoaaaaaaa"
+"xmxxmmmmmm"
+
diff --git a/regression-test/suites/variant_p0/column_name.groovy 
b/regression-test/suites/variant_p0/column_name.groovy
index be0026e9c94..26520aafa50 100644
--- a/regression-test/suites/variant_p0/column_name.groovy
+++ b/regression-test/suites/variant_p0/column_name.groovy
@@ -35,7 +35,7 @@ suite("regression_test_variant_column_name", "variant_type"){
     // sql """insert into ${table_name} values (2, '{}')"""
     sql "truncate table ${table_name}"
     sql """insert into ${table_name} values (3, '{"": ""}')"""
-    qt_sql """select v[''] from ${table_name} order by k"""
+    qt_sql """select cast(v[''] as text) from ${table_name} order by k"""
     sql """insert into ${table_name} values (4, '{"!@#^&*()": "11111"}')"""
     qt_sql """select cast(v["!@#^&*()"] as string) from ${table_name} order by 
k"""
     sql """insert into ${table_name} values (5, '{"123": "456", "789": 
"012"}')"""
@@ -50,4 +50,25 @@ suite("regression_test_variant_column_name", "variant_type"){
     qt_sql "select * from var_column_name where tags['tag_key1'] is not null 
and cast(tags['tag_key1' ] as text) = '123456' order by k desc limit 1;"    
     qt_sql "select * from var_column_name where Tags['tag_key1'] is not null 
and cast(tags['tag_key1' ] as text) = '123456' order by k desc limit 1;"    
     qt_sql "select * from var_column_name where Tags['tag_key1'] is not null 
and cast(Tags['tag_key1' ] as text) = '123456' order by k desc limit 1;"    
+
+    // empty key
+    sql """insert into var_column_name values (7, '{"": "UPPER CASE"}')"""
+    sql """
+        insert into var_column_name values (7, 
'{"":16,"OpenCapStatus":0,"AccStatus":1,"AccTimeSum":481,"LowVoltage":0,"TowedStatus":0,"EncryptLng":117.23572361077638,"deviceId":"A1100614808888"}')
+    """
+    sql """insert into var_column_name values (7, '{"": ""}')"""
+    sql """insert into var_column_name values (7, '{"": "dkdkdkdkdkd"}')"""
+    sql """insert into var_column_name values (7, '{"": "xmxxmmmmmm"}')"""
+    sql """insert into var_column_name values (7, '{"": "ooaoaaaaaaa"}')"""
+    sql """insert into var_column_name values (7, '{"": 1234566}')"""
+    sql """insert into var_column_name values (7, '{"": 8888888}')"""
+
+    qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as 
string)"
+
+    try {
+        sql """insert into var_column_name values (7, '{"": "UPPER CASE", "": 
"lower case"}')"""
+    } catch(Exception ex) {
+        logger.info("""INSERT INTO ${table_name} failed: """ + ex)
+        assertTrue(ex.toString().contains("may contains duplicated entry"));
+    }
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to