This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 3cd7b888682 [Fix](Variant) fix variant with empty key (#35671)
3cd7b888682 is described below
commit 3cd7b8886823f0cac761f1bae2d15c81ea7865bd
Author: lihangyu <[email protected]>
AuthorDate: Thu May 30 19:55:25 2024 +0800
[Fix](Variant) fix variant with empty key (#35671)
In some scenarios an empty key will cause a crash like:
```
*** tablet *** SIGSEGV unknown detail explain (@0x0) received by PID 1527747 (TID 1544788 OR 0x7f3302988700) from PID 0; stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk2/lihangyu/doris/be/src/common/signal_handler.h:429
1# 0x00007F4880A12B50 in /lib64/libc.so.6
2# doris::vectorized::PathInDataBuilder::append(std::basic_string_view<char, std::char_traits<char> >, bool) at /mnt/disk2/lihangyu/doris/be/src/vec/json/path_in_data.cpp:193
3# doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::traverseObject(doris::vectorized::SimdJSONParser::Object const&, doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::ParseContext&) at /mnt/disk2/lihangyu/doris/be/src/vec/json/json_parser.cpp:121
4# doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::traverse(doris::vectorized::SimdJSONParser::Element const&, doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::ParseContext&) at /mnt/disk2/lihangyu/doris/be/src/vec/json/json_parser.cpp:95
5# doris::vectorized::JSONDataParser<doris::vectorized::SimdJSONParser, false>::parse(char const*, unsigned long) at /mnt/disk2/lihangyu/doris/be/src/vec/json/json_parser.cpp:81
```
## Proposed changes
Issue Number: close #xxx
<!--Describe your changes.-->
---
be/src/vec/json/parse2column.cpp | 6 +++++-
be/src/vec/json/path_in_data.cpp | 10 ++++------
regression-test/data/variant_p0/column_name.out | 17 +++++++++++++++-
.../suites/variant_p0/column_name.groovy | 23 +++++++++++++++++++++-
4 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/be/src/vec/json/parse2column.cpp b/be/src/vec/json/parse2column.cpp
index 0f61e24dad7..a154ad14333 100644
--- a/be/src/vec/json/parse2column.cpp
+++ b/be/src/vec/json/parse2column.cpp
@@ -163,7 +163,11 @@ void parse_json_to_variant(IColumn& column, const char*
src, size_t length,
throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Failed to
find sub column {}",
paths[i].get_path());
}
- DCHECK_EQ(subcolumn->size(), old_num_rows);
+ if (subcolumn->size() != old_num_rows) {
+ throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
+ "subcolumn {} size missmatched, may
contains duplicated entry",
+ paths[i].get_path());
+ }
subcolumn->insert(std::move(values[i]), std::move(field_info));
}
// /// Insert default values to missed subcolumns.
diff --git a/be/src/vec/json/path_in_data.cpp b/be/src/vec/json/path_in_data.cpp
index 4b3692f4776..30c9ff21768 100644
--- a/be/src/vec/json/path_in_data.cpp
+++ b/be/src/vec/json/path_in_data.cpp
@@ -188,13 +188,11 @@ PathInDataBuilder&
PathInDataBuilder::append(std::string_view key, bool is_array
if (parts.empty()) {
current_anonymous_array_level += is_array;
}
- if (!key.empty()) {
- if (!parts.empty()) {
- parts.back().is_nested = is_array;
- }
- parts.emplace_back(key, false, current_anonymous_array_level);
- current_anonymous_array_level = 0;
+ if (!parts.empty()) {
+ parts.back().is_nested = is_array;
}
+ parts.emplace_back(key, false, current_anonymous_array_level);
+ current_anonymous_array_level = 0;
return *this;
}
PathInDataBuilder& PathInDataBuilder::append(const PathInData::Parts& path,
bool is_array) {
diff --git a/regression-test/data/variant_p0/column_name.out
b/regression-test/data/variant_p0/column_name.out
index 2942c8d53bc..7e1f23d4ac8 100644
--- a/regression-test/data/variant_p0/column_name.out
+++ b/regression-test/data/variant_p0/column_name.out
@@ -3,7 +3,7 @@
中文 unicode
-- !sql --
-""
+
-- !sql --
\N
@@ -32,3 +32,18 @@ UPPER CASE lower case
-- !sql --
1 {"tag_key1":123456}
+-- !sql --
+\N
+\N
+\N
+\N
+\N
+""
+1234566
+16
+8888888
+"UPPER CASE"
+"dkdkdkdkdkd"
+"ooaoaaaaaaa"
+"xmxxmmmmmm"
+
diff --git a/regression-test/suites/variant_p0/column_name.groovy
b/regression-test/suites/variant_p0/column_name.groovy
index be0026e9c94..26520aafa50 100644
--- a/regression-test/suites/variant_p0/column_name.groovy
+++ b/regression-test/suites/variant_p0/column_name.groovy
@@ -35,7 +35,7 @@ suite("regression_test_variant_column_name", "variant_type"){
// sql """insert into ${table_name} values (2, '{}')"""
sql "truncate table ${table_name}"
sql """insert into ${table_name} values (3, '{"": ""}')"""
- qt_sql """select v[''] from ${table_name} order by k"""
+ qt_sql """select cast(v[''] as text) from ${table_name} order by k"""
sql """insert into ${table_name} values (4, '{"!@#^&*()": "11111"}')"""
qt_sql """select cast(v["!@#^&*()"] as string) from ${table_name} order by
k"""
sql """insert into ${table_name} values (5, '{"123": "456", "789":
"012"}')"""
@@ -50,4 +50,25 @@ suite("regression_test_variant_column_name", "variant_type"){
qt_sql "select * from var_column_name where tags['tag_key1'] is not null
and cast(tags['tag_key1' ] as text) = '123456' order by k desc limit 1;"
qt_sql "select * from var_column_name where Tags['tag_key1'] is not null
and cast(tags['tag_key1' ] as text) = '123456' order by k desc limit 1;"
qt_sql "select * from var_column_name where Tags['tag_key1'] is not null
and cast(Tags['tag_key1' ] as text) = '123456' order by k desc limit 1;"
+
+ // empty key
+ sql """insert into var_column_name values (7, '{"": "UPPER CASE"}')"""
+ sql """
+ insert into var_column_name values (7,
'{"":16,"OpenCapStatus":0,"AccStatus":1,"AccTimeSum":481,"LowVoltage":0,"TowedStatus":0,"EncryptLng":117.23572361077638,"deviceId":"A1100614808888"}')
+ """
+ sql """insert into var_column_name values (7, '{"": ""}')"""
+ sql """insert into var_column_name values (7, '{"": "dkdkdkdkdkd"}')"""
+ sql """insert into var_column_name values (7, '{"": "xmxxmmmmmm"}')"""
+ sql """insert into var_column_name values (7, '{"": "ooaoaaaaaaa"}')"""
+ sql """insert into var_column_name values (7, '{"": 1234566}')"""
+ sql """insert into var_column_name values (7, '{"": 8888888}')"""
+
+ qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as
string)"
+
+ try {
+ sql """insert into var_column_name values (7, '{"": "UPPER CASE", "":
"lower case"}')"""
+ } catch(Exception ex) {
+ logger.info("""INSERT INTO ${table_name} failed: """ + ex)
+ assertTrue(ex.toString().contains("may contains duplicated entry"));
+ }
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]