This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b4b5445bf1e [Fix](Variant) fix variant lost null info after
`cast_column` (#30153)
b4b5445bf1e is described below
commit b4b5445bf1eddfde831538a8a9a1d84aef17004a
Author: lihangyu <[email protected]>
AuthorDate: Fri Jan 26 14:02:26 2024 +0800
[Fix](Variant) fix variant lost null info after `cast_column` (#30153)
This could result incorrect output in hirachinal cases
```
sql """insert into ${table_name} values (-3, '{"a" : 1, "b" : 1.5, "c" :
[1, 2, 3]}')"""
sql """insert into ${table_name} select -2, '{"a": 11245, "b" : [123,
{"xx" : 1}], "c" : {"c" : 456, "d" : "null", "e" : 7.111}}' as json_str
union all select -1, '{"a": 1123}' as json_str union all
select *, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" =
"4096") limit 4096 ;"""
mysql> select v["c"] from var_rs where k = -3 or k = -2;
+----------------------+
| element_at(`v`, 'c') |
+----------------------+
| [1,2,3] |
| [] |
+----------------------+
2 rows in set (0.04 sec)
```
---
be/src/exprs/json_functions.cpp | 2 +-
be/src/vec/common/schema_util.cpp | 23 +++++++++-
be/src/vec/data_types/serde/data_type_serde.cpp | 2 +-
.../data/variant_p0/variant_hirachinal.out | 21 ++++++++++
.../suites/variant_p0/variant_hirachinal.groovy | 49 ++++++++++++++++++++++
5 files changed, 94 insertions(+), 3 deletions(-)
diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp
index ace18f1090f..724c78922e8 100644
--- a/be/src/exprs/json_functions.cpp
+++ b/be/src/exprs/json_functions.cpp
@@ -333,7 +333,7 @@ void JsonFunctions::merge_objects(rapidjson::Value&
dst_object, rapidjson::Value
for (auto src_it = src_object.MemberBegin(); src_it !=
src_object.MemberEnd(); ++src_it) {
auto dst_it = dst_object.FindMember(src_it->name);
if (dst_it != dst_object.MemberEnd()) {
- if (src_it->value.IsObject()) {
+ if (src_it->value.IsObject() && dst_it->value.IsObject()) {
merge_objects(dst_it->value, src_it->value, allocator);
} else {
if (dst_it->value.IsNull()) {
diff --git a/be/src/vec/common/schema_util.cpp
b/be/src/vec/common/schema_util.cpp
index 0ceddf25b3f..dc6f65584f2 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -152,6 +152,23 @@ Status cast_column(const ColumnWithTypeAndName& arg, const
DataTypePtr& type, Co
Block tmp_block {arguments};
size_t result_column = tmp_block.columns();
auto ctx = FunctionContext::create_context(nullptr, {}, {});
+
+ // To prevent from null info lost, we should not call function since the
function framework will wrap
+ // nullable to Variant instead of the root of Variant
+ // correct output: Nullable(Array(int)) ->
Nullable(Variant(Nullable(Array(int))))
+ // incorrect output: Nullable(Array(int)) -> Nullable(Variant(Array(int)))
+ if (WhichDataType(remove_nullable(type)).is_variant_type()) {
+ // set variant root column/type to from column/type
+ auto variant = ColumnObject::create(true /*always nullable*/);
+ CHECK(arg.column->is_nullable());
+ variant->create_root(arg.type, arg.column->assume_mutable());
+ ColumnPtr nullable = ColumnNullable::create(
+ variant->get_ptr(),
+
check_and_get_column<ColumnNullable>(arg.column.get())->get_null_map_column_ptr());
+ *result = type->is_nullable() ? nullable : variant->get_ptr();
+ return Status::OK();
+ }
+
// We convert column string to jsonb type just add a string jsonb field to
dst column instead of parse
// each line in original string column.
ctx->set_string_as_jsonb_string(true);
@@ -159,6 +176,8 @@ Status cast_column(const ColumnWithTypeAndName& arg, const
DataTypePtr& type, Co
RETURN_IF_ERROR(
function->execute(ctx.get(), tmp_block, {0}, result_column,
arg.column->size()));
*result =
tmp_block.get_by_position(result_column).column->convert_to_full_column_if_const();
+ VLOG_DEBUG << fmt::format("{} before convert {}, after convert {}",
arg.name,
+ arg.column->get_name(), (*result)->get_name());
return Status::OK();
}
@@ -488,7 +507,9 @@ Status extract(ColumnPtr source, const PathInData& path,
MutableColumnPtr& dst)
size_t result_column = tmp_block.columns();
tmp_block.insert({nullptr, json_type, ""});
RETURN_IF_ERROR(function->execute(nullptr, tmp_block, argnum,
result_column, source->size()));
- dst = tmp_block.get_by_position(result_column).column->assume_mutable();
+ dst = tmp_block.get_by_position(result_column)
+ .column->convert_to_full_column_if_const()
+ ->assume_mutable();
return Status::OK();
}
// ---------------------------
diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp
b/be/src/vec/data_types/serde/data_type_serde.cpp
index 32466c47e84..62ba805b82f 100644
--- a/be/src/vec/data_types/serde/data_type_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_serde.cpp
@@ -45,8 +45,8 @@ DataTypeSerDeSPtrs create_data_type_serdes(const
std::vector<SlotDescriptor*>& s
void DataTypeSerDe::convert_array_to_rapidjson(const vectorized::Array& array,
rapidjson::Value& target,
rapidjson::Document::AllocatorType& allocator) {
+ target.SetArray();
for (const vectorized::Field& item : array) {
- target.SetArray();
rapidjson::Value val;
convert_field_to_rapidjson(item, val, allocator);
target.PushBack(val, allocator);
diff --git a/regression-test/data/variant_p0/variant_hirachinal.out
b/regression-test/data/variant_p0/variant_hirachinal.out
new file mode 100644
index 00000000000..0f6db58969a
--- /dev/null
+++ b/regression-test/data/variant_p0/variant_hirachinal.out
@@ -0,0 +1,21 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+-3 {"a":1,"b":1.5,"c":[1,2,3]}
+-2 {"a":11245,"b":[123,{"xx":1}],"c":{"c":456,"d":"null","e":7.111}}
+-1 {"a":1123}
+0 {"a":1234,"xxxx":"kaana"}
+1 {"a":1234,"xxxx":"kaana"}
+2 {"a":1234,"xxxx":"kaana"}
+3 {"a":1234,"xxxx":"kaana"}
+4 {"a":1234,"xxxx":"kaana"}
+5 {"a":1234,"xxxx":"kaana"}
+6 {"a":1234,"xxxx":"kaana"}
+
+-- !sql --
+[1,2,3]
+{"c":456,"d":"null","e":7.111}
+
+-- !sql --
+1.5
+[123,{"xx":1}]
+
diff --git a/regression-test/suites/variant_p0/variant_hirachinal.groovy
b/regression-test/suites/variant_p0/variant_hirachinal.groovy
new file mode 100644
index 00000000000..5ad12cdbeea
--- /dev/null
+++ b/regression-test/suites/variant_p0/variant_hirachinal.groovy
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("regression_test_variant_hirachinal", "variant_type"){
+ def set_be_config = { key, value ->
+ String backend_id;
+ def backendId_to_backendIP = [:]
+ def backendId_to_backendHttpPort = [:]
+ getBackendIpHttpPort(backendId_to_backendIP,
backendId_to_backendHttpPort);
+
+ backend_id = backendId_to_backendIP.keySet()[0]
+ def (code, out, err) =
update_be_config(backendId_to_backendIP.get(backend_id),
backendId_to_backendHttpPort.get(backend_id), key, value)
+ logger.info("update config: code=" + code + ", out=" + out + ", err="
+ err)
+ }
+
+ def table_name = "var_rs"
+ sql "DROP TABLE IF EXISTS ${table_name}"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${table_name} (
+ k bigint,
+ v variant
+ )
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 1
+ properties("replication_num" = "1", "disable_auto_compaction" =
"false");
+ """
+ sql "set experimental_enable_nereids_planner = false"
+ sql """insert into ${table_name} values (-3, '{"a" : 1, "b" : 1.5, "c" :
[1, 2, 3]}')"""
+ sql """insert into ${table_name} select -2, '{"a": 11245, "b" : [123,
{"xx" : 1}], "c" : {"c" : 456, "d" : "null", "e" : 7.111}}' as json_str
+ union all select -1, '{"a": 1123}' as json_str union all select
*, '{"a" : 1234, "xxxx" : "kaana"}' as json_str from numbers("number" = "4096")
limit 4096 ;"""
+ qt_sql "select * from ${table_name} order by k limit 10"
+ qt_sql "select v['c'] from ${table_name} where k = -3 or k = -2"
+ qt_sql "select v['b'] from ${table_name} where k = -3 or k = -2"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]