This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e51f75e424c [FIX](map)fix map with rowstore table (#28877)
e51f75e424c is described below
commit e51f75e424c16b69a59c9350d5182c960a1c38b9
Author: amory <[email protected]>
AuthorDate: Sat Dec 23 12:11:06 2023 +0800
[FIX](map)fix map with rowstore table (#28877)
---
be/src/vec/columns/column_map.cpp | 2 +-
be/test/vec/jsonb/serialize_test.cpp | 157 +++++++++++++++++++++
.../query/test_nested_type_with_rowstore.out | 15 ++
.../datatype_p0/nested_types/query/varchar.tsv | 2 +
.../query/test_nested_type_with_rowstore.groovy | 58 ++++++++
5 files changed, 233 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/columns/column_map.cpp
b/be/src/vec/columns/column_map.cpp
index d4b64f8c163..4d831e01f9e 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -217,7 +217,7 @@ StringRef ColumnMap::serialize_value_into_arena(size_t n,
Arena& arena, char con
const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) {
size_t array_size = unaligned_load<size_t>(pos);
- pos += 2 * sizeof(array_size);
+ pos += sizeof(array_size);
for (size_t i = 0; i < array_size; ++i) {
pos = get_keys().deserialize_and_insert_from_arena(pos);
diff --git a/be/test/vec/jsonb/serialize_test.cpp
b/be/test/vec/jsonb/serialize_test.cpp
index 880694c8d9f..f918e4be2d6 100644
--- a/be/test/vec/jsonb/serialize_test.cpp
+++ b/be/test/vec/jsonb/serialize_test.cpp
@@ -44,8 +44,10 @@
#include "vec/columns/column_array.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_map.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
+#include "vec/columns/column_struct.h"
#include "vec/columns/column_vector.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
@@ -56,9 +58,11 @@
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_hll.h"
+#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
+#include "vec/data_types/data_type_struct.h"
#include "vec/data_types/data_type_time_v2.h"
#include "vec/data_types/serde/data_type_serde.h"
#include "vec/runtime/vdatetime_value.h"
@@ -177,6 +181,159 @@ TEST(BlockSerializeTest, Array) {
EXPECT_EQ(block.dump_data(), new_block.dump_data());
}
+TEST(BlockSerializeTest, Map) { // round-trips a map column: block -> JSONB rows -> block
+ TabletSchema schema;
+ TabletColumn map;
+ map.set_name("m");
+ map.set_unique_id(1);
+ map.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
+ schema.append_column(map);
+ // Column under test: map<nullable string, nullable string>.
+ DataTypePtr s =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+ DataTypePtr d =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+ DataTypePtr m = std::make_shared<DataTypeMap>(s, d);
+ Array k1, k2, v1, v2;
+ k1.push_back("null"); // a string literal spelling null, not a Null() field
+ k1.push_back("doris");
+ k1.push_back("clever amory");
+ v1.push_back("ss");
+ v1.push_back(Null()); // an actual null entry among the values
+ v1.push_back("NULL");
+ k2.push_back("hello amory");
+ k2.push_back("NULL");
+ k2.push_back("cute amory");
+ k2.push_back("doris");
+ v2.push_back("s");
+ v2.push_back("0");
+ v2.push_back("sf");
+ v2.push_back(Null());
+ Map m1, m2; // each Map field is filled with a keys Array followed by a values Array
+ m1.push_back(k1);
+ m1.push_back(v1);
+ m2.push_back(k2);
+ m2.push_back(v2);
+ MutableColumnPtr map_column = m->create_column();
+ map_column->reserve(2);
+ map_column->insert(m1);
+ map_column->insert(m2);
+ vectorized::ColumnWithTypeAndName type_and_name(map_column->get_ptr(), m,
"test_map");
+ vectorized::Block block;
+ block.insert(type_and_name);
+
+ MutableColumnPtr col = ColumnString::create(); // string column handed to both the serialize and deserialize calls
+ // serialize every column of the block into col
+ std::cout << "serialize to jsonb" << std::endl;
+ JsonbSerializeUtil::block_to_jsonb(schema, block,
static_cast<ColumnString&>(*col.get()),
+ block.columns(),
+
create_data_type_serdes(block.get_data_types()));
+ // deserialize: first describe the column to read back with a tuple descriptor
+ TupleDescriptor read_desc(PTupleDescriptor(), true);
+ // slot for column m, using unique id 1 to match the schema column above
+ TSlotDescriptor tslot;
+ tslot.__set_colName("m");
+ tslot.nullIndicatorBit = -1;
+ tslot.nullIndicatorByte = 0;
+ TypeDescriptor type_desc(TYPE_MAP);
+ type_desc.children.push_back(TypeDescriptor(TYPE_STRING));
+ type_desc.children.push_back(TypeDescriptor(TYPE_INT)); // NOTE(review): value type is INT although the column above is map<string, string> - confirm intended
+ type_desc.contains_nulls.push_back(true);
+ type_desc.contains_nulls.push_back(true);
+ tslot.__set_col_unique_id(1);
+ tslot.__set_slotType(type_desc.to_thrift());
+ SlotDescriptor* slot = new SlotDescriptor(tslot); // NOTE(review): raw new; presumably read_desc takes ownership (constructed with true) - confirm
+ read_desc.add_slot(slot);
+
+ Block new_block = block.clone_empty(); // target block for the deserialized rows
+ std::unordered_map<uint32_t, uint32_t> col_uid_to_idx;
+ std::vector<std::string> default_values;
+ default_values.resize(read_desc.slots().size());
+ for (int i = 0; i < read_desc.slots().size(); ++i) { // map each slot unique id to its index and collect its default value
+ col_uid_to_idx[read_desc.slots()[i]->col_unique_id()] = i;
+ default_values[i] = read_desc.slots()[i]->col_default_value();
+ std::cout << "uid " << read_desc.slots()[i]->col_unique_id() << ":" <<
i << std::endl;
+ }
+ std::cout << block.dump_data() << std::endl;
+ std::cout << new_block.dump_data() << std::endl;
+ std::cout << "deserialize from jsonb" << std::endl;
+
JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()),
+ static_cast<ColumnString&>(*col.get()),
col_uid_to_idx,
+ new_block, default_values);
+ std::cout << block.dump_data() << std::endl;
+ std::cout << new_block.dump_data() << std::endl;
+ EXPECT_EQ(block.dump_data(), new_block.dump_data()); // the round trip must reproduce the original dump
+}
+
+TEST(BlockSerializeTest, Struct) { // round-trips a struct column: block -> JSONB rows -> block
+ TabletSchema schema;
+ TabletColumn struct_col;
+ struct_col.set_name("struct");
+ struct_col.set_unique_id(1);
+ struct_col.set_type(FieldType::OLAP_FIELD_TYPE_STRUCT);
+ schema.append_column(struct_col);
+ vectorized::Block block;
+ { // build a two-row struct column with three nullable fields: string, int128, uint8
+ DataTypePtr s =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+ DataTypePtr d =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt128>());
+ DataTypePtr m =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>());
+ DataTypePtr st =
std::make_shared<DataTypeStruct>(std::vector<DataTypePtr> {s, d, m});
+ Tuple t1, t2;
+ t1.push_back(String("amory cute"));
+ t1.push_back(__int128_t(37));
+ t1.push_back(true);
+ t2.push_back("null"); // a string literal spelling null, not a Null() field
+ t2.push_back(__int128_t(26));
+ t2.push_back(false);
+ MutableColumnPtr struct_column = st->create_column();
+ struct_column->reserve(2);
+ struct_column->insert(t1);
+ struct_column->insert(t2);
+ vectorized::ColumnWithTypeAndName
type_and_name(struct_column->get_ptr(), st,
+ "test_struct");
+ block.insert(type_and_name);
+ }
+
+ MutableColumnPtr col = ColumnString::create(); // string column handed to both the serialize and deserialize calls
+ // serialize every column of the block into col
+ std::cout << "serialize to jsonb" << std::endl;
+ JsonbSerializeUtil::block_to_jsonb(schema, block,
static_cast<ColumnString&>(*col.get()),
+ block.columns(),
+
create_data_type_serdes(block.get_data_types()));
+ // deserialize: first describe the column to read back with a tuple descriptor
+ TupleDescriptor read_desc(PTupleDescriptor(), true);
+ // slot for column struct, using unique id 1 to match the schema column above
+ TSlotDescriptor tslot;
+ tslot.__set_colName("struct");
+ tslot.nullIndicatorBit = -1;
+ tslot.nullIndicatorByte = 0;
+ TypeDescriptor type_desc(TYPE_STRUCT); // sub-types below are declared in the same order as the three struct fields built above
+ type_desc.add_sub_type(TYPE_STRING, "name", true);
+ type_desc.add_sub_type(TYPE_LARGEINT, "age", true);
+ type_desc.add_sub_type(TYPE_BOOLEAN, "is", true);
+ tslot.__set_col_unique_id(1);
+ tslot.__set_slotType(type_desc.to_thrift());
+ SlotDescriptor* slot = new SlotDescriptor(tslot); // NOTE(review): raw new; presumably read_desc takes ownership (constructed with true) - confirm
+ read_desc.add_slot(slot);
+
+ Block new_block = block.clone_empty(); // target block for the deserialized rows
+ std::unordered_map<uint32_t, uint32_t> col_uid_to_idx;
+ std::vector<std::string> default_values;
+ default_values.resize(read_desc.slots().size());
+ for (int i = 0; i < read_desc.slots().size(); ++i) { // map each slot unique id to its index and collect its default value
+ col_uid_to_idx[read_desc.slots()[i]->col_unique_id()] = i;
+ default_values[i] = read_desc.slots()[i]->col_default_value();
+ std::cout << "uid " << read_desc.slots()[i]->col_unique_id() << ":" <<
i << std::endl;
+ }
+ std::cout << block.dump_data() << std::endl;
+ std::cout << new_block.dump_data() << std::endl;
+ std::cout << "deserialize from jsonb" << std::endl;
+
JsonbSerializeUtil::jsonb_to_block(create_data_type_serdes(read_desc.slots()),
+ static_cast<ColumnString&>(*col.get()),
col_uid_to_idx,
+ new_block, default_values);
+ std::cout << block.dump_data() << std::endl;
+ std::cout << new_block.dump_data() << std::endl;
+ EXPECT_EQ(block.dump_data(), new_block.dump_data()); // the round trip must reproduce the original dump
+}
+
TEST(BlockSerializeTest, JsonbBlock) {
vectorized::Block block;
TabletSchema schema;
diff --git
a/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out
b/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out
new file mode 100644
index 00000000000..29cebe3421d
--- /dev/null
+++
b/regression-test/data/datatype_p0/nested_types/query/test_nested_type_with_rowstore.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10}
{"a": 1, "b": 2}
+2 doris2 {"jsonk3":333,"jsonk4":444} [300, 400] {"k2":20}
{"a": 3, "b": 4}
+
+-- !sql --
+1 doris1 {"jsonk1":123,"jsonk2":456} [100, 200] {"k1":10}
{"a": 1, "b": 2}
+
+-- !sql --
+1 apache doris {"jsonk1":123,"jsonk2":456} [100, 200]
{"k1":10} {"a": 1, "b": 2}
+2 apache doris 2.0 {"jsonk3":333,"jsonk4":444} [300, 400]
{"k2":20} {"a": 3, "b": 4}
+
+-- !sql --
+1 apache doris {"jsonk1":123,"jsonk2":456} [100, 200]
{"k1":10} {"a": 1, "b": 2}
+
diff --git a/regression-test/data/datatype_p0/nested_types/query/varchar.tsv
b/regression-test/data/datatype_p0/nested_types/query/varchar.tsv
new file mode 100644
index 00000000000..d7f794dd4ac
--- /dev/null
+++ b/regression-test/data/datatype_p0/nested_types/query/varchar.tsv
@@ -0,0 +1,2 @@
+1 apache doris
+2 apache doris 2.0
diff --git
a/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy
b/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy
new file mode 100644
index 00000000000..4f8d0e741eb
--- /dev/null
+++
b/regression-test/suites/datatype_p0/nested_types/query/test_nested_type_with_rowstore.groovy
@@ -0,0 +1,58 @@
+import org.apache.commons.lang3.StringUtils
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_nested_type_with_rowstore") {
+ // This test case aims to test nested types (array, map, struct) on a row-store table with the old planner.
+ sql """set enable_nereids_planner=false"""
+ sql """ DROP TABLE IF EXISTS ct_table;"""
+ sql """CREATE TABLE ct_table ( `id` int(11) NOT NULL COMMENT "用户 ID",
`c_varchar` varchar(65533) NULL COMMENT "用户姓名", `c_jsonb` JSONB NULL, `c_array`
ARRAY<INT> NULL, `c_map` MAP<STRING, INT> NULL, `c_struct` STRUCT<a:INT, b:INT>
NULL) UNIQUE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES("replication_num" = "1", "enable_unique_key_merge_on_write" =
"true", "store_row_column" = "true");"""
+
+ sql """ insert into ct_table values(2, "doris2", '{"jsonk3": 333,
"jsonk4": 444}', [300, 400], {"k2": 20}, {3, 4});"""
+ sql """ insert into ct_table values(1, "doris1", '{"jsonk1": 123,
"jsonk2": 456}', [100, 200], {"k1": 10}, {1, 2});"""
+
+ qt_sql """ select * from ct_table order by id;"""
+ // point query on the unique key
+ qt_sql """ select * from ct_table where id = 1"""
+
+ // partial-column update: stream load only the id and c_varchar columns from varchar.tsv
+ streamLoad {
+ table "ct_table"
+ time 10000
+ set 'partial_columns', 'true'
+ set 'strict_mode', 'false'
+ set 'columns', 'id,c_varchar'
+ file 'varchar.tsv'
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(2, json.NumberTotalRows)
+ assertEquals(0, json.NumberFilteredRows)
+ }
+ }
+
+ // select again and check the rows after the partial-column load
+ qt_sql """ select * from ct_table order by id;"""
+ // point query on the unique key after the partial-column load
+ qt_sql """ select * from ct_table where id = 1"""
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]