This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
new 23a520dae92 [fix](ut) fix variant_column_writer_reader_test.cpp column_object_test.cpp
23a520dae92 is described below
commit 23a520dae92f192a4f6935f8e8de19d4b2c143ad
Author: Sun Chenyang <[email protected]>
AuthorDate: Thu Apr 24 23:25:44 2025 +0800
[fix](ut) fix variant_column_writer_reader_test.cpp column_object_test.cpp
---
be/src/olap/rowset/segment_v2/column_reader.h | 2 +-
.../variant_column_writer_reader_test.cpp | 1326 ++++++++---------
be/test/vec/columns/column_object_test.cpp | 1568 ++++++++++----------
3 files changed, 1453 insertions(+), 1443 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index c0c6ca06882..363bc010f16 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -89,7 +89,7 @@ struct ColumnReaderOptions {
int be_exec_version = -1;
- const TabletSchemaSPtr tablet_schema = nullptr;
+ TabletSchemaSPtr tablet_schema = nullptr;
};
struct ColumnIteratorOptions {
diff --git a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
index 0138d83adb5..d1c20ca2306 100644
--- a/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/variant_column_writer_reader_test.cpp
@@ -1,663 +1,663 @@
-// // Licensed to the Apache Software Foundation (ASF) under one
-// // or more contributor license agreements. See the NOTICE file
-// // distributed with this work for additional information
-// // regarding copyright ownership. The ASF licenses this file
-// // to you under the Apache License, Version 2.0 (the
-// // "License"); you may not use this file except in compliance
-// // with the License. You may obtain a copy of the License at
-// //
-// // http://www.apache.org/licenses/LICENSE-2.0
-// //
-// // Unless required by applicable law or agreed to in writing,
-// // software distributed under the License is distributed on an
-// // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// // KIND, either express or implied. See the License for the
-// // specific language governing permissions and limitations
-// // under the License.
-//
-// #include "gtest/gtest.h"
-// #include "olap/rowset/segment_v2/column_reader.h"
-// #include "olap/rowset/segment_v2/hierarchical_data_reader.h"
-// #include "olap/rowset/segment_v2/variant_column_writer_impl.h"
-// #include "olap/storage_engine.h"
-// #include "testutil/schema_utils.h"
-// #include "testutil/variant_util.h"
-//
-// using namespace doris::vectorized;
-//
-// namespace doris {
-//
-// constexpr static uint32_t MAX_PATH_LEN = 1024;
-// constexpr static std::string_view dest_dir = "/ut_dir/variant_column_writer_test";
-// constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
-//
-// class VariantColumnWriterReaderTest : public testing::Test {
-// public:
-// void SetUp() override {
-// // absolute dir
-// char buffer[MAX_PATH_LEN];
-// EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
-// _current_dir = std::string(buffer);
-// _absolute_dir = _current_dir + std::string(dest_dir);
-// EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
-// EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
-//
-// // tmp dir
-// EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
-// EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
-// std::vector<StorePath> paths;
-// paths.emplace_back(std::string(tmp_dir), 1024000000);
-// auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
-// Status st = tmp_file_dirs->init();
-// EXPECT_TRUE(st.ok()) << st.to_json();
-// ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
-//
-// // storage engine
-// doris::EngineOptions options;
-// auto engine = std::make_unique<StorageEngine>(options);
-// _engine_ref = engine.get();
-// _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
-// static_cast<void>(_data_dir->update_capacity());
-// ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
-// }
-//
-// void TearDown() override {
-// EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
-// EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
-// _engine_ref = nullptr;
-// ExecEnv::GetInstance()->set_storage_engine(nullptr);
-// }
-//
-// VariantColumnWriterReaderTest() = default;
-// ~VariantColumnWriterReaderTest() override = default;
-//
-// private:
-// TabletSchemaSPtr _tablet_schema = nullptr;
-// StorageEngine* _engine_ref = nullptr;
-// std::unique_ptr<DataDir> _data_dir = nullptr;
-// TabletSharedPtr _tablet = nullptr;
-// std::string _absolute_dir;
-// std::string _current_dir;
-// };
-//
-// void check_column_meta(const ColumnMetaPB& column_meta, auto& path_with_size) {
-// EXPECT_TRUE(column_meta.has_column_path_info());
-// auto path = std::make_shared<vectorized::PathInData>();
-// path->from_protobuf(column_meta.column_path_info());
-// EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
-// EXPECT_EQ(column_meta.none_null_size(), path_with_size[path->copy_pop_front().get_path()]);
-// }
-//
-// void check_sparse_column_meta(const ColumnMetaPB& column_meta, auto& path_with_size) {
-// EXPECT_TRUE(column_meta.has_column_path_info());
-// auto path = std::make_shared<vectorized::PathInData>();
-// path->from_protobuf(column_meta.column_path_info());
-// EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
-// for (const auto& [path, size] :
-// column_meta.variant_statistics().sparse_column_non_null_size()) {
-// EXPECT_EQ(size, path_with_size[path]);
-// }
-// EXPECT_EQ(path->copy_pop_front().get_path(), "__DORIS_VARIANT_SPARSE__");
-// }
-//
-// TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) {
-// // 1. create tablet_schema
-// TabletSchemaPB schema_pb;
-// schema_pb.set_keys_type(KeysType::DUP_KEYS);
-// SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT",
"V1");
-// _tablet_schema = std::make_shared<TabletSchema>();
-// _tablet_schema->init_from_pb(schema_pb);
-//
-// // 2. create tablet
-// TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
-// tablet_meta->_tablet_id = 10000;
-// _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta,
_data_dir.get());
-//
-// EXPECT_TRUE(_tablet->init().ok());
-//
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-//
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
-//
-// // 3. create file_writer
-// io::FileWriterPtr file_writer;
-// auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
-// auto st = io::global_local_filesystem()->create_file(file_path,
&file_writer);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// // 4. create column_writer
-// SegmentFooterPB footer;
-// ColumnWriterOptions opts;
-// opts.meta = footer.add_columns();
-// opts.compression_type = CompressionTypePB::LZ4;
-// opts.file_writer = file_writer.get();
-// opts.footer = &footer;
-// RowsetWriterContext rowset_ctx;
-// rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
-// opts.rowset_ctx = &rowset_ctx;
-// opts.rowset_ctx->tablet_schema = _tablet_schema;
-// TabletColumn column = _tablet_schema->column(0);
-// _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
-//
-// std::unique_ptr<ColumnWriter> writer;
-// EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(),
&writer).ok());
-// EXPECT_TRUE(writer->init().ok());
-// EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
-//
-// // 5. write data
-// auto olap_data_convertor =
std::make_unique<vectorized::OlapBlockDataConvertor>();
-// auto block = _tablet_schema->create_block();
-// auto column_object =
(*std::move(block.get_by_position(0).column)).mutate();
-// std::unordered_map<int, std::string> inserted_jsonstr;
-// auto path_with_size =
-// VariantUtil::fill_object_column_with_test_data(column_object,
1000, &inserted_jsonstr);
-// olap_data_convertor->add_column_data_convertor(column);
-// olap_data_convertor->set_source_content(&block, 0, 1000);
-// auto [result, accessor] = olap_data_convertor->convert_column_data(0);
-// EXPECT_TRUE(result.ok());
-// EXPECT_TRUE(accessor != nullptr);
-// EXPECT_TRUE(writer->append(accessor->get_nullmap(),
accessor->get_data(), 1000).ok());
-// st = writer->finish();
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = writer->write_data();
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = writer->write_ordinal_index();
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = writer->write_zone_map();
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(file_writer->close().ok());
-// footer.set_num_rows(1000);
-//
-// // 6. check footer
-// EXPECT_EQ(footer.columns_size(), 5);
-// auto column_meta = footer.columns(0);
-// EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
-//
-// for (int i = 1; i < footer.columns_size() - 1; ++i) {
-// auto column_meta = footer.columns(i);
-// check_column_meta(column_meta, path_with_size);
-// }
-// check_sparse_column_meta(footer.columns(footer.columns_size() - 1),
path_with_size);
-//
-// // 7. check variant reader
-// io::FileReaderSPtr file_reader;
-// st = io::global_local_filesystem()->open_file(file_path, &file_reader);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// ColumnReaderOptions read_opts;
-// std::unique_ptr<ColumnReader> column_reader;
-// st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader,
&column_reader);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// auto variant_column_reader =
assert_cast<VariantColumnReader*>(column_reader.get());
-// EXPECT_TRUE(variant_column_reader != nullptr);
-//
-// auto subcolumn_reader =
variant_column_reader->get_reader_by_path(PathInData("key0"));
-// EXPECT_TRUE(subcolumn_reader != nullptr);
-// subcolumn_reader =
variant_column_reader->get_reader_by_path(PathInData("key1"));
-// EXPECT_TRUE(subcolumn_reader != nullptr);
-// subcolumn_reader =
variant_column_reader->get_reader_by_path(PathInData("key2"));
-// EXPECT_TRUE(subcolumn_reader != nullptr);
-//
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key3")));
-//
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key4")));
-//
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key5")));
-//
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key6")));
-//
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key7")));
-//
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key8")));
-//
EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key9")));
-// auto size = variant_column_reader->get_metadata_size();
-// EXPECT_GT(size, 0);
-//
-// // 8. check statistics
-// auto statistics = variant_column_reader->get_stats();
-// for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
-// EXPECT_EQ(path_with_size[path], size);
-// }
-// for (const auto& [path, size] :
statistics->sparse_column_non_null_size) {
-// EXPECT_EQ(path_with_size[path], size);
-// }
-//
-// // 9. check hier reader
-// ColumnIterator* it;
-// TabletColumn parent_column = _tablet_schema->column(0);
-// StorageReadOptions storage_read_opts;
-// storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
-// st = variant_column_reader->new_iterator(&it, parent_column,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-// ColumnIteratorOptions column_iter_opts;
-// OlapReaderStatistics stats;
-// column_iter_opts.stats = &stats;
-// column_iter_opts.file_reader = file_reader.get();
-// st = it->init(column_iter_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// MutableColumnPtr new_column_object = ColumnObject::create(3);
-// size_t nrows = 1000;
-// st = it->seek_to_ordinal(0);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = it->next_batch(&nrows, new_column_object);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(stats.bytes_read > 0);
-//
-// for (int i = 0; i < 1000; ++i) {
-// std::string value;
-// st = assert_cast<ColumnObject*>(new_column_object.get())
-// ->serialize_one_row_to_string(i, &value);
-//
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_EQ(value, inserted_jsonstr[i]);
-// }
-//
-// std::vector<rowid_t> row_ids;
-// for (int i = 0; i < 1000; ++i) {
-// if (i % 7 == 0) {
-// row_ids.push_back(i);
-// }
-// }
-// new_column_object = ColumnObject::create(3);
-// st = it->read_by_rowids(row_ids.data(), row_ids.size(),
new_column_object);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// for (int i = 0; i < row_ids.size(); ++i) {
-// std::string value;
-// st = assert_cast<ColumnObject*>(new_column_object.get())
-// ->serialize_one_row_to_string(i, &value);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_EQ(value, inserted_jsonstr[row_ids[i]]);
-// }
-//
-// auto read_to_column_object = [&]() {
-// new_column_object = ColumnObject::create(3);
-// nrows = 1000;
-// st = it->seek_to_ordinal(0);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = it->next_batch(&nrows, new_column_object);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(stats.bytes_read > 0);
-// EXPECT_EQ(nrows, 1000);
-// };
-//
-// // 10. check sparse extract reader
-// for (int i = 3; i < 10; ++i) {
-// std::string key = ".key" + std::to_string(i);
-// TabletColumn subcolumn_in_sparse;
-// subcolumn_in_sparse.set_name(parent_column.name_lower_case() + key);
-// subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-// subcolumn_in_sparse.set_parent_unique_id(parent_column.unique_id());
-//
subcolumn_in_sparse.set_path_info(PathInData(parent_column.name_lower_case() +
key));
-// subcolumn_in_sparse.set_variant_max_subcolumns_count(
-// parent_column.variant_max_subcolumns_count());
-// subcolumn_in_sparse.set_is_nullable(true);
-//
-// st = variant_column_reader->new_iterator(&it, subcolumn_in_sparse,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-// st = it->init(column_iter_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// read_to_column_object();
-//
-// for (int row = 0; row < 1000; ++row) {
-// std::string value;
-// st = assert_cast<ColumnObject*>(new_column_object.get())
-// ->serialize_one_row_to_string(row, &value);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// if (inserted_jsonstr[row].find(key) != std::string::npos) {
-// if (i % 2 == 0) {
-// EXPECT_EQ(value, "88");
-// } else {
-// EXPECT_EQ(value, "str99");
-// }
-// }
-// }
-// }
-//
-// // 11. check leaf reader
-// auto check_leaf_reader = [&]() {
-// for (int i = 0; i < 3; ++i) {
-// std::string key = ".key" + std::to_string(i);
-// TabletColumn subcolumn;
-// subcolumn.set_name(parent_column.name_lower_case() + key);
-// subcolumn.set_type((FieldType)(int)footer.columns(i +
1).type());
-// subcolumn.set_parent_unique_id(parent_column.unique_id());
-//
subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + key));
-// subcolumn.set_variant_max_subcolumns_count(
-// parent_column.variant_max_subcolumns_count());
-// subcolumn.set_is_nullable(true);
-//
-// st = variant_column_reader->new_iterator(&it, subcolumn,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<FileColumnIterator*>(it) != nullptr);
-// st = it->init(column_iter_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// auto column_type =
DataTypeFactory::instance().create_data_type(subcolumn, false);
-// auto read_column = column_type->create_column();
-// nrows = 1000;
-// st = it->seek_to_ordinal(0);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = it->next_batch(&nrows, read_column);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(stats.bytes_read > 0);
-//
-// for (int row = 0; row < 1000; ++row) {
-// const std::string& value =
column_type->to_string(*read_column, row);
-// if (inserted_jsonstr[row].find(key) != std::string::npos) {
-// if (i % 2 == 0) {
-// EXPECT_EQ(value, "88");
-// } else {
-// EXPECT_EQ(value, "str99");
-// }
-// }
-// }
-// }
-// };
-// check_leaf_reader();
-//
-// // 12. check empty
-// TabletColumn subcolumn;
-// subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-// subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-// subcolumn.set_parent_unique_id(parent_column.unique_id());
-// subcolumn.set_path_info(PathInData(parent_column.name_lower_case() +
".key10"));
-// subcolumn.set_is_nullable(true);
-// st = variant_column_reader->new_iterator(&it, subcolumn,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
-//
-// // 13. check statistics size == limit
-// auto& variant_stats = variant_column_reader->_statistics;
-// EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() <
-// config::variant_max_sparse_column_statistics_size);
-// auto limit = config::variant_max_sparse_column_statistics_size -
-// variant_stats->sparse_column_non_null_size.size();
-// for (int i = 0; i < limit; ++i) {
-// std::string key = parent_column.name_lower_case() + ".key10" +
std::to_string(i);
-// variant_stats->sparse_column_non_null_size[key] = 10000;
-// }
-// EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() ==
-// config::variant_max_sparse_column_statistics_size);
-//
-// st = variant_column_reader->new_iterator(&it, subcolumn,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-// st = it->init(column_iter_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// auto check_empty_column = [&]() {
-// for (int row = 0; row < 1000; ++row) {
-// std::string value;
-// st = assert_cast<ColumnObject*>(new_column_object.get())
-// ->serialize_one_row_to_string(row, &value);
-//
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_EQ(value, "{}");
-// }
-// };
-//
-// read_to_column_object();
-// check_empty_column();
-//
-// // construct tablet schema for compaction
-// storage_read_opts.io_ctx.reader_type =
ReaderType::READER_BASE_COMPACTION;
-// storage_read_opts.tablet_schema = _tablet_schema;
-// std::unordered_map<int32_t, TabletSchema::PathsSetInfo>
uid_to_paths_set_info;
-// TabletSchema::PathsSetInfo paths_set_info;
-// paths_set_info.sub_path_set.insert("key0");
-// paths_set_info.sub_path_set.insert("key3");
-// paths_set_info.sub_path_set.insert("key4");
-// paths_set_info.sparse_path_set.insert("key1");
-// paths_set_info.sparse_path_set.insert("key2");
-// paths_set_info.sparse_path_set.insert("key5");
-// paths_set_info.sparse_path_set.insert("key6");
-// paths_set_info.sparse_path_set.insert("key7");
-// paths_set_info.sparse_path_set.insert("key8");
-// paths_set_info.sparse_path_set.insert("key9");
-// uid_to_paths_set_info[parent_column.unique_id()] = paths_set_info;
-// _tablet_schema->set_path_set_info(std::move(uid_to_paths_set_info));
-//
-// // 14. check compaction subcolumn reader
-// check_leaf_reader();
-//
-// // 15. check compaction root reader
-// st = variant_column_reader->new_iterator(&it, parent_column,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<VariantRootColumnIterator*>(it) != nullptr);
-// st = it->init(column_iter_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// // 16. check compacton sparse column
-// TabletColumn sparse_column =
schema_util::create_sparse_column(parent_column);
-// st = variant_column_reader->new_iterator(&it, sparse_column,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<SparseColumnMergeReader*>(it) != nullptr);
-// st = it->init(column_iter_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// auto column_type =
DataTypeFactory::instance().create_data_type(sparse_column, false);
-// auto read_column = column_type->create_column();
-// nrows = 1000;
-// st = it->seek_to_ordinal(0);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = it->next_batch(&nrows, read_column);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(stats.bytes_read > 0);
-//
-// for (int row = 0; row < 1000; ++row) {
-// const std::string& value = column_type->to_string(*read_column,
row);
-// EXPECT_TRUE(value.find("key0") == std::string::npos)
-// << "row: " << row << ", value: " << value;
-// EXPECT_TRUE(value.find("key3") == std::string::npos)
-// << "row: " << row << ", value: " << value;
-// EXPECT_TRUE(value.find("key4") == std::string::npos)
-// << "row: " << row << ", value: " << value;
-// }
-//
-// // 17. check limit = 10000
-// subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-// subcolumn.set_path_info(PathInData(parent_column.name_lower_case() +
".key10"));
-// st = variant_column_reader->new_iterator(&it, subcolumn,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-//
-// for (int i = 0; i < limit; ++i) {
-// std::string key = parent_column.name_lower_case() + ".key10" +
std::to_string(i);
-// variant_stats->sparse_column_non_null_size.erase(key);
-// }
-//
-// // 18. check compacton sparse extract column
-// subcolumn.set_name(parent_column.name_lower_case() + ".key3");
-// subcolumn.set_path_info(PathInData(parent_column.name_lower_case() +
".key3"));
-// st = variant_column_reader->new_iterator(&it, subcolumn,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
-//
-// // 19. check compaction default column
-// subcolumn.set_name(parent_column.name_lower_case() + ".key10");
-// subcolumn.set_path_info(PathInData(parent_column.name_lower_case() +
".key10"));
-// st = variant_column_reader->new_iterator(&it, subcolumn,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
-//
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-// }
-//
-// TEST_F(VariantColumnWriterReaderTest, test_write_data_advanced) {
-// // 1. create tablet_schema
-// TabletSchemaPB schema_pb;
-// schema_pb.set_keys_type(KeysType::DUP_KEYS);
-// SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT",
"V1", 10);
-// _tablet_schema = std::make_shared<TabletSchema>();
-// _tablet_schema->init_from_pb(schema_pb);
-//
-// // 2. create tablet
-// TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
-// tablet_meta->_tablet_id = 10000;
-// _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta,
_data_dir.get());
-// EXPECT_TRUE(_tablet->init().ok());
-//
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-//
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
-//
-// // 3. create file_writer
-// io::FileWriterPtr file_writer;
-// auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
-// auto st = io::global_local_filesystem()->create_file(file_path,
&file_writer);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// // 4. create column_writer
-// SegmentFooterPB footer;
-// ColumnWriterOptions opts;
-// opts.meta = footer.add_columns();
-// opts.compression_type = CompressionTypePB::LZ4;
-// opts.file_writer = file_writer.get();
-// opts.footer = &footer;
-// RowsetWriterContext rowset_ctx;
-// rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
-// opts.rowset_ctx = &rowset_ctx;
-// opts.rowset_ctx->tablet_schema = _tablet_schema;
-// TabletColumn column = _tablet_schema->column(0);
-// _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
-//
-// std::unique_ptr<ColumnWriter> writer;
-// EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(),
&writer).ok());
-// EXPECT_TRUE(writer->init().ok());
-// EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
-//
-// // 5. write data
-// auto olap_data_convertor =
std::make_unique<vectorized::OlapBlockDataConvertor>();
-// auto block = _tablet_schema->create_block();
-// auto column_object =
(*std::move(block.get_by_position(0).column)).mutate();
-// std::unordered_map<int, std::string> inserted_jsonstr;
-// auto path_with_size =
VariantUtil::fill_object_column_with_nested_test_data(column_object, 1000,
-//
&inserted_jsonstr);
-// olap_data_convertor->add_column_data_convertor(column);
-// olap_data_convertor->set_source_content(&block, 0, 1000);
-// auto [result, accessor] = olap_data_convertor->convert_column_data(0);
-// EXPECT_TRUE(result.ok());
-// EXPECT_TRUE(accessor != nullptr);
-// EXPECT_TRUE(writer->append(accessor->get_nullmap(),
accessor->get_data(), 1000).ok());
-// st = writer->finish();
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = writer->write_data();
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = writer->write_ordinal_index();
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = writer->write_zone_map();
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(file_writer->close().ok());
-// footer.set_num_rows(1000);
-//
-// // 6. check footer
-// EXPECT_EQ(footer.columns_size(), 12);
-// auto column_meta = footer.columns(0);
-// EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
-//
-// for (int i = 1; i < footer.columns_size() - 1; ++i) {
-// auto column_meta = footer.columns(i);
-// check_column_meta(column_meta, path_with_size);
-// }
-// check_sparse_column_meta(footer.columns(footer.columns_size() - 1),
path_with_size);
-//
-// // 7. check variant reader
-// io::FileReaderSPtr file_reader;
-// st = io::global_local_filesystem()->open_file(file_path, &file_reader);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// ColumnReaderOptions read_opts;
-// std::unique_ptr<ColumnReader> column_reader;
-// st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader,
&column_reader);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// auto variant_column_reader =
assert_cast<VariantColumnReader*>(column_reader.get());
-// EXPECT_TRUE(variant_column_reader != nullptr);
-//
-// // 8. check statistics
-// auto statistics = variant_column_reader->get_stats();
-// for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
-// std::cout << "path: " << path << ", size: " << size << std::endl;
-// EXPECT_EQ(path_with_size[path], size);
-// }
-// for (const auto& [path, size] :
statistics->sparse_column_non_null_size) {
-// std::cout << "sparse path: " << path << ", size: " << size <<
std::endl;
-// EXPECT_EQ(path_with_size[path], size);
-// }
-//
-// // 9. check root
-// ColumnIterator* it;
-// TabletColumn parent_column = _tablet_schema->column(0);
-// StorageReadOptions storage_read_opts;
-// storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
-// st = variant_column_reader->new_iterator(&it, parent_column,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-// ColumnIteratorOptions column_iter_opts;
-// OlapReaderStatistics stats;
-// column_iter_opts.stats = &stats;
-// column_iter_opts.file_reader = file_reader.get();
-// st = it->init(column_iter_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-//
-// MutableColumnPtr new_column_object = ColumnObject::create(3);
-// size_t nrows = 1000;
-// st = it->seek_to_ordinal(0);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = it->next_batch(&nrows, new_column_object);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(stats.bytes_read > 0);
-//
-// for (int i = 0; i < 1000; ++i) {
-// std::string value;
-// st = assert_cast<ColumnObject*>(new_column_object.get())
-// ->serialize_one_row_to_string(i, &value);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_EQ(value, inserted_jsonstr[i]);
-// }
-//
-// auto read_to_column_object = [&]() {
-// new_column_object = ColumnObject::create(10);
-// nrows = 1000;
-// st = it->seek_to_ordinal(0);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// st = it->next_batch(&nrows, new_column_object);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(stats.bytes_read > 0);
-// EXPECT_EQ(nrows, 1000);
-// };
-//
-// auto check_key_stats = [&](const std::string& key_num) {
-// std::string key = ".key" + key_num;
-// TabletColumn subcolumn_in_nested;
-// subcolumn_in_nested.set_name(parent_column.name_lower_case() + key);
-// subcolumn_in_nested.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
-// subcolumn_in_nested.set_parent_unique_id(parent_column.unique_id());
-//
subcolumn_in_nested.set_path_info(PathInData(parent_column.name_lower_case() +
key));
-// subcolumn_in_nested.set_variant_max_subcolumns_count(
-// parent_column.variant_max_subcolumns_count());
-// subcolumn_in_nested.set_is_nullable(true);
-//
-// st = variant_column_reader->new_iterator(&it, subcolumn_in_nested,
&storage_read_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
-// st = it->init(column_iter_opts);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// read_to_column_object();
-//
-// size_t key_count = 0;
-// size_t key_nested_count = 0;
-// for (int row = 0; row < 1000; ++row) {
-// std::string value;
-// st = assert_cast<ColumnObject*>(new_column_object.get())
-// ->serialize_one_row_to_string(row, &value);
-// EXPECT_TRUE(st.ok()) << st.msg();
-// if (value.find("nested" + key_num) != std::string::npos) {
-// key_nested_count++;
-// } else if (value.find("88") != std::string::npos) {
-// key_count++;
-// }
-// }
-// EXPECT_EQ(key_count, path_with_size["key" + key_num]);
-// EXPECT_EQ(key_nested_count, path_with_size["key" + key_num +
".nested" + key_num]);
-// };
-//
-// for (int i = 3; i < 10; ++i) {
-// check_key_stats(std::to_string(i));
-// }
-//
-//
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
-// }
-//
-// } // namespace doris
\ No newline at end of file
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+#include "olap/rowset/segment_v2/column_reader.h"
+#include "olap/rowset/segment_v2/hierarchical_data_reader.h"
+#include "olap/rowset/segment_v2/variant_column_writer_impl.h"
+#include "olap/storage_engine.h"
+#include "testutil/schema_utils.h"
+#include "testutil/variant_util.h"
+
+using namespace doris::vectorized;
+
+namespace doris {
+
+constexpr static uint32_t MAX_PATH_LEN = 1024;
+constexpr static std::string_view dest_dir = "/ut_dir/variant_column_writer_test";
+constexpr static std::string_view tmp_dir = "./ut_dir/tmp";
+
+class VariantColumnWriterReaderTest : public testing::Test {
+public:
+ void SetUp() override {
+ // absolute dir
+ char buffer[MAX_PATH_LEN];
+ EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
+ _current_dir = std::string(buffer);
+ _absolute_dir = _current_dir + std::string(dest_dir);
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+ EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
+
+ // tmp dir
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+ EXPECT_TRUE(io::global_local_filesystem()->create_directory(tmp_dir).ok());
+ std::vector<StorePath> paths;
+ paths.emplace_back(std::string(tmp_dir), 1024000000);
+ auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
+ Status st = tmp_file_dirs->init();
+ EXPECT_TRUE(st.ok()) << st.to_json();
+ ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
+
+ // storage engine
+ doris::EngineOptions options;
+ auto engine = std::make_unique<StorageEngine>(options);
+ _engine_ref = engine.get();
+ _data_dir = std::make_unique<DataDir>(*_engine_ref, _absolute_dir);
+ static_cast<void>(_data_dir->update_capacity());
+ ExecEnv::GetInstance()->set_storage_engine(std::move(engine));
+ }
+
+ void TearDown() override {
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(tmp_dir).ok());
+ _engine_ref = nullptr;
+ ExecEnv::GetInstance()->set_storage_engine(nullptr);
+ }
+
+ VariantColumnWriterReaderTest() = default;
+ ~VariantColumnWriterReaderTest() override = default;
+
+private:
+ TabletSchemaSPtr _tablet_schema = nullptr;
+ StorageEngine* _engine_ref = nullptr;
+ std::unique_ptr<DataDir> _data_dir = nullptr;
+ TabletSharedPtr _tablet = nullptr;
+ std::string _absolute_dir;
+ std::string _current_dir;
+};
+
+void check_column_meta(const ColumnMetaPB& column_meta, auto& path_with_size) {
+ EXPECT_TRUE(column_meta.has_column_path_info());
+ auto path = std::make_shared<vectorized::PathInData>();
+ path->from_protobuf(column_meta.column_path_info());
+ EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
+ EXPECT_EQ(column_meta.none_null_size(), path_with_size[path->copy_pop_front().get_path()]);
+}
+
+void check_sparse_column_meta(const ColumnMetaPB& column_meta, auto& path_with_size) {
+ EXPECT_TRUE(column_meta.has_column_path_info());
+ auto path = std::make_shared<vectorized::PathInData>();
+ path->from_protobuf(column_meta.column_path_info());
+ EXPECT_EQ(column_meta.column_path_info().parrent_column_unique_id(), 1);
+ for (const auto& [path, size] :
+ column_meta.variant_statistics().sparse_column_non_null_size()) {
+ EXPECT_EQ(size, path_with_size[path]);
+ }
+ EXPECT_EQ(path->copy_pop_front().get_path(), "__DORIS_VARIANT_SPARSE__");
+}
+
+TEST_F(VariantColumnWriterReaderTest, test_write_data_normal) {
+ // 1. create tablet_schema
+ TabletSchemaPB schema_pb;
+ schema_pb.set_keys_type(KeysType::DUP_KEYS);
+ SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", "V1");
+ _tablet_schema = std::make_shared<TabletSchema>();
+ _tablet_schema->init_from_pb(schema_pb);
+
+ // 2. create tablet
+ TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+ tablet_meta->_tablet_id = 10000;
+ _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get());
+
+ EXPECT_TRUE(_tablet->init().ok());
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+ EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+ // 3. create file_writer
+ io::FileWriterPtr file_writer;
+ auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+ auto st = io::global_local_filesystem()->create_file(file_path, &file_writer);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ // 4. create column_writer
+ SegmentFooterPB footer;
+ ColumnWriterOptions opts;
+ opts.meta = footer.add_columns();
+ opts.compression_type = CompressionTypePB::LZ4;
+ opts.file_writer = file_writer.get();
+ opts.footer = &footer;
+ RowsetWriterContext rowset_ctx;
+ rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+ opts.rowset_ctx = &rowset_ctx;
+ opts.rowset_ctx->tablet_schema = _tablet_schema;
+ TabletColumn column = _tablet_schema->column(0);
+ _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
+
+ std::unique_ptr<ColumnWriter> writer;
+ EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), &writer).ok());
+ EXPECT_TRUE(writer->init().ok());
+ EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
+
+ // 5. write data
+ auto olap_data_convertor = std::make_unique<vectorized::OlapBlockDataConvertor>();
+ auto block = _tablet_schema->create_block();
+ auto column_object = (*std::move(block.get_by_position(0).column)).mutate();
+ std::unordered_map<int, std::string> inserted_jsonstr;
+ auto path_with_size =
+ VariantUtil::fill_object_column_with_test_data(column_object, 1000, &inserted_jsonstr);
+ olap_data_convertor->add_column_data_convertor(column);
+ olap_data_convertor->set_source_content(&block, 0, 1000);
+ auto [result, accessor] = olap_data_convertor->convert_column_data(0);
+ EXPECT_TRUE(result.ok());
+ EXPECT_TRUE(accessor != nullptr);
+ EXPECT_TRUE(writer->append(accessor->get_nullmap(), accessor->get_data(), 1000).ok());
+ st = writer->finish();
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = writer->write_data();
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = writer->write_ordinal_index();
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = writer->write_zone_map();
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(file_writer->close().ok());
+ footer.set_num_rows(1000);
+
+ // 6. check footer
+ EXPECT_EQ(footer.columns_size(), 5);
+ auto column_meta = footer.columns(0);
+ EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+ for (int i = 1; i < footer.columns_size() - 1; ++i) {
+ auto column_meta = footer.columns(i);
+ check_column_meta(column_meta, path_with_size);
+ }
+ check_sparse_column_meta(footer.columns(footer.columns_size() - 1), path_with_size);
+
+ // 7. check variant reader
+ io::FileReaderSPtr file_reader;
+ st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ ColumnReaderOptions read_opts;
+ read_opts.tablet_schema = _tablet_schema;
+ std::unique_ptr<ColumnReader> column_reader;
+ st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, &column_reader);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ auto variant_column_reader = assert_cast<VariantColumnReader*>(column_reader.get());
+ EXPECT_TRUE(variant_column_reader != nullptr);
+
+ auto subcolumn_reader = variant_column_reader->get_reader_by_path(PathInData("key0"));
+ EXPECT_TRUE(subcolumn_reader != nullptr);
+ subcolumn_reader = variant_column_reader->get_reader_by_path(PathInData("key1"));
+ EXPECT_TRUE(subcolumn_reader != nullptr);
+ subcolumn_reader = variant_column_reader->get_reader_by_path(PathInData("key2"));
+ EXPECT_TRUE(subcolumn_reader != nullptr);
+ EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key3")));
+ EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key4")));
+ EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key5")));
+ EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key6")));
+ EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key7")));
+ EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key8")));
+ EXPECT_TRUE(variant_column_reader->exist_in_sparse_column(PathInData("key9")));
+ auto size = variant_column_reader->get_metadata_size();
+ EXPECT_GT(size, 0);
+
+ // 8. check statistics
+ auto statistics = variant_column_reader->get_stats();
+ for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
+ EXPECT_EQ(path_with_size[path], size);
+ }
+ for (const auto& [path, size] : statistics->sparse_column_non_null_size) {
+ EXPECT_EQ(path_with_size[path], size);
+ }
+
+ // 9. check hier reader
+ ColumnIterator* it;
+ TabletColumn parent_column = _tablet_schema->column(0);
+ StorageReadOptions storage_read_opts;
+ storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
+ st = variant_column_reader->new_iterator(&it, parent_column, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+ ColumnIteratorOptions column_iter_opts;
+ OlapReaderStatistics stats;
+ column_iter_opts.stats = &stats;
+ column_iter_opts.file_reader = file_reader.get();
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ MutableColumnPtr new_column_object = ColumnObject::create(3);
+ size_t nrows = 1000;
+ st = it->seek_to_ordinal(0);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = it->next_batch(&nrows, new_column_object);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(stats.bytes_read > 0);
+
+ for (int i = 0; i < 1000; ++i) {
+ std::string value;
+ st = assert_cast<ColumnObject*>(new_column_object.get())
+ ->serialize_one_row_to_string(i, &value);
+
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_EQ(value, inserted_jsonstr[i]);
+ }
+
+ std::vector<rowid_t> row_ids;
+ for (int i = 0; i < 1000; ++i) {
+ if (i % 7 == 0) {
+ row_ids.push_back(i);
+ }
+ }
+ new_column_object = ColumnObject::create(3);
+ st = it->read_by_rowids(row_ids.data(), row_ids.size(), new_column_object);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ for (int i = 0; i < row_ids.size(); ++i) {
+ std::string value;
+ st = assert_cast<ColumnObject*>(new_column_object.get())
+ ->serialize_one_row_to_string(i, &value);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_EQ(value, inserted_jsonstr[row_ids[i]]);
+ }
+
+ auto read_to_column_object = [&]() {
+ new_column_object = ColumnObject::create(3);
+ nrows = 1000;
+ st = it->seek_to_ordinal(0);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = it->next_batch(&nrows, new_column_object);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(stats.bytes_read > 0);
+ EXPECT_EQ(nrows, 1000);
+ };
+
+ // 10. check sparse extract reader
+ for (int i = 3; i < 10; ++i) {
+ std::string key = ".key" + std::to_string(i);
+ TabletColumn subcolumn_in_sparse;
+ subcolumn_in_sparse.set_name(parent_column.name_lower_case() + key);
+ subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ subcolumn_in_sparse.set_parent_unique_id(parent_column.unique_id());
+ subcolumn_in_sparse.set_path_info(PathInData(parent_column.name_lower_case() + key));
+ subcolumn_in_sparse.set_variant_max_subcolumns_count(
+ parent_column.variant_max_subcolumns_count());
+ subcolumn_in_sparse.set_is_nullable(true);
+
+ st = variant_column_reader->new_iterator(&it, subcolumn_in_sparse, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ read_to_column_object();
+
+ for (int row = 0; row < 1000; ++row) {
+ std::string value;
+ st = assert_cast<ColumnObject*>(new_column_object.get())
+ ->serialize_one_row_to_string(row, &value);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ if (inserted_jsonstr[row].find(key) != std::string::npos) {
+ if (i % 2 == 0) {
+ EXPECT_EQ(value, "88");
+ } else {
+ EXPECT_EQ(value, "str99");
+ }
+ }
+ }
+ }
+
+ // 11. check leaf reader
+ auto check_leaf_reader = [&]() {
+ for (int i = 0; i < 3; ++i) {
+ std::string key = ".key" + std::to_string(i);
+ TabletColumn subcolumn;
+ subcolumn.set_name(parent_column.name_lower_case() + key);
+ subcolumn.set_type((FieldType)(int)footer.columns(i + 1).type());
+ subcolumn.set_parent_unique_id(parent_column.unique_id());
+ subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + key));
+ subcolumn.set_variant_max_subcolumns_count(
+ parent_column.variant_max_subcolumns_count());
+ subcolumn.set_is_nullable(true);
+
+ st = variant_column_reader->new_iterator(&it, subcolumn, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<FileColumnIterator*>(it) != nullptr);
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ auto column_type = DataTypeFactory::instance().create_data_type(subcolumn, false);
+ auto read_column = column_type->create_column();
+ nrows = 1000;
+ st = it->seek_to_ordinal(0);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = it->next_batch(&nrows, read_column);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(stats.bytes_read > 0);
+
+ for (int row = 0; row < 1000; ++row) {
+ const std::string& value = column_type->to_string(*read_column, row);
+ if (inserted_jsonstr[row].find(key) != std::string::npos) {
+ if (i % 2 == 0) {
+ EXPECT_EQ(value, "88");
+ } else {
+ EXPECT_EQ(value, "str99");
+ }
+ }
+ }
+ }
+ };
+ check_leaf_reader();
+
+ // 12. check empty
+ TabletColumn subcolumn;
+ subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+ subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ subcolumn.set_parent_unique_id(parent_column.unique_id());
+ subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + ".key10"));
+ subcolumn.set_is_nullable(true);
+ st = variant_column_reader->new_iterator(&it, subcolumn, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
+
+ // 13. check statistics size == limit
+ auto& variant_stats = variant_column_reader->_statistics;
+ EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() <
+ config::variant_max_sparse_column_statistics_size);
+ auto limit = config::variant_max_sparse_column_statistics_size -
+ variant_stats->sparse_column_non_null_size.size();
+ for (int i = 0; i < limit; ++i) {
+ std::string key = parent_column.name_lower_case() + ".key10" + std::to_string(i);
+ variant_stats->sparse_column_non_null_size[key] = 10000;
+ }
+ EXPECT_TRUE(variant_stats->sparse_column_non_null_size.size() ==
+ config::variant_max_sparse_column_statistics_size);
+
+ st = variant_column_reader->new_iterator(&it, subcolumn, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ auto check_empty_column = [&]() {
+ for (int row = 0; row < 1000; ++row) {
+ std::string value;
+ st = assert_cast<ColumnObject*>(new_column_object.get())
+ ->serialize_one_row_to_string(row, &value);
+
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_EQ(value, "{}");
+ }
+ };
+
+ read_to_column_object();
+ check_empty_column();
+
+ // construct tablet schema for compaction
+ storage_read_opts.io_ctx.reader_type = ReaderType::READER_BASE_COMPACTION;
+ storage_read_opts.tablet_schema = _tablet_schema;
+ std::unordered_map<int32_t, TabletSchema::PathsSetInfo> uid_to_paths_set_info;
+ TabletSchema::PathsSetInfo paths_set_info;
+ paths_set_info.sub_path_set.insert("key0");
+ paths_set_info.sub_path_set.insert("key3");
+ paths_set_info.sub_path_set.insert("key4");
+ paths_set_info.sparse_path_set.insert("key1");
+ paths_set_info.sparse_path_set.insert("key2");
+ paths_set_info.sparse_path_set.insert("key5");
+ paths_set_info.sparse_path_set.insert("key6");
+ paths_set_info.sparse_path_set.insert("key7");
+ paths_set_info.sparse_path_set.insert("key8");
+ paths_set_info.sparse_path_set.insert("key9");
+ uid_to_paths_set_info[parent_column.unique_id()] = paths_set_info;
+ _tablet_schema->set_path_set_info(std::move(uid_to_paths_set_info));
+
+ // 14. check compaction subcolumn reader
+ check_leaf_reader();
+
+ // 15. check compaction root reader
+ st = variant_column_reader->new_iterator(&it, parent_column, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<VariantRootColumnIterator*>(it) != nullptr);
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ // 16. check compaction sparse column
+ TabletColumn sparse_column = schema_util::create_sparse_column(parent_column);
+ st = variant_column_reader->new_iterator(&it, sparse_column, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<SparseColumnMergeReader*>(it) != nullptr);
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ auto column_type = DataTypeFactory::instance().create_data_type(sparse_column, false);
+ auto read_column = column_type->create_column();
+ nrows = 1000;
+ st = it->seek_to_ordinal(0);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = it->next_batch(&nrows, read_column);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(stats.bytes_read > 0);
+
+ for (int row = 0; row < 1000; ++row) {
+ const std::string& value = column_type->to_string(*read_column, row);
+ EXPECT_TRUE(value.find("key0") == std::string::npos)
+ << "row: " << row << ", value: " << value;
+ EXPECT_TRUE(value.find("key3") == std::string::npos)
+ << "row: " << row << ", value: " << value;
+ EXPECT_TRUE(value.find("key4") == std::string::npos)
+ << "row: " << row << ", value: " << value;
+ }
+
+ // 17. check limit = 10000
+ subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+ subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + ".key10"));
+ st = variant_column_reader->new_iterator(&it, subcolumn, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+
+ for (int i = 0; i < limit; ++i) {
+ std::string key = parent_column.name_lower_case() + ".key10" + std::to_string(i);
+ variant_stats->sparse_column_non_null_size.erase(key);
+ }
+
+ // 18. check compaction sparse extract column
+ subcolumn.set_name(parent_column.name_lower_case() + ".key3");
+ subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + ".key3"));
+ st = variant_column_reader->new_iterator(&it, subcolumn, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<SparseColumnExtractReader*>(it) != nullptr);
+
+ // 19. check compaction default column
+ subcolumn.set_name(parent_column.name_lower_case() + ".key10");
+ subcolumn.set_path_info(PathInData(parent_column.name_lower_case() + ".key10"));
+ st = variant_column_reader->new_iterator(&it, subcolumn, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<DefaultValueColumnIterator*>(it) != nullptr);
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+}
+
+TEST_F(VariantColumnWriterReaderTest, test_write_data_advanced) {
+ // 1. create tablet_schema
+ TabletSchemaPB schema_pb;
+ schema_pb.set_keys_type(KeysType::DUP_KEYS);
+ SchemaUtils::construct_column(schema_pb.add_column(), 1, "VARIANT", "V1", 10);
+ _tablet_schema = std::make_shared<TabletSchema>();
+ _tablet_schema->init_from_pb(schema_pb);
+
+ // 2. create tablet
+ TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+ tablet_meta->_tablet_id = 10000;
+ _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, _data_dir.get());
+ EXPECT_TRUE(_tablet->init().ok());
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+ EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+ // 3. create file_writer
+ io::FileWriterPtr file_writer;
+ auto file_path = local_segment_path(_tablet->tablet_path(), "0", 0);
+ auto st = io::global_local_filesystem()->create_file(file_path, &file_writer);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ // 4. create column_writer
+ SegmentFooterPB footer;
+ ColumnWriterOptions opts;
+ opts.meta = footer.add_columns();
+ opts.compression_type = CompressionTypePB::LZ4;
+ opts.file_writer = file_writer.get();
+ opts.footer = &footer;
+ RowsetWriterContext rowset_ctx;
+ rowset_ctx.write_type = DataWriteType::TYPE_DIRECT;
+ opts.rowset_ctx = &rowset_ctx;
+ opts.rowset_ctx->tablet_schema = _tablet_schema;
+ TabletColumn column = _tablet_schema->column(0);
+ _init_column_meta(opts.meta, 0, column, CompressionTypePB::LZ4);
+
+ std::unique_ptr<ColumnWriter> writer;
+ EXPECT_TRUE(ColumnWriter::create(opts, &column, file_writer.get(), &writer).ok());
+ EXPECT_TRUE(writer->init().ok());
+ EXPECT_TRUE(assert_cast<VariantColumnWriter*>(writer.get()) != nullptr);
+
+ // 5. write data
+ auto olap_data_convertor = std::make_unique<vectorized::OlapBlockDataConvertor>();
+ auto block = _tablet_schema->create_block();
+ auto column_object = (*std::move(block.get_by_position(0).column)).mutate();
+ std::unordered_map<int, std::string> inserted_jsonstr;
+ auto path_with_size = VariantUtil::fill_object_column_with_nested_test_data(column_object, 1000,
+ &inserted_jsonstr);
+ olap_data_convertor->add_column_data_convertor(column);
+ olap_data_convertor->set_source_content(&block, 0, 1000);
+ auto [result, accessor] = olap_data_convertor->convert_column_data(0);
+ EXPECT_TRUE(result.ok());
+ EXPECT_TRUE(accessor != nullptr);
+ EXPECT_TRUE(writer->append(accessor->get_nullmap(), accessor->get_data(), 1000).ok());
+ st = writer->finish();
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = writer->write_data();
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = writer->write_ordinal_index();
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = writer->write_zone_map();
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(file_writer->close().ok());
+ footer.set_num_rows(1000);
+
+ // 6. check footer
+ EXPECT_EQ(footer.columns_size(), 12);
+ auto column_meta = footer.columns(0);
+ EXPECT_EQ(column_meta.type(), (int)FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+ for (int i = 1; i < footer.columns_size() - 1; ++i) {
+ auto column_meta = footer.columns(i);
+ check_column_meta(column_meta, path_with_size);
+ }
+ check_sparse_column_meta(footer.columns(footer.columns_size() - 1), path_with_size);
+
+ // 7. check variant reader
+ io::FileReaderSPtr file_reader;
+ st = io::global_local_filesystem()->open_file(file_path, &file_reader);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ ColumnReaderOptions read_opts;
+ read_opts.tablet_schema = _tablet_schema;
+ std::unique_ptr<ColumnReader> column_reader;
+ st = ColumnReader::create(read_opts, footer, 0, 1000, file_reader, &column_reader);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ auto variant_column_reader = assert_cast<VariantColumnReader*>(column_reader.get());
+ EXPECT_TRUE(variant_column_reader != nullptr);
+
+ // 8. check statistics
+ auto statistics = variant_column_reader->get_stats();
+ for (const auto& [path, size] : statistics->subcolumns_non_null_size) {
+ EXPECT_EQ(path_with_size[path], size);
+ }
+ for (const auto& [path, size] : statistics->sparse_column_non_null_size) {
+ EXPECT_EQ(path_with_size[path], size);
+ }
+
+ // 9. check root
+ ColumnIterator* it;
+ TabletColumn parent_column = _tablet_schema->column(0);
+ StorageReadOptions storage_read_opts;
+ storage_read_opts.io_ctx.reader_type = ReaderType::READER_QUERY;
+ st = variant_column_reader->new_iterator(&it, parent_column, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+ ColumnIteratorOptions column_iter_opts;
+ OlapReaderStatistics stats;
+ column_iter_opts.stats = &stats;
+ column_iter_opts.file_reader = file_reader.get();
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ MutableColumnPtr new_column_object = ColumnObject::create(3);
+ size_t nrows = 1000;
+ st = it->seek_to_ordinal(0);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = it->next_batch(&nrows, new_column_object);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(stats.bytes_read > 0);
+
+ for (int i = 0; i < 1000; ++i) {
+ std::string value;
+ st = assert_cast<ColumnObject*>(new_column_object.get())
+ ->serialize_one_row_to_string(i, &value);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_EQ(value, inserted_jsonstr[i]);
+ }
+
+ auto read_to_column_object = [&]() {
+ new_column_object = ColumnObject::create(10);
+ nrows = 1000;
+ st = it->seek_to_ordinal(0);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ st = it->next_batch(&nrows, new_column_object);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(stats.bytes_read > 0);
+ EXPECT_EQ(nrows, 1000);
+ };
+
+ auto check_key_stats = [&](const std::string& key_num) {
+ std::string key = ".key" + key_num;
+ TabletColumn subcolumn_in_nested;
+ subcolumn_in_nested.set_name(parent_column.name_lower_case() + key);
+ subcolumn_in_nested.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+ subcolumn_in_nested.set_parent_unique_id(parent_column.unique_id());
+ subcolumn_in_nested.set_path_info(PathInData(parent_column.name_lower_case() + key));
+ subcolumn_in_nested.set_variant_max_subcolumns_count(
+ parent_column.variant_max_subcolumns_count());
+ subcolumn_in_nested.set_is_nullable(true);
+
+ st = variant_column_reader->new_iterator(&it, subcolumn_in_nested, &storage_read_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ EXPECT_TRUE(assert_cast<HierarchicalDataReader*>(it) != nullptr);
+ st = it->init(column_iter_opts);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ read_to_column_object();
+
+ size_t key_count = 0;
+ size_t key_nested_count = 0;
+ for (int row = 0; row < 1000; ++row) {
+ std::string value;
+ st = assert_cast<ColumnObject*>(new_column_object.get())
+ ->serialize_one_row_to_string(row, &value);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ if (value.find("nested" + key_num) != std::string::npos) {
+ key_nested_count++;
+ } else if (value.find("88") != std::string::npos) {
+ key_count++;
+ }
+ }
+ EXPECT_EQ(key_count, path_with_size["key" + key_num]);
+ EXPECT_EQ(key_nested_count, path_with_size["key" + key_num + ".nested" + key_num]);
+ };
+
+ for (int i = 3; i < 10; ++i) {
+ check_key_stats(std::to_string(i));
+ }
+
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+}
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/test/vec/columns/column_object_test.cpp b/be/test/vec/columns/column_object_test.cpp
index b08e57dd19e..7fd827c8919 100644
--- a/be/test/vec/columns/column_object_test.cpp
+++ b/be/test/vec/columns/column_object_test.cpp
@@ -5,9 +5,9 @@
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
-//
+
// http://www.apache.org/licenses/LICENSE-2.0
-//
+
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -15,790 +15,800 @@
// specific language governing permissions and limitations
// under the License.
-// #include "vec/columns/column_object.h"
-//
-// #include <gmock/gmock-more-matchers.h>
-// #include <gtest/gtest.h>
-// #include <rapidjson/prettywriter.h>
-// #include <stdio.h>
-//
-// #include "runtime/jsonb_value.h"
-// #include "testutil/variant_util.h"
-// #include "vec/common/string_ref.h"
-// #include "vec/core/field.h"
-// #include "vec/core/types.h"
-// #include "vec/data_types/data_type_array.h"
-// #include "vec/data_types/data_type_factory.hpp"
-//
-// using namespace doris::vectorized;
-//
-// using namespace doris;
-// // #define ADD_SUB_COLUMN(key) \
-// // varaint->add_sub_column(PathInData(std::string_view(key)), 0);
-//
-// void convert_field_to_rapidjson(const vectorized::Field& field, rapidjson::Value& target,
-// rapidjson::Document::AllocatorType& allocator) {
-// switch (field.get_type()) {
-// case vectorized::Field::Types::Null:
-// target.SetNull();
-// break;
-// case vectorized::Field::Types::Int64:
-// target.SetInt64(field.get<Int64>());
-// break;
-// case vectorized::Field::Types::Float64:
-// target.SetDouble(field.get<Float64>());
-// break;
-// case vectorized::Field::Types::JSONB: {
-// const auto& val = field.get<JsonbField>();
-// JsonbValue* json_val = JsonbDocument::createValue(val.get_value(), val.get_size());
-// convert_jsonb_to_rapidjson(*json_val, target, allocator);
-// break;
-// }
-// case vectorized::Field::Types::String: {
-// const String& val = field.get<String>();
-// target.SetString(val.data(), cast_set<rapidjson::SizeType>(val.size()));
-// break;
-// }
-// case vectorized::Field::Types::Array: {
-// const vectorized::Array& array = field.get<Array>();
-// target.SetArray();
-// for (const vectorized::Field& item : array) {
-// rapidjson::Value val;
-// convert_field_to_rapidjson(item, val, allocator);
-// target.PushBack(val, allocator);
-// }
-// break;
-// }
-// case vectorized::Field::Types::VariantMap: {
-// const vectorized::VariantMap& map = field.get<VariantMap>();
-// target.SetObject();
-// for (const auto& item : map) {
-// if (item.second.is_null()) {
-// continue;
-// }
-// rapidjson::Value key;
-// key.SetString(item.first.get_path().data(),
-// cast_set<rapidjson::SizeType>(item.first.get_path().size()));
-// rapidjson::Value val;
-// convert_field_to_rapidjson(item.second, val, allocator);
-// if (val.IsNull() && item.first.empty()) {
-// // skip null value with empty key, indicate the null json value of root in variant map,
-// // usally padding in nested arrays
-// continue;
-// }
-// target.AddMember(key, val, allocator);
-// }
-// break;
-// }
-// default:
-// throw doris::Exception(ErrorCode::INTERNAL_ERROR, "unkown field type: {}",
-// field.get_type_name());
-// break;
-// }
-// }
-//
-// void convert_variant_map_to_rapidjson(const vectorized::VariantMap& map, rapidjson::Value& target,
-// rapidjson::Document::AllocatorType& allocator) {
-// target.SetObject();
-// for (const auto& item : map) {
-// if (item.second.is_null()) {
-// continue;
-// }
-// rapidjson::Value key;
-// key.SetString(item.first.get_path().data(),
-// cast_set<rapidjson::SizeType>(item.first.get_path().size()));
-// rapidjson::Value val;
-// convert_field_to_rapidjson(item.second, val, allocator);
-// if (val.IsNull() && item.first.empty()) {
-// // skip null value with empty key, indicate the null json value of root in variant map,
-// // usally padding in nested arrays
-// continue;
-// }
-// target.AddMember(key, val, allocator);
-// }
-// }
-//
-// void convert_array_to_rapidjson(const vectorized::Array& array, rapidjson::Value& target,
-// rapidjson::Document::AllocatorType& allocator) {
-// target.SetArray();
-// for (const vectorized::Field& item : array) {
-// rapidjson::Value val;
-// convert_field_to_rapidjson(item, val, allocator);
-// target.PushBack(val, allocator);
-// }
-// }
-//
-// TEST(ColumnVariantTest, insert_try_insert) {
-// auto v = VariantUtil::construct_dst_varint_column();
-// FieldInfo info;
-// info.scalar_type_id = TypeIndex::Nothing;
-// info.num_dimensions = 0;
-// PathInData path("v.f");
-// auto sub = v->get_subcolumn(path);
-// Int64 value = 43;
-// sub->insert(value, info);
-//
-// info.num_dimensions = 1;
-// sub->insert(value, info);
-//
-// info.num_dimensions = 2;
-// sub->insert(value, info);
-// }
-//
-// TEST(ColumnVariantTest, basic_finalize) {
-// auto variant = VariantUtil::construct_basic_varint_column();
-// // 4. finalize
-// EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-// EXPECT_EQ(variant->size(), 10);
-//
-// // check finalized subcolumn
-// // 5 subcolumn + 1 root
-// EXPECT_EQ(variant->subcolumns.size(), 6);
-// for (const auto& column : variant->subcolumns) {
-// if (column->data.is_root) {
-// continue;
-// }
-// EXPECT_EQ(column->data.data.size(), 1);
-// }
-//
-// // check sparse column
-// const auto& offsets = variant->serialized_sparse_column_offsets();
-// for (int row = 0; row < 5; ++row) {
-// EXPECT_EQ(offsets[row], 0);
-// }
-// for (int row = 5; row < 10; ++row) {
-// EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
-// }
-// }
-//
-// TEST(ColumnVariantTest, basic_deserialize) {
-// auto variant = VariantUtil::construct_basic_varint_column();
-//
-// // 4. finalize
-// EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-// EXPECT_EQ(variant->size(), 10);
-//
-// const auto& [path, value] = variant->get_sparse_data_paths_and_values();
-// const auto& offsets = variant->serialized_sparse_column_offsets();
-// for (size_t row = 5; row < 10; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// EXPECT_EQ(data, StringRef("v.b.d", 5));
-// auto pair = variant->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair.first.get<Int64>(), 30);
-//
-// auto data2 = path->get_data_at(start);
-// auto pair2 = variant->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data2, StringRef("v.c.d", 5));
-// EXPECT_EQ(pair2.first.get<Int64>(), 30);
-//
-// auto data3 = path->get_data_at(start);
-// auto pair3 = variant->deserialize_from_sparse_column(value,
start++);
-// EXPECT_EQ(data3, StringRef("v.d.d", 5));
-// EXPECT_EQ(pair3.first.get<String>(), "50");
-// EXPECT_EQ(start, end);
-// }
-// }
-//
-// TEST(ColumnVariantTest, basic_inset_range_from) {
-// auto src = VariantUtil::construct_basic_varint_column();
-// EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-// EXPECT_EQ(src->size(), 10);
-//
-// // dst is an empty column, has 5 subcolumn + 1 root
-// auto dst = VariantUtil::construct_dst_varint_column();
-//
-// // subcolumn->subcolumn v.b v.f v.e
-// // subcolumn->sparse_column v.a v.c
-// // sparse_column->subcolumn v.b.d v.c.d
-// // sparse_column->sparse_column v.d.d
-// dst->insert_range_from(*src, 0, 10);
-// dst->finalize();
-// EXPECT_EQ(dst->size(), 10);
-//
-// // 5 subcolumn
-// EXPECT_EQ(dst->subcolumns.size(), 6);
-// ColumnObject::Subcolumns dst_subcolumns = dst->subcolumns;
-// std::sort(
-// dst_subcolumns.begin(), dst_subcolumns.end(),
-// [](const auto& lhsItem, const auto& rhsItem) { return
lhsItem->path < rhsItem->path; });
-//
-// for (const auto& column : dst_subcolumns) {
-// if (column->data.is_root) {
-// continue;
-// }
-// EXPECT_EQ(column->data.data.size(), 1);
-// EXPECT_EQ(column->data.data[0]->size(), 10);
-// if (column->path.get_path().size() == 3) {
-// EXPECT_EQ(column->data.get_non_null_value_size(), 10);
-// } else {
-// EXPECT_EQ(column->path.get_path().size(), 5);
-// EXPECT_EQ(column->data.get_non_null_value_size(), 5);
-// for (size_t row = 0; row != 5; ++row) {
-// EXPECT_TRUE(column->data.data[0]->is_null_at(row));
-// }
-// for (size_t row = 5; row != 10; ++row) {
-// EXPECT_EQ((*column->data.data[0])[row].get<Int64>(), 30);
-// }
-// }
-// }
-//
-// // check sparse column
-// const auto& [path, value] = dst->get_sparse_data_paths_and_values();
-// const auto& offsets = dst->serialized_sparse_column_offsets();
-//
-// // v.a v.c
-// for (int row = 0; row < 5; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// EXPECT_EQ(data, StringRef("v.a", 3));
-// auto pair = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-// auto data2 = path->get_data_at(start);
-// EXPECT_EQ(data2, StringRef("v.c", 3));
-// auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair2.first.get<Int64>(), 20);
-//
-// EXPECT_EQ(start, end);
-// }
-//
-// // v.a v.c v.d.d
-// for (int row = 5; row < 10; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// EXPECT_EQ(data, StringRef("v.a", 3));
-// auto pair = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-// auto data2 = path->get_data_at(start);
-// EXPECT_EQ(data2, StringRef("v.c", 3));
-// auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair2.first.get<Int64>(), 20);
-//
-// auto data3 = path->get_data_at(start);
-// EXPECT_EQ(data3, StringRef("v.d.d", 5));
-// auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair3.first.get<String>(), "50");
-//
-// EXPECT_EQ(start, end);
-// }
-// }
-//
-// auto convert_to_jsonb_field(auto serde, auto& column) {
-// vectorized::DataTypeSerDe::FormatOptions options;
-// options.escape_char = '\\';
-// auto tmp_col = ColumnString::create();
-// VectorBufferWriter write_buffer(*tmp_col.get());
-// EXPECT_TRUE(serde->serialize_column_to_json(column, 0, 1, write_buffer,
options).ok());
-//
-// write_buffer.commit();
-// auto str_ref = tmp_col->get_data_at(0);
-// Slice data((char*)(str_ref.data), str_ref.size);
-//
-// auto jsonb_type =
doris::vectorized::DataTypeFactory::instance().create_data_type(
-// TypeIndex::JSONB, false);
-// auto jsonb_serde = jsonb_type->get_serde();
-// auto jsonb_column = jsonb_type->create_column();
-//
-// DataTypeSerDe::FormatOptions format_options;
-// format_options.converted_from_string = true;
-// EXPECT_TRUE(
-// jsonb_serde->deserialize_one_cell_from_json(*jsonb_column,
data, format_options).ok());
-// auto res = jsonb_column->get_data_at(0);
-// return JsonbField(res.data, res.size);
-// }
-//
-// auto convert_string_to_jsonb_field(auto& column) {
-// auto str_ref = column.get_data_at(0);
-// Slice data((char*)(str_ref.data), str_ref.size);
-//
-// auto jsonb_type =
doris::vectorized::DataTypeFactory::instance().create_data_type(
-// TypeIndex::JSONB, false);
-// auto jsonb_serde = jsonb_type->get_serde();
-// auto jsonb_column = jsonb_type->create_column();
-// DataTypeSerDe::FormatOptions format_options;
-// format_options.converted_from_string = true;
-// format_options.escape_char = '\\';
-//
-// EXPECT_TRUE(
-// jsonb_serde->deserialize_one_cell_from_json(*jsonb_column,
data, format_options).ok());
-// auto res = jsonb_column->get_data_at(0);
-// return JsonbField(res.data, res.size);
-// }
-//
-// doris::vectorized::Field get_jsonb_field(std::string_view type) {
-// static std::unordered_map<std::string_view, doris::vectorized::Field>
field_map;
-// if (field_map.empty()) {
-// DataTypePtr data_type_int =
doris::vectorized::DataTypeFactory::instance().create_data_type(
-// TypeIndex::Int8, false);
-// DataTypePtr data_type_array_int =
-//
std::make_shared<doris::vectorized::DataTypeArray>(data_type_int);
-// auto array_column_int = data_type_array_int->create_column();
-// array_column_int->insert(VariantUtil::get_field("array_int"));
-// auto array_serde_int = data_type_array_int->get_serde();
-// field_map["array_int"] = convert_to_jsonb_field(array_serde_int,
*array_column_int);
-//
-// DataTypePtr data_type_str =
doris::vectorized::DataTypeFactory::instance().create_data_type(
-// TypeIndex::String, false);
-// DataTypePtr data_type_array_str =
-//
std::make_shared<doris::vectorized::DataTypeArray>(data_type_str);
-// auto array_column_str = data_type_array_str->create_column();
-// array_column_str->insert(VariantUtil::get_field("array_str"));
-// auto array_serde_str = data_type_array_str->get_serde();
-// field_map["array_str"] = convert_to_jsonb_field(array_serde_str,
*array_column_str);
-//
-// auto column_int = data_type_int->create_column();
-// column_int->insert(VariantUtil::get_field("int"));
-// auto serde_int = data_type_int->get_serde();
-// field_map["int"] = convert_to_jsonb_field(serde_int, *column_int);
-//
-// // auto column_str = data_type_str->create_column();
-// // column_str->insert(VariantUtil::get_field("string"));
-// // field_map["string"] = convert_string_to_jsonb_field(*column_str);
-// }
-// return field_map[type];
-// }
-//
-// // std::string convert_jsonb_field_to_string(doris::vectorized::Field
jsonb) {
-// // const auto& val = jsonb.get<JsonbField>();
-// // const JsonbValue* json_val =
JsonbDocument::createValue(val.get_value(), val.get_size());
-//
-// // rapidjson::Document doc;
-// // doc.SetObject();
-// // rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
-// // rapidjson::Value json_value;
-// // convert_jsonb_to_rapidjson(*json_val, json_value, allocator);
-// // doc.AddMember("value", json_value, allocator);
-// // rapidjson::StringBuffer buffer;
-// // rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
-// // doc.Accept(writer);
-// // return std::string(buffer.GetString());
-// // }
-//
-// std::string convert_field_to_string(doris::vectorized::Field array) {
+#include "vec/columns/column_object.h"
+
+#include <gmock/gmock-more-matchers.h>
+#include <gtest/gtest.h>
+#include <rapidjson/prettywriter.h>
+#include <stdio.h>
+
+#include "runtime/jsonb_value.h"
+#include "testutil/variant_util.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_factory.hpp"
+
+using namespace doris::vectorized;
+
+using namespace doris;
+// #define ADD_SUB_COLUMN(key) \
+// varaint->add_sub_column(PathInData(std::string_view(key)), 0);
+
+// Recursively converts a vectorized::Field into a rapidjson value written to `target`.
+//
+// Null, Int64, Float64 and String become the matching JSON scalars, JSONB is decoded via
+// convert_jsonb_to_rapidjson, and Array / VariantMap recurse into their elements.
+// For a VariantMap, entries whose field is null are skipped, and so are entries with an
+// empty path whose converted value is a JSON null.
+//
+// `allocator` is used for every nested value and for copies of string data, so `target`
+// does not keep pointers into `field`.
+// Throws doris::Exception(INTERNAL_ERROR) for any field type not listed above.
+void convert_field_to_rapidjson(const vectorized::Field& field, rapidjson::Value& target,
+                                rapidjson::Document::AllocatorType& allocator) {
+    switch (field.get_type()) {
+    case vectorized::Field::Types::Null:
+        target.SetNull();
+        break;
+    case vectorized::Field::Types::Int64:
+        target.SetInt64(field.get<Int64>());
+        break;
+    case vectorized::Field::Types::Float64:
+        target.SetDouble(field.get<Float64>());
+        break;
+    case vectorized::Field::Types::JSONB: {
+        const auto& val = field.get<JsonbField>();
+        JsonbValue* json_val = JsonbDocument::createValue(val.get_value(), val.get_size());
+        convert_jsonb_to_rapidjson(*json_val, target, allocator);
+        break;
+    }
+    case vectorized::Field::Types::String: {
+        const String& val = field.get<String>();
+        // Pass the allocator so rapidjson copies the bytes instead of referencing `field`.
+        target.SetString(val.data(), cast_set<rapidjson::SizeType>(val.size()), allocator);
+        break;
+    }
+    case vectorized::Field::Types::Array: {
+        const vectorized::Array& array = field.get<Array>();
+        target.SetArray();
+        for (const vectorized::Field& item : array) {
+            rapidjson::Value val;
+            convert_field_to_rapidjson(item, val, allocator);
+            target.PushBack(val, allocator);
+        }
+        break;
+    }
+    case vectorized::Field::Types::VariantMap: {
+        const vectorized::VariantMap& map = field.get<VariantMap>();
+        target.SetObject();
+        for (const auto& item : map) {
+            if (item.second.is_null()) {
+                continue;
+            }
+            rapidjson::Value key;
+            // Copy the path text as well, so the key does not point into the map.
+            key.SetString(item.first.get_path().data(),
+                          cast_set<rapidjson::SizeType>(item.first.get_path().size()),
+                          allocator);
+            rapidjson::Value val;
+            convert_field_to_rapidjson(item.second, val, allocator);
+            if (val.IsNull() && item.first.empty()) {
+                // skip null value with empty key, indicate the null json value of root in
+                // variant map, usually padding in nested arrays
+                continue;
+            }
+            target.AddMember(key, val, allocator);
+        }
+        break;
+    }
+    default:
+        throw doris::Exception(ErrorCode::INTERNAL_ERROR, "unkown field type: {}",
+                               field.get_type_name());
+    }
+}
+
+// Writes `map` into `target` as a JSON object, one member per path.
+//
+// Entries whose field is null are skipped, and so are entries with an empty path whose
+// converted value is a JSON null. Values are converted with convert_field_to_rapidjson.
+// `allocator` backs the members, the nested values and the copied key strings.
+void convert_variant_map_to_rapidjson(const vectorized::VariantMap& map, rapidjson::Value& target,
+                                      rapidjson::Document::AllocatorType& allocator) {
+    target.SetObject();
+    for (const auto& item : map) {
+        if (item.second.is_null()) {
+            continue;
+        }
+        rapidjson::Value key;
+        // Pass the allocator so rapidjson copies the path text instead of pointing into `map`.
+        key.SetString(item.first.get_path().data(),
+                      cast_set<rapidjson::SizeType>(item.first.get_path().size()), allocator);
+        rapidjson::Value val;
+        convert_field_to_rapidjson(item.second, val, allocator);
+        if (val.IsNull() && item.first.empty()) {
+            // skip null value with empty key, indicate the null json value of root in
+            // variant map, usually padding in nested arrays
+            continue;
+        }
+        target.AddMember(key, val, allocator);
+    }
+}
+
+// Turns `target` into a JSON array and appends one converted value per element of
+// `array`, in order. Each element goes through convert_field_to_rapidjson.
+void convert_array_to_rapidjson(const vectorized::Array& array, rapidjson::Value& target,
+                                rapidjson::Document::AllocatorType& allocator) {
+    target.SetArray();
+    for (auto it = array.begin(); it != array.end(); ++it) {
+        rapidjson::Value element;
+        convert_field_to_rapidjson(*it, element, allocator);
+        target.PushBack(element, allocator);
+    }
+}
+
+// Inserts the same Int64 value into subcolumn "v.f" three times while declaring 0, 1 and
+// then 2 dimensions in the accompanying FieldInfo. The test contains no assertions.
+TEST(ColumnVariantTest, insert_try_insert) {
+    auto variant = VariantUtil::construct_dst_varint_column();
+
+    FieldInfo field_info;
+    field_info.scalar_type_id = TypeIndex::Nothing;
+    field_info.num_dimensions = 0;
+
+    PathInData target_path("v.f");
+    auto subcolumn = variant->get_subcolumn(target_path);
+    Int64 inserted = 43;
+
+    // declared as a scalar
+    subcolumn->insert(inserted, field_info);
+
+    // declared with one dimension
+    field_info.num_dimensions = 1;
+    subcolumn->insert(inserted, field_info);
+
+    // declared with two dimensions
+    field_info.num_dimensions = 2;
+    subcolumn->insert(inserted, field_info);
+}
+
+// Finalizes the basic fixture in WRITE_MODE, runs pick_subcolumns_to_sparse_column and
+// then checks the row count, the number of subcolumns and the sparse-column offsets.
+TEST(ColumnVariantTest, basic_finalize) {
+    auto variant_col = VariantUtil::construct_basic_varint_column();
+
+    // 4. finalize
+    EXPECT_TRUE(variant_col->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    EXPECT_TRUE(variant_col->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(variant_col->size(), 10);
+
+    // check finalized subcolumn
+    // 5 subcolumn + 1 root
+    EXPECT_EQ(variant_col->subcolumns.size(), 6);
+    for (const auto& entry : variant_col->subcolumns) {
+        // every non-root subcolumn is expected to hold exactly one data part
+        if (!entry->data.is_root) {
+            EXPECT_EQ(entry->data.data.size(), 1);
+        }
+    }
+
+    // check sparse column
+    const auto& sparse_offsets = variant_col->serialized_sparse_column_offsets();
+    for (int row = 0; row < 10; ++row) {
+        if (row < 5) {
+            // rows 0-4: offset is expected to stay at 0
+            EXPECT_EQ(sparse_offsets[row], 0);
+        } else {
+            // rows 5-9: each row is expected to add 3 sparse entries
+            EXPECT_EQ(sparse_offsets[row] - sparse_offsets[row - 1], 3);
+        }
+    }
+}
+
+// Finalizes the basic fixture and, for rows 5-9, walks the sparse column entry by entry,
+// checking each stored path and the value deserialized for it.
+TEST(ColumnVariantTest, basic_deserialize) {
+    auto variant_col = VariantUtil::construct_basic_varint_column();
+
+    // 4. finalize
+    EXPECT_TRUE(variant_col->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    EXPECT_TRUE(variant_col->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(variant_col->size(), 10);
+
+    const auto& [sparse_paths, sparse_values] = variant_col->get_sparse_data_paths_and_values();
+    const auto& sparse_offsets = variant_col->serialized_sparse_column_offsets();
+    for (size_t row = 5; row != 10; ++row) {
+        // [cursor, row_end) is the slice of sparse entries belonging to this row
+        size_t cursor = sparse_offsets[row - 1];
+        size_t row_end = sparse_offsets[row];
+
+        // first entry: v.b.d -> Int64 30
+        auto first_path = sparse_paths->get_data_at(cursor);
+        EXPECT_EQ(first_path, StringRef("v.b.d", 5));
+        auto first_entry = variant_col->deserialize_from_sparse_column(sparse_values, cursor);
+        ++cursor;
+        EXPECT_EQ(first_entry.first.get<Int64>(), 30);
+
+        // second entry: v.c.d -> Int64 30
+        auto second_path = sparse_paths->get_data_at(cursor);
+        auto second_entry = variant_col->deserialize_from_sparse_column(sparse_values, cursor);
+        ++cursor;
+        EXPECT_EQ(second_path, StringRef("v.c.d", 5));
+        EXPECT_EQ(second_entry.first.get<Int64>(), 30);
+
+        // third entry: v.d.d -> String "50"
+        auto third_path = sparse_paths->get_data_at(cursor);
+        auto third_entry = variant_col->deserialize_from_sparse_column(sparse_values, cursor);
+        ++cursor;
+        EXPECT_EQ(third_path, StringRef("v.d.d", 5));
+        EXPECT_EQ(third_entry.first.get<String>(), "50");
+
+        // all entries of the row must have been consumed
+        EXPECT_EQ(cursor, row_end);
+    }
+}
+
+// Copies all 10 rows of the finalized basic fixture into the destination fixture with
+// insert_range_from, then verifies the destination's subcolumns and its sparse column.
+TEST(ColumnVariantTest, basic_inset_range_from) {
+    auto source = VariantUtil::construct_basic_varint_column();
+    EXPECT_TRUE(source->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+    EXPECT_TRUE(source->pick_subcolumns_to_sparse_column({}).ok());
+    EXPECT_EQ(source->size(), 10);
+
+    // dst is an empty column, has 5 subcolumn + 1 root
+    auto target = VariantUtil::construct_dst_varint_column();
+
+    // Expected routing of the source paths:
+    //   subcolumn     -> subcolumn      v.b v.f v.e
+    //   subcolumn     -> sparse_column  v.a v.c
+    //   sparse_column -> subcolumn      v.b.d v.c.d
+    //   sparse_column -> sparse_column  v.d.d
+    target->insert_range_from(*source, 0, 10);
+    target->finalize();
+    EXPECT_EQ(target->size(), 10);
+
+    // 5 subcolumn
+    EXPECT_EQ(target->subcolumns.size(), 6);
+    ColumnObject::Subcolumns ordered = target->subcolumns;
+    std::sort(ordered.begin(), ordered.end(),
+              [](const auto& left, const auto& right) { return left->path < right->path; });
+
+    for (const auto& entry : ordered) {
+        if (entry->data.is_root) {
+            continue;
+        }
+        EXPECT_EQ(entry->data.data.size(), 1);
+        EXPECT_EQ(entry->data.data[0]->size(), 10);
+
+        // Three-character paths (e.g. "v.b") are expected to be non-null in every row.
+        if (entry->path.get_path().size() == 3) {
+            EXPECT_EQ(entry->data.get_non_null_value_size(), 10);
+            continue;
+        }
+
+        // Five-character paths (e.g. "v.b.d"): null for rows 0-4, Int64 30 for rows 5-9.
+        EXPECT_EQ(entry->path.get_path().size(), 5);
+        EXPECT_EQ(entry->data.get_non_null_value_size(), 5);
+        for (size_t row = 0; row != 10; ++row) {
+            if (row < 5) {
+                EXPECT_TRUE(entry->data.data[0]->is_null_at(row));
+            } else {
+                EXPECT_EQ((*entry->data.data[0])[row].get<Int64>(), 30);
+            }
+        }
+    }
+
+    // check sparse column
+    const auto& [sparse_paths, sparse_values] = target->get_sparse_data_paths_and_values();
+    const auto& sparse_offsets = target->serialized_sparse_column_offsets();
+
+    // rows 0-4 carry v.a v.c; rows 5-9 carry v.a v.c v.d.d
+    for (int row = 0; row < 10; ++row) {
+        // NOTE(review): for row == 0 this reads index -1; presumably the offsets container
+        // tolerates that (left-padded array) - confirm.
+        size_t cursor = sparse_offsets[row - 1];
+        size_t row_end = sparse_offsets[row];
+
+        auto path_a = sparse_paths->get_data_at(cursor);
+        EXPECT_EQ(path_a, StringRef("v.a", 3));
+        auto entry_a = target->deserialize_from_sparse_column(sparse_values, cursor);
+        ++cursor;
+        EXPECT_EQ(entry_a.first.get<Int64>(), 20);
+
+        auto path_c = sparse_paths->get_data_at(cursor);
+        EXPECT_EQ(path_c, StringRef("v.c", 3));
+        auto entry_c = target->deserialize_from_sparse_column(sparse_values, cursor);
+        ++cursor;
+        EXPECT_EQ(entry_c.first.get<Int64>(), 20);
+
+        if (row >= 5) {
+            auto path_dd = sparse_paths->get_data_at(cursor);
+            EXPECT_EQ(path_dd, StringRef("v.d.d", 5));
+            auto entry_dd = target->deserialize_from_sparse_column(sparse_values, cursor);
+            ++cursor;
+            EXPECT_EQ(entry_dd.first.get<String>(), "50");
+        }
+
+        // all entries of the row must have been consumed
+        EXPECT_EQ(cursor, row_end);
+    }
+}
+
+// Serializes row 0 of `column` to JSON text with `serde`, parses that text back through the
+// JSONB data type's serde and returns the resulting cell as a JsonbField.
+// Failures of either step are reported through EXPECT_TRUE (non-fatal).
+auto convert_to_jsonb_field(auto serde, auto& column) {
+    // Step 1: column -> JSON text, written into a temporary string column.
+    vectorized::DataTypeSerDe::FormatOptions serialize_options;
+    serialize_options.escape_char = '\\';
+    auto json_text_column = ColumnString::create();
+    VectorBufferWriter json_writer(*json_text_column.get());
+    EXPECT_TRUE(
+            serde->serialize_column_to_json(column, 0, 1, json_writer, serialize_options).ok());
+    json_writer.commit();
+
+    auto json_text = json_text_column->get_data_at(0);
+    Slice json_slice(const_cast<char*>(json_text.data), json_text.size);
+
+    // Step 2: JSON text -> JSONB cell.
+    auto jsonb_data_type = doris::vectorized::DataTypeFactory::instance().create_data_type(
+            TypeIndex::JSONB, false);
+    auto jsonb_serde = jsonb_data_type->get_serde();
+    auto jsonb_cells = jsonb_data_type->create_column();
+
+    DataTypeSerDe::FormatOptions parse_options;
+    parse_options.converted_from_string = true;
+    EXPECT_TRUE(
+            jsonb_serde->deserialize_one_cell_from_json(*jsonb_cells, json_slice, parse_options)
+                    .ok());
+
+    auto encoded = jsonb_cells->get_data_at(0);
+    return JsonbField(encoded.data, encoded.size);
+}
+
+// Takes the text stored in row 0 of `column`, parses it through the JSONB data type's serde
+// and returns the resulting cell as a JsonbField.
+// A parse failure is reported through EXPECT_TRUE (non-fatal).
+auto convert_string_to_jsonb_field(auto& column) {
+    auto source_text = column.get_data_at(0);
+    Slice source_slice(const_cast<char*>(source_text.data), source_text.size);
+
+    DataTypeSerDe::FormatOptions parse_options;
+    parse_options.converted_from_string = true;
+    parse_options.escape_char = '\\';
+
+    auto jsonb_data_type = doris::vectorized::DataTypeFactory::instance().create_data_type(
+            TypeIndex::JSONB, false);
+    auto jsonb_serde = jsonb_data_type->get_serde();
+    auto jsonb_cells = jsonb_data_type->create_column();
+
+    EXPECT_TRUE(
+            jsonb_serde->deserialize_one_cell_from_json(*jsonb_cells, source_slice, parse_options)
+                    .ok());
+
+    auto encoded = jsonb_cells->get_data_at(0);
+    return JsonbField(encoded.data, encoded.size);
+}
+
+// Returns the cached JSONB field registered under `type`.
+//
+// The cache is filled on the first call with three entries - "array_int", "array_str" and
+// "int" - each built by inserting the matching VariantUtil::get_field() fixture into a
+// column and passing it through convert_to_jsonb_field.
+// For any other `type` a default-constructed Field is returned.
+doris::vectorized::Field get_jsonb_field(std::string_view type) {
+    static std::unordered_map<std::string_view, doris::vectorized::Field> field_map;
+    if (field_map.empty()) {
+        DataTypePtr data_type_int = doris::vectorized::DataTypeFactory::instance().create_data_type(
+                TypeIndex::Int8, false);
+        DataTypePtr data_type_array_int =
+                std::make_shared<doris::vectorized::DataTypeArray>(data_type_int);
+        auto array_column_int = data_type_array_int->create_column();
+        array_column_int->insert(VariantUtil::get_field("array_int"));
+        auto array_serde_int = data_type_array_int->get_serde();
+        field_map["array_int"] = convert_to_jsonb_field(array_serde_int, *array_column_int);
+
+        DataTypePtr data_type_str = doris::vectorized::DataTypeFactory::instance().create_data_type(
+                TypeIndex::String, false);
+        DataTypePtr data_type_array_str =
+                std::make_shared<doris::vectorized::DataTypeArray>(data_type_str);
+        auto array_column_str = data_type_array_str->create_column();
+        array_column_str->insert(VariantUtil::get_field("array_str"));
+        auto array_serde_str = data_type_array_str->get_serde();
+        field_map["array_str"] = convert_to_jsonb_field(array_serde_str, *array_column_str);
+
+        auto column_int = data_type_int->create_column();
+        column_int->insert(VariantUtil::get_field("int"));
+        auto serde_int = data_type_int->get_serde();
+        field_map["int"] = convert_to_jsonb_field(serde_int, *column_int);
+
+        // auto column_str = data_type_str->create_column();
+        // column_str->insert(VariantUtil::get_field("string"));
+        // field_map["string"] = convert_string_to_jsonb_field(*column_str);
+    }
+
+    // Use find() rather than operator[]: operator[] would insert a new entry for an unknown
+    // key, and that key is a string_view borrowing the caller's buffer, which may not outlive
+    // the static map. The value returned for a miss is the same default Field either way.
+    const auto it = field_map.find(type);
+    if (it == field_map.end()) {
+        return doris::vectorized::Field();
+    }
+    return it->second;
+}
+
+// std::string convert_jsonb_field_to_string(doris::vectorized::Field jsonb) {
+// const auto& val = jsonb.get<JsonbField>();
+// const JsonbValue* json_val =
JsonbDocument::createValue(val.get_value(), val.get_size());
+
// rapidjson::Document doc;
// doc.SetObject();
// rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
// rapidjson::Value json_value;
-// // DataTypeSerDe::convert_field_to_rapidjson(array, json_value,
allocator);
+// convert_jsonb_to_rapidjson(*json_val, json_value, allocator);
// doc.AddMember("value", json_value, allocator);
// rapidjson::StringBuffer buffer;
// rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
// doc.Accept(writer);
// return std::string(buffer.GetString());
// }
-//
-// TEST(ColumnVariantTest, is_null_at) {
-// auto v = VariantUtil::construct_dst_varint_column();
-// PathInData path("v.f");
-// auto sub = v->get_subcolumn(path);
-// std::cout << sub->get_least_common_typeBase()->get_name() << std::endl;
-// EXPECT_TRUE(sub->is_null_at(0));
-//
-// auto v1 = VariantUtil::construct_advanced_varint_column();
-// PathInData path1("v.b.d");
-// auto sub1 = v1->get_subcolumn(path1);
-// EXPECT_TRUE(sub1->is_null_at(2));
-// EXPECT_ANY_THROW(sub1->is_null_at(16));
-// vectorized::Field f;
-// EXPECT_ANY_THROW(sub1->get(16, f));
-// std::cout << sub1->num_rows << std::endl;
-// EXPECT_NO_THROW(sub1->resize(sub1->num_rows));
-//
-// auto [sparse_column_keys, sparse_column_values] =
v1->get_sparse_data_paths_and_values();
-// std::string_view pa("v.a");
-// EXPECT_NO_THROW(
-// sub1->serialize_to_sparse_column(sparse_column_keys, pa,
sparse_column_values, 2));
-// EXPECT_ANY_THROW(
-// sub1->serialize_to_sparse_column(sparse_column_keys, pa,
sparse_column_values, 16));
-// }
-//
-// TEST(ColumnVariantTest, advanced_finalize) {
-// auto variant = VariantUtil::construct_advanced_varint_column();
-//
-// // 4. finalize
-//
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-// EXPECT_EQ(variant->size(), 15);
-//
-// // check finalized subcolumn
-// // 5 subcolumn + 1 root
-// EXPECT_EQ(variant->subcolumns.size(), 6);
-// for (const auto& column : variant->subcolumns) {
-// if (column->data.is_root) {
-// continue;
-// }
-// EXPECT_EQ(column->data.data.size(), 1);
-// }
-//
-// // check sparse column
-// const auto& offsets = variant->serialized_sparse_column_offsets();
-// for (int row = 0; row < 5; ++row) {
-// EXPECT_EQ(offsets[row] - offsets[row - 1], 0);
-// }
-// for (int row = 5; row < 15; ++row) {
-// EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
-// }
-//
-// {
-// // Test fill_path_column_from_sparse_data
-// auto map = std::make_unique<NullMap>(15, 0);
-// vectorized::ColumnObject::fill_path_column_from_sparse_data(
-// *variant->get_subcolumn({}) /*root*/, map.get(), StringRef
{"array"},
-// variant->get_sparse_column(), 0, 5);
-// vectorized::ColumnObject::fill_path_column_from_sparse_data(
-// *variant->get_subcolumn({}) /*root*/, map.get(), StringRef
{"array"},
-// variant->get_sparse_column(), 5, 15);
-// }
-// }
-//
-// TEST(ColumnVariantTest, advanced_deserialize) {
-// auto variant = VariantUtil::construct_advanced_varint_column();
-//
-// // 4. finalize
-//
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-// EXPECT_EQ(variant->size(), 15);
-//
-// const auto& [path, value] = variant->get_sparse_data_paths_and_values();
-// const auto& offsets = variant->serialized_sparse_column_offsets();
-// for (size_t row = 5; row < 10; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// auto pair = variant->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data, StringRef("v.b.d", 5));
-// EXPECT_EQ(convert_field_to_string(pair.first),
-// convert_field_to_string(get_jsonb_field("array_int")));
-//
-// auto data2 = path->get_data_at(start);
-// auto pair2 = variant->deserialize_from_sparse_column(value,
start++);
-// EXPECT_EQ(data2, StringRef("v.c.d", 5));
-// EXPECT_EQ(convert_field_to_string(pair2.first),
-//
convert_field_to_string(VariantUtil::get_field("string")));
-//
-// auto data3 = path->get_data_at(start);
-// auto pair3 = variant->deserialize_from_sparse_column(value,
start++);
-// EXPECT_EQ(data3, StringRef("v.d.d", 5));
-// EXPECT_EQ(convert_field_to_string(pair3.first),
-// convert_field_to_string(get_jsonb_field("array_int")));
-// EXPECT_EQ(start, end);
-// }
-//
-// for (size_t row = 10; row < 15; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// auto pair = variant->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data, StringRef("v.b.d", 5));
-// EXPECT_EQ(convert_field_to_string(pair.first),
-// convert_field_to_string(get_jsonb_field("array_str")));
-//
-// auto data2 = path->get_data_at(start);
-// auto pair2 = variant->deserialize_from_sparse_column(value,
start++);
-// EXPECT_EQ(data2, StringRef("v.c.d", 5));
-// EXPECT_EQ(convert_field_to_string(pair2.first),
-// convert_field_to_string(get_jsonb_field("int")));
-//
-// auto data3 = path->get_data_at(start);
-// auto pair3 = variant->deserialize_from_sparse_column(value,
start++);
-// EXPECT_EQ(data3, StringRef("v.d.d", 5));
-// EXPECT_EQ(convert_field_to_string(pair3.first),
-// convert_field_to_string(get_jsonb_field("array_str")));
-// EXPECT_EQ(start, end);
-// }
-// }
-//
-// TEST(ColumnVariantTest, advanced_insert_range_from) {
-// auto src = VariantUtil::construct_advanced_varint_column();
-// EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-// EXPECT_EQ(src->size(), 15);
-//
-// auto dst = VariantUtil::construct_dst_varint_column();
-//
-// // subcolumn->subcolumn v.b v.f v.e
-// // subcolumn->sparse_column v.a v.c
-// // sparse_column->subcolumn v.b.d v.c.d
-// // sparse_column->sparse_column v.d.d
-// dst->insert_range_from(*src, 0, src->size());
-// dst->finalize();
-// EXPECT_EQ(dst->size(), 15);
-//
-// EXPECT_EQ(dst->subcolumns.size(), 6);
-// ColumnObject::Subcolumns dst_subcolumns = dst->subcolumns;
-//
-// std::sort(
-// dst_subcolumns.begin(), dst_subcolumns.end(),
-// [](const auto& lhsItem, const auto& rhsItem) { return
lhsItem->path < rhsItem->path; });
-//
-// // subcolumns
-// for (const auto& column : dst_subcolumns) {
-// if (column->data.is_root) {
-// continue;
-// }
-// EXPECT_EQ(column->data.data.size(), 1);
-// EXPECT_EQ(column->data.data[0]->size(), 15);
-//
-// if (column->path.get_path().size() == 3) {
-// EXPECT_EQ(column->data.get_non_null_value_size(), 15);
-// if (column->path.get_path() == "v.b") {
-// EXPECT_EQ(assert_cast<const
DataTypeNullable*>(column->data.data_types[0].get())
-// ->get_nested_type()
-// ->get_type_id(),
-// TypeIndex::JSONB);
-// }
-// } else if (column->path.get_path().size() == 5) {
-// EXPECT_EQ(column->data.get_non_null_value_size(), 10);
-// EXPECT_EQ(assert_cast<const
DataTypeNullable*>(column->data.data_types[0].get())
-// ->get_nested_type()
-// ->get_type_id(),
-// TypeIndex::JSONB);
-// for (size_t row = 0; row < 5; ++row) {
-// EXPECT_TRUE(column->data.data[0]->is_null_at(row));
-// }
-// }
-// }
-//
-// // sparse columns
-// const auto& [path, value] = dst->get_sparse_data_paths_and_values();
-// const auto& offsets = dst->serialized_sparse_column_offsets();
-//
-// // v.a v.c
-// for (int row = 0; row < 5; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// EXPECT_EQ(data, StringRef("v.a", 3));
-// auto pair = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-// auto data2 = path->get_data_at(start);
-// EXPECT_EQ(data2, StringRef("v.c", 3));
-// auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(convert_field_to_string(pair2.first),
-//
convert_field_to_string(VariantUtil::get_field("array_int")));
-//
-// EXPECT_EQ(start, end);
-// }
-//
-// for (int row = 5; row < 10; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// auto pair = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data, StringRef("v.a", 3));
-// EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-// auto data2 = path->get_data_at(start);
-// auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data2, StringRef("v.c", 3));
-// EXPECT_EQ(convert_field_to_string(pair2.first),
-//
convert_field_to_string(VariantUtil::get_field("array_int")));
-//
-// auto data3 = path->get_data_at(start);
-// auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data3, StringRef("v.d.d", 5));
-// EXPECT_EQ(convert_field_to_string(pair3.first),
-// convert_field_to_string(get_jsonb_field("array_int")));
-//
-// EXPECT_EQ(start, end);
-// }
-//
-// for (int row = 10; row < 15; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// auto pair = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data, StringRef("v.a", 3));
-// EXPECT_EQ(pair.first.get<Int64>(), 20);
-//
-// auto data2 = path->get_data_at(start);
-// auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data2, StringRef("v.c", 3));
-// EXPECT_EQ(convert_field_to_string(pair2.first),
-//
convert_field_to_string(VariantUtil::get_field("array_int")));
-//
-// auto data3 = path->get_data_at(start);
-// auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data3, StringRef("v.d.d", 5));
-// EXPECT_EQ(convert_field_to_string(pair3.first),
-// convert_field_to_string(get_jsonb_field("array_str")));
-//
-// EXPECT_EQ(start, end);
-// }
-// }
-//
-// TEST(ColumnVariantTest, empty_inset_range_from) {
-// auto src = VariantUtil::construct_varint_column_only_subcolumns();
-// EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-// EXPECT_EQ(src->size(), 6);
-//
-// // dst is an empty column
-// auto dst = ColumnObject::create(5);
-//
-// // subcolumn->subcolumn v.a v.b v.c v.f v.e
-// dst->insert_range_from(*src, 0, 6);
-// EXPECT_EQ(dst->size(), 6);
-//
-// // 5 subcolumn
-// EXPECT_EQ(dst->subcolumns.size(), 6);
-//
-// for (const auto& column : dst->subcolumns) {
-// if (column->data.is_root) {
-// EXPECT_EQ(column->data.data.size(), 1);
-// EXPECT_EQ(column->data.data[0]->size(), 6);
-// EXPECT_EQ(column->data.get_non_null_value_size(), 1);
-// continue;
-// }
-// EXPECT_EQ(column->data.data.size(), 1);
-// EXPECT_EQ(column->data.data[0]->size(), 6);
-// EXPECT_EQ(column->data.get_non_null_value_size(), 5);
-// }
-//
-// // empty sparse column
-// const auto& [path, value] = dst->get_sparse_data_paths_and_values();
-// const auto& offsets = dst->serialized_sparse_column_offsets();
-// EXPECT_EQ(offsets[4], offsets[-1]);
-// EXPECT_EQ(path->size(), value->size());
-//
-// auto src_contains_seven_subcolumns =
VariantUtil::construct_varint_column_more_subcolumns();
-//
-// EXPECT_TRUE(
-//
src_contains_seven_subcolumns->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
-// EXPECT_EQ(src_contains_seven_subcolumns->size(), 5);
-//
-// // subcolumn->subcolumn v.a v.b v.c v.f v.e
-// // add sprase columns v.s v.x v.y v.z
-// dst->insert_range_from(*src_contains_seven_subcolumns, 0, 5);
-// EXPECT_EQ(dst->size(), 11);
-//
-// // 5 subcolumn
-// EXPECT_EQ(dst->subcolumns.size(), 6);
-//
-// for (int row = 0; row < 6; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// EXPECT_EQ(start, end);
-// }
-//
-// // v.s v.x v.y v.z
-// for (int row = 6; row < 11; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data0 = path->get_data_at(start);
-// EXPECT_EQ(data0, StringRef("v.s", 3));
-// auto pair0 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(convert_field_to_string(pair0.first),
-//
convert_field_to_string(VariantUtil::get_field("string")));
-//
-// auto data = path->get_data_at(start);
-// EXPECT_EQ(data, StringRef("v.x", 3));
-// auto pair = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair.first.get<Int16>(),
std::numeric_limits<Int16>::max());
-//
-// auto data2 = path->get_data_at(start);
-// EXPECT_EQ(data2, StringRef("v.y", 3));
-// auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair2.first.get<Int32>(),
std::numeric_limits<Int32>::max());
-//
-// auto data3 = path->get_data_at(start);
-// EXPECT_EQ(data3, StringRef("v.z", 3));
-// auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair3.first.get<Int64>(),
-//
Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1));
-//
-// EXPECT_EQ(start, end);
-// }
-//
-// auto src_contains_subcoumns_and_sparse_columns =
VariantUtil::construct_basic_varint_column();
-// EXPECT_TRUE(src_contains_subcoumns_and_sparse_columns
-// ->finalize(ColumnObject::FinalizeMode::WRITE_MODE)
-// .ok());
-// EXPECT_EQ(src_contains_subcoumns_and_sparse_columns->size(), 10);
-//
-// // subcolumn->subcolumn v.a v.b v.c v.f v.e
-// // add sprase columns v.s v.x v.y v.b.d v.c.d v.d.d
-// dst->insert_range_from(*src_contains_subcoumns_and_sparse_columns, 0,
10);
-// EXPECT_EQ(dst->size(), 21);
-//
-// // 5 subcolumn
-// EXPECT_EQ(dst->subcolumns.size(), 6);
-//
-// for (int row = 0; row < 6; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// EXPECT_EQ(start, end);
-// }
-//
-// // v.x v.y
-// for (int row = 6; row < 11; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data0 = path->get_data_at(start);
-// EXPECT_EQ(data0, StringRef("v.s", 3));
-// auto pair0 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(convert_field_to_string(pair0.first),
-//
convert_field_to_string(VariantUtil::get_field("string")));
-//
-// auto data = path->get_data_at(start);
-// EXPECT_EQ(data, StringRef("v.x", 3));
-// auto pair = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair.first.get<Int16>(),
std::numeric_limits<Int16>::max());
-//
-// auto data2 = path->get_data_at(start);
-// EXPECT_EQ(data2, StringRef("v.y", 3));
-// auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair2.first.get<Int32>(),
std::numeric_limits<Int32>::max());
-//
-// auto data3 = path->get_data_at(start);
-// EXPECT_EQ(data3, StringRef("v.z", 3));
-// auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair3.first.get<Int64>(),
-//
Int64(static_cast<Int64>(std::numeric_limits<Int32>::max()) + 1));
-//
-// EXPECT_EQ(start, end);
-// }
-//
-// for (int row = 11; row < 16; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// EXPECT_EQ(start, end);
-// }
-//
-// //v.b.d v.c.d v.d.d
-// for (int row = 16; row < 21; ++row) {
-// size_t start = offsets[row - 1];
-// size_t end = offsets[row];
-//
-// auto data = path->get_data_at(start);
-// EXPECT_EQ(data, StringRef("v.b.d", 5));
-// auto pair = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(pair.first.get<Int64>(), 30);
-//
-// auto data2 = path->get_data_at(start);
-// auto pair2 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data2, StringRef("v.c.d", 5));
-// EXPECT_EQ(pair2.first.get<Int64>(), 30);
-//
-// auto data3 = path->get_data_at(start);
-// auto pair3 = dst->deserialize_from_sparse_column(value, start++);
-// EXPECT_EQ(data3, StringRef("v.d.d", 5));
-// EXPECT_EQ(pair3.first.get<String>(), "50");
-// EXPECT_EQ(start, end);
-// }
-// }
\ No newline at end of file
+
+std::string convert_field_to_string(doris::vectorized::Field array) {
+ rapidjson::Document doc;
+ doc.SetObject();
+ rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
+ rapidjson::Value json_value;
+ // DataTypeSerDe::convert_field_to_rapidjson(array, json_value, allocator);
+ doc.AddMember("value", json_value, allocator);
+ rapidjson::StringBuffer buffer;
+ rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
+ doc.Accept(writer);
+ return std::string(buffer.GetString());
+}
+
+TEST(ColumnVariantTest, is_null_at) {
+ auto v = VariantUtil::construct_dst_varint_column();
+ PathInData path("v.f");
+ auto sub = v->get_subcolumn(path);
+ std::cout << sub->get_least_common_typeBase()->get_name() << std::endl;
+ EXPECT_TRUE(sub->is_null_at(0));
+
+ auto v1 = VariantUtil::construct_advanced_varint_column();
+ PathInData path1("v.b.d");
+ auto sub1 = v1->get_subcolumn(path1);
+ EXPECT_TRUE(sub1->is_null_at(2));
+ EXPECT_ANY_THROW(sub1->is_null_at(16));
+ vectorized::Field f;
+ EXPECT_ANY_THROW(sub1->get(16, f));
+ std::cout << sub1->num_rows << std::endl;
+ EXPECT_NO_THROW(sub1->resize(sub1->num_rows));
+
+ auto [sparse_column_keys, sparse_column_values] =
v1->get_sparse_data_paths_and_values();
+ std::string_view pa("v.a");
+ EXPECT_NO_THROW(
+ sub1->serialize_to_sparse_column(sparse_column_keys, pa,
sparse_column_values, 2));
+ EXPECT_ANY_THROW(
+ sub1->serialize_to_sparse_column(sparse_column_keys, pa,
sparse_column_values, 16));
+}
+
+TEST(ColumnVariantTest, advanced_finalize) {
+ auto variant = VariantUtil::construct_advanced_varint_column();
+
+ // 4. finalize
+
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+ EXPECT_TRUE(variant->pick_subcolumns_to_sparse_column({}).ok());
+ EXPECT_EQ(variant->size(), 15);
+
+ // check finalized subcolumn
+ // 5 subcolumns + 1 root
+ EXPECT_EQ(variant->subcolumns.size(), 6);
+ for (const auto& column : variant->subcolumns) {
+ if (column->data.is_root) {
+ continue;
+ }
+ EXPECT_EQ(column->data.data.size(), 1);
+ }
+
+ // check sparse column
+ const auto& offsets = variant->serialized_sparse_column_offsets();
+ for (int row = 0; row < 5; ++row) {
+ EXPECT_EQ(offsets[row] - offsets[row - 1], 0);
+ }
+ for (int row = 5; row < 15; ++row) {
+ EXPECT_EQ(offsets[row] - offsets[row - 1], 3);
+ }
+
+ {
+ // Test fill_path_column_from_sparse_data
+ auto map = std::make_unique<NullMap>(15, 0);
+ vectorized::ColumnObject::fill_path_column_from_sparse_data(
+ *variant->get_subcolumn({}) /*root*/, map.get(), StringRef
{"array"},
+ variant->get_sparse_column(), 0, 5);
+ vectorized::ColumnObject::fill_path_column_from_sparse_data(
+ *variant->get_subcolumn({}) /*root*/, map.get(), StringRef
{"array"},
+ variant->get_sparse_column(), 5, 15);
+ }
+}
+
+TEST(ColumnVariantTest, advanced_deserialize) {
+ auto variant = VariantUtil::construct_advanced_varint_column();
+
+ // 4. finalize
+
EXPECT_TRUE(variant->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+ EXPECT_TRUE(variant->pick_subcolumns_to_sparse_column({}).ok());
+ EXPECT_EQ(variant->size(), 15);
+
+ const auto& [path, value] = variant->get_sparse_data_paths_and_values();
+ const auto& offsets = variant->serialized_sparse_column_offsets();
+ for (size_t row = 5; row < 10; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ auto data = path->get_data_at(start);
+ auto pair = variant->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data, StringRef("v.b.d", 5));
+ EXPECT_EQ(convert_field_to_string(pair.first),
+ convert_field_to_string(get_jsonb_field("array_int")));
+
+ auto data2 = path->get_data_at(start);
+ auto pair2 = variant->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data2, StringRef("v.c.d", 5));
+ EXPECT_EQ(convert_field_to_string(pair2.first),
+ convert_field_to_string(VariantUtil::get_field("string")));
+
+ auto data3 = path->get_data_at(start);
+ auto pair3 = variant->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data3, StringRef("v.d.d", 5));
+ EXPECT_EQ(convert_field_to_string(pair3.first),
+ convert_field_to_string(get_jsonb_field("array_int")));
+ EXPECT_EQ(start, end);
+ }
+
+ for (size_t row = 10; row < 15; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ auto data = path->get_data_at(start);
+ auto pair = variant->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data, StringRef("v.b.d", 5));
+ EXPECT_EQ(convert_field_to_string(pair.first),
+ convert_field_to_string(get_jsonb_field("array_str")));
+
+ auto data2 = path->get_data_at(start);
+ auto pair2 = variant->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data2, StringRef("v.c.d", 5));
+ EXPECT_EQ(convert_field_to_string(pair2.first),
+ convert_field_to_string(get_jsonb_field("int")));
+
+ auto data3 = path->get_data_at(start);
+ auto pair3 = variant->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data3, StringRef("v.d.d", 5));
+ EXPECT_EQ(convert_field_to_string(pair3.first),
+ convert_field_to_string(get_jsonb_field("array_str")));
+ EXPECT_EQ(start, end);
+ }
+}
+
+TEST(ColumnVariantTest, advanced_insert_range_from) {
+ auto src = VariantUtil::construct_advanced_varint_column();
+ EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+ EXPECT_TRUE(src->pick_subcolumns_to_sparse_column({}).ok());
+ EXPECT_EQ(src->size(), 15);
+
+ auto dst = VariantUtil::construct_dst_varint_column();
+
+ // subcolumn->subcolumn v.b v.f v.e
+ // subcolumn->sparse_column v.a v.c
+ // sparse_column->subcolumn v.b.d v.c.d
+ // sparse_column->sparse_column v.d.d
+ dst->insert_range_from(*src, 0, src->size());
+ dst->finalize();
+ EXPECT_EQ(dst->size(), 15);
+
+ EXPECT_EQ(dst->subcolumns.size(), 6);
+ ColumnObject::Subcolumns dst_subcolumns = dst->subcolumns;
+
+ std::sort(
+ dst_subcolumns.begin(), dst_subcolumns.end(),
+ [](const auto& lhsItem, const auto& rhsItem) { return
lhsItem->path < rhsItem->path; });
+
+ // subcolumns
+ for (const auto& column : dst_subcolumns) {
+ if (column->data.is_root) {
+ continue;
+ }
+ EXPECT_EQ(column->data.data.size(), 1);
+ EXPECT_EQ(column->data.data[0]->size(), 15);
+
+ if (column->path.get_path().size() == 3) {
+ EXPECT_EQ(column->data.get_non_null_value_size(), 15);
+ if (column->path.get_path() == "v.b") {
+ EXPECT_EQ(assert_cast<const
DataTypeNullable*>(column->data.data_types[0].get())
+ ->get_nested_type()
+ ->get_type_id(),
+ TypeIndex::JSONB);
+ }
+ } else if (column->path.get_path().size() == 5) {
+ EXPECT_EQ(column->data.get_non_null_value_size(), 10);
+ EXPECT_EQ(assert_cast<const
DataTypeNullable*>(column->data.data_types[0].get())
+ ->get_nested_type()
+ ->get_type_id(),
+ TypeIndex::JSONB);
+ for (size_t row = 0; row < 5; ++row) {
+ EXPECT_TRUE(column->data.data[0]->is_null_at(row));
+ }
+ }
+ }
+
+ // sparse columns
+ const auto& [path, value] = dst->get_sparse_data_paths_and_values();
+ const auto& offsets = dst->serialized_sparse_column_offsets();
+
+ // v.a v.c
+ for (int row = 0; row < 5; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ auto data = path->get_data_at(start);
+ EXPECT_EQ(data, StringRef("v.a", 3));
+ auto pair = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(pair.first.get<Int64>(), 20);
+
+ auto data2 = path->get_data_at(start);
+ EXPECT_EQ(data2, StringRef("v.c", 3));
+ auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(convert_field_to_string(pair2.first),
+
convert_field_to_string(VariantUtil::get_field("array_int")));
+
+ EXPECT_EQ(start, end);
+ }
+
+ for (int row = 5; row < 10; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ auto data = path->get_data_at(start);
+ auto pair = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data, StringRef("v.a", 3));
+ EXPECT_EQ(pair.first.get<Int64>(), 20);
+
+ auto data2 = path->get_data_at(start);
+ auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data2, StringRef("v.c", 3));
+ EXPECT_EQ(convert_field_to_string(pair2.first),
+
convert_field_to_string(VariantUtil::get_field("array_int")));
+
+ auto data3 = path->get_data_at(start);
+ auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data3, StringRef("v.d.d", 5));
+ EXPECT_EQ(convert_field_to_string(pair3.first),
+ convert_field_to_string(get_jsonb_field("array_int")));
+
+ EXPECT_EQ(start, end);
+ }
+
+ for (int row = 10; row < 15; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ auto data = path->get_data_at(start);
+ auto pair = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data, StringRef("v.a", 3));
+ EXPECT_EQ(pair.first.get<Int64>(), 20);
+
+ auto data2 = path->get_data_at(start);
+ auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data2, StringRef("v.c", 3));
+ EXPECT_EQ(convert_field_to_string(pair2.first),
+
convert_field_to_string(VariantUtil::get_field("array_int")));
+
+ auto data3 = path->get_data_at(start);
+ auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data3, StringRef("v.d.d", 5));
+ EXPECT_EQ(convert_field_to_string(pair3.first),
+ convert_field_to_string(get_jsonb_field("array_str")));
+
+ EXPECT_EQ(start, end);
+ }
+}
+
+TEST(ColumnVariantTest, empty_inset_range_from) {
+ auto src = VariantUtil::construct_varint_column_only_subcolumns();
+ EXPECT_TRUE(src->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+ EXPECT_TRUE(src->pick_subcolumns_to_sparse_column({}).ok());
+ EXPECT_EQ(src->size(), 6);
+
+ // dst is an empty column
+ auto dst = ColumnObject::create(5);
+
+ // subcolumn->subcolumn v.a v.b v.c v.f v.e
+ dst->insert_range_from(*src, 0, 6);
+ EXPECT_EQ(dst->size(), 6);
+
+ // 5 subcolumns + 1 root
+ EXPECT_EQ(dst->subcolumns.size(), 6);
+
+ for (const auto& column : dst->subcolumns) {
+ if (column->data.is_root) {
+ EXPECT_EQ(column->data.data.size(), 1);
+ EXPECT_EQ(column->data.data[0]->size(), 6);
+ EXPECT_EQ(column->data.get_non_null_value_size(), 1);
+ continue;
+ }
+ EXPECT_EQ(column->data.data.size(), 1);
+ EXPECT_EQ(column->data.data[0]->size(), 6);
+ EXPECT_EQ(column->data.get_non_null_value_size(), 5);
+ }
+
+ // empty sparse column
+ const auto& [path, value] = dst->get_sparse_data_paths_and_values();
+ const auto& offsets = dst->serialized_sparse_column_offsets();
+ EXPECT_EQ(offsets[4], offsets[-1]);
+ EXPECT_EQ(path->size(), value->size());
+
+ auto src_contains_seven_subcolumns =
VariantUtil::construct_varint_column_more_subcolumns();
+
+ EXPECT_TRUE(
+
src_contains_seven_subcolumns->finalize(ColumnObject::FinalizeMode::WRITE_MODE).ok());
+
EXPECT_TRUE(src_contains_seven_subcolumns->pick_subcolumns_to_sparse_column({}).ok());
+ EXPECT_EQ(src_contains_seven_subcolumns->size(), 5);
+
+ // subcolumn->subcolumn v.a v.b v.c v.f v.e
+ // add sparse columns v.s v.x v.y v.z
+ dst->insert_range_from(*src_contains_seven_subcolumns, 0, 5);
+ EXPECT_EQ(dst->size(), 11);
+
+ // 5 subcolumns + 1 root
+ EXPECT_EQ(dst->subcolumns.size(), 6);
+
+ for (int row = 0; row < 6; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ EXPECT_EQ(start, end);
+ }
+
+ // v.s v.x v.y v.z
+ for (int row = 6; row < 11; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ auto data0 = path->get_data_at(start);
+ EXPECT_EQ(data0, StringRef("v.s", 3));
+ auto pair0 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(convert_field_to_string(pair0.first),
+ convert_field_to_string(VariantUtil::get_field("string")));
+
+ auto data = path->get_data_at(start);
+ EXPECT_EQ(data, StringRef("v.x", 3));
+ auto pair = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(pair.first.get<Int16>(), std::numeric_limits<Int16>::max());
+
+ auto data2 = path->get_data_at(start);
+ EXPECT_EQ(data2, StringRef("v.y", 3));
+ auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(pair2.first.get<Int32>(), std::numeric_limits<Int32>::max());
+
+ auto data3 = path->get_data_at(start);
+ EXPECT_EQ(data3, StringRef("v.z", 3));
+ auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(pair3.first.get<Int64>(),
+ Int64(static_cast<Int64>(std::numeric_limits<Int32>::max())
+ 1));
+
+ EXPECT_EQ(start, end);
+ }
+
+ auto src_contains_subcoumns_and_sparse_columns =
VariantUtil::construct_basic_varint_column();
+ EXPECT_TRUE(src_contains_subcoumns_and_sparse_columns
+ ->finalize(ColumnObject::FinalizeMode::WRITE_MODE)
+ .ok());
+ EXPECT_TRUE(
+
src_contains_subcoumns_and_sparse_columns->pick_subcolumns_to_sparse_column({}).ok());
+ EXPECT_EQ(src_contains_subcoumns_and_sparse_columns->size(), 10);
+
+ // subcolumn->subcolumn v.a v.b v.c v.f v.e
+ // add sparse columns v.s v.x v.y v.b.d v.c.d v.d.d
+ dst->insert_range_from(*src_contains_subcoumns_and_sparse_columns, 0, 10);
+ EXPECT_EQ(dst->size(), 21);
+
+ // 5 subcolumns + 1 root
+ EXPECT_EQ(dst->subcolumns.size(), 6);
+
+ for (int row = 0; row < 6; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ EXPECT_EQ(start, end);
+ }
+
+ // v.s v.x v.y v.z
+ for (int row = 6; row < 11; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ auto data0 = path->get_data_at(start);
+ EXPECT_EQ(data0, StringRef("v.s", 3));
+ auto pair0 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(convert_field_to_string(pair0.first),
+ convert_field_to_string(VariantUtil::get_field("string")));
+
+ auto data = path->get_data_at(start);
+ EXPECT_EQ(data, StringRef("v.x", 3));
+ auto pair = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(pair.first.get<Int16>(), std::numeric_limits<Int16>::max());
+
+ auto data2 = path->get_data_at(start);
+ EXPECT_EQ(data2, StringRef("v.y", 3));
+ auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(pair2.first.get<Int32>(), std::numeric_limits<Int32>::max());
+
+ auto data3 = path->get_data_at(start);
+ EXPECT_EQ(data3, StringRef("v.z", 3));
+ auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(pair3.first.get<Int64>(),
+ Int64(static_cast<Int64>(std::numeric_limits<Int32>::max())
+ 1));
+
+ EXPECT_EQ(start, end);
+ }
+
+ for (int row = 11; row < 16; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ EXPECT_EQ(start, end);
+ }
+
+ //v.b.d v.c.d v.d.d
+ for (int row = 16; row < 21; ++row) {
+ size_t start = offsets[row - 1];
+ size_t end = offsets[row];
+
+ auto data = path->get_data_at(start);
+ EXPECT_EQ(data, StringRef("v.b.d", 5));
+ auto pair = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(pair.first.get<Int64>(), 30);
+
+ auto data2 = path->get_data_at(start);
+ auto pair2 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data2, StringRef("v.c.d", 5));
+ EXPECT_EQ(pair2.first.get<Int64>(), 30);
+
+ auto data3 = path->get_data_at(start);
+ auto pair3 = dst->deserialize_from_sparse_column(value, start++);
+ EXPECT_EQ(data3, StringRef("v.d.d", 5));
+ EXPECT_EQ(pair3.first.get<String>(), "50");
+ EXPECT_EQ(start, end);
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]