This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 2c22c282808 branch-3.1: [refactor](variant) use read type to insert
default when compaction #56066 #56146 (#56148)
2c22c282808 is described below
commit 2c22c282808d426e0cc366c09b95647c01e40dfe
Author: Sun Chenyang <[email protected]>
AuthorDate: Thu Sep 25 17:08:00 2025 +0800
branch-3.1: [refactor](variant) use read type to insert default when
compaction #56066 #56146 (#56148)
pick from master #56066 #56146
---
be/src/olap/rowset/segment_v2/segment.cpp | 29 ++++++++-----
be/test/vec/common/schema_util_rowset_test.cpp | 60 +++++++++++++++++++++++++-
2 files changed, 77 insertions(+), 12 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 548ac7935ea..cf3eb7a9ab2 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -604,15 +604,26 @@ vectorized::DataTypePtr Segment::get_data_type_of(const
TabletColumn& column,
// Case 1: Node not found for the given path within the variant reader.
// If relative_path is empty, it means the original path pointed to the
root
// of the variant column itself. We should return the Variant type.
+ // If node is nullptr, it means the path does not exist in the variant sub
columns.
if (node == nullptr || relative_path.empty()) {
+ // a nested subcolumn does not exist in the sparse column
if (column.is_nested_subcolumn()) {
return
vectorized::DataTypeFactory::instance().create_data_type(column);
}
- return column.is_nullable()
- ?
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>(
- column.variant_max_subcolumns_count()))
- : std::make_shared<vectorized::DataTypeObject>(
- column.variant_max_subcolumns_count());
+
+ // when the path is in the sparse column or the sparse column limit is exceeded, return
the variant type.
+ if (variant_reader->exist_in_sparse_column(relative_path) ||
+ variant_reader->is_exceeded_sparse_column_limit()) {
+ return column.is_nullable()
+ ?
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>(
+ column.variant_max_subcolumns_count()))
+ : std::make_shared<vectorized::DataTypeObject>(
+ column.variant_max_subcolumns_count());
+ }
+ // now, path is not in this segment, return the default type from
column.
+ else {
+ return
vectorized::DataTypeFactory::instance().create_data_type(column);
+ }
}
bool exist_in_sparse =
variant_reader->exist_in_sparse_column(relative_path);
@@ -630,11 +641,9 @@ vectorized::DataTypePtr Segment::get_data_type_of(const
TabletColumn& column,
!variant_reader->is_exceeded_sparse_column_limit())) {
return node->data.file_column_type;
}
- return column.is_nullable()
- ?
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>(
- column.variant_max_subcolumns_count()))
- : std::make_shared<vectorized::DataTypeObject>(
- column.variant_max_subcolumns_count());
+
+ // not the compaction read, return the default type from column.
+ return vectorized::DataTypeFactory::instance().create_data_type(column);
}
Status Segment::_create_column_meta_once(OlapReaderStatistics* stats) {
diff --git a/be/test/vec/common/schema_util_rowset_test.cpp
b/be/test/vec/common/schema_util_rowset_test.cpp
index 7d2ac9f1912..5dbc03ee8d2 100644
--- a/be/test/vec/common/schema_util_rowset_test.cpp
+++ b/be/test/vec/common/schema_util_rowset_test.cpp
@@ -306,6 +306,7 @@ TEST_F(SchemaUtilRowsetTest,
collect_path_stats_and_get_compaction_schema) {
// 2. create tablet
TabletMetaSharedPtr tablet_meta(new TabletMeta(tablet_schema));
+ tablet_meta->_tablet_id = 12345;
_tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta,
_data_dir.get());
EXPECT_TRUE(_tablet->init().ok());
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
@@ -378,7 +379,7 @@ TEST_F(SchemaUtilRowsetTest,
collect_path_stats_and_get_compaction_schema) {
auto create_rowset_writer_context = [this](TabletSchemaSPtr tablet_schema,
const SegmentsOverlapPB&
overlap,
uint32_t max_rows_per_segment,
Version version) {
- static int64_t inc_id = 1000;
+ static int64_t inc_id = 12345;
RowsetWriterContext rowset_writer_context;
RowsetId rowset_id;
rowset_id.init(inc_id);
@@ -386,7 +387,7 @@ TEST_F(SchemaUtilRowsetTest,
collect_path_stats_and_get_compaction_schema) {
rowset_writer_context.rowset_type = BETA_ROWSET;
rowset_writer_context.rowset_state = VISIBLE;
rowset_writer_context.tablet_schema = tablet_schema;
- rowset_writer_context.tablet_path = _absolute_dir + "/../";
+ rowset_writer_context.tablet_path = _tablet->tablet_path();
rowset_writer_context.version = version;
rowset_writer_context.segments_overlap = overlap;
rowset_writer_context.max_rows_per_segment = max_rows_per_segment;
@@ -411,6 +412,61 @@ TEST_F(SchemaUtilRowsetTest,
collect_path_stats_and_get_compaction_schema) {
// 7. check output rowset
EXPECT_TRUE(schema_util::check_path_stats(rowsets, out_rowset,
_tablet).ok());
+
+ // get_data_type_of check
+ auto file_path =
+ local_segment_path(_tablet->tablet_path(),
out_rowset->rowset_id().to_string(), 0);
+ OlapReaderStatistics olap_reader_stats;
+ std::shared_ptr<Segment> segment;
+ st = Segment::open(io::global_local_filesystem(), file_path,
_tablet->tablet_id(), 0,
+ out_rowset->rowset_id(), out_rowset->tablet_schema(),
+ io::FileReaderOptions(), &segment,
InvertedIndexFileInfo(),
+ &olap_reader_stats);
+ EXPECT_TRUE(st.ok()) << st.msg();
+ TabletColumn subcolumn_in_sparse;
+ subcolumn_in_sparse.set_name("v1.key3");
+ subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ subcolumn_in_sparse.set_unique_id(-1);
+ subcolumn_in_sparse.set_parent_unique_id(1);
+ subcolumn_in_sparse.set_path_info(PathInData("v1.key3"));
+ subcolumn_in_sparse.set_variant_max_subcolumns_count(3);
+ subcolumn_in_sparse.set_is_nullable(true);
+ st = segment->_create_column_meta_once(&olap_reader_stats);
+ EXPECT_TRUE(st.ok()) << st.msg();
+
+ // key3 is in the sparse column, return variant type
+ auto data_type = segment->get_data_type_of(subcolumn_in_sparse, true);
+ EXPECT_TRUE(data_type != nullptr);
+ EXPECT_TRUE(data_type->get_storage_field_type() ==
FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+ subcolumn_in_sparse.set_name("v1.keya");
+ subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ subcolumn_in_sparse.set_path_info(PathInData("v1.keya"));
+
+ // keya is not in the segment, return string type;
+ data_type = segment->get_data_type_of(subcolumn_in_sparse, true);
+ EXPECT_TRUE(data_type != nullptr);
+ EXPECT_TRUE(data_type->get_storage_field_type() ==
FieldType::OLAP_FIELD_TYPE_STRING);
+
+ subcolumn_in_sparse.set_name("v1.keyb");
+ subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+ subcolumn_in_sparse.set_path_info(PathInData("v1.keyb"));
+ subcolumn_in_sparse._column_path->has_nested = true;
+
+ // keyb has nested part, return int type;
+ data_type = segment->get_data_type_of(subcolumn_in_sparse, true);
+ EXPECT_TRUE(data_type != nullptr);
+ EXPECT_TRUE(data_type->get_storage_field_type() ==
FieldType::OLAP_FIELD_TYPE_INT);
+
+ // key1 is in the subcolumns, return string type;
+ subcolumn_in_sparse.set_name("v1.key1");
+ subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ subcolumn_in_sparse.set_path_info(PathInData("v1.key1"));
+ data_type = segment->get_data_type_of(subcolumn_in_sparse, true);
+ EXPECT_TRUE(data_type != nullptr);
+ EXPECT_TRUE(data_type->get_storage_field_type() ==
FieldType::OLAP_FIELD_TYPE_STRING);
+
+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
}
TabletSchemaSPtr create_compaction_schema_common(StorageEngine* _engine_ref,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]