This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 2c22c282808 branch-3.1: [refactor](variant) use read type to insert 
default when compaction #56066 #56146 (#56148)
2c22c282808 is described below

commit 2c22c282808d426e0cc366c09b95647c01e40dfe
Author: Sun Chenyang <[email protected]>
AuthorDate: Thu Sep 25 17:08:00 2025 +0800

    branch-3.1: [refactor](variant) use read type to insert default when 
compaction #56066 #56146 (#56148)
    
    pick from master #56066 #56146
---
 be/src/olap/rowset/segment_v2/segment.cpp      | 29 ++++++++-----
 be/test/vec/common/schema_util_rowset_test.cpp | 60 +++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 12 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index 548ac7935ea..cf3eb7a9ab2 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -604,15 +604,26 @@ vectorized::DataTypePtr Segment::get_data_type_of(const 
TabletColumn& column,
     // Case 1: Node not found for the given path within the variant reader.
     // If relative_path is empty, it means the original path pointed to the 
root
     // of the variant column itself. We should return the Variant type.
+    // If node is nullptr, the path does not exist in the variant 
subcolumns.
     if (node == nullptr || relative_path.empty()) {
+        // a nested subcolumn does not exist in the sparse column
         if (column.is_nested_subcolumn()) {
             return 
vectorized::DataTypeFactory::instance().create_data_type(column);
         }
-        return column.is_nullable()
-                       ? 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>(
-                                 column.variant_max_subcolumns_count()))
-                       : std::make_shared<vectorized::DataTypeObject>(
-                                 column.variant_max_subcolumns_count());
+
+        // when the path is in the sparse column, or the sparse column limit 
has been exceeded, return the variant type.
+        if (variant_reader->exist_in_sparse_column(relative_path) ||
+            variant_reader->is_exceeded_sparse_column_limit()) {
+            return column.is_nullable()
+                           ? 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>(
+                                     column.variant_max_subcolumns_count()))
+                           : std::make_shared<vectorized::DataTypeObject>(
+                                     column.variant_max_subcolumns_count());
+        }
+        // otherwise the path is not in this segment; return the default type 
from the column.
+        else {
+            return 
vectorized::DataTypeFactory::instance().create_data_type(column);
+        }
     }
 
     bool exist_in_sparse = 
variant_reader->exist_in_sparse_column(relative_path);
@@ -630,11 +641,9 @@ vectorized::DataTypePtr Segment::get_data_type_of(const 
TabletColumn& column,
                              
!variant_reader->is_exceeded_sparse_column_limit())) {
         return node->data.file_column_type;
     }
-    return column.is_nullable()
-                   ? 
vectorized::make_nullable(std::make_shared<vectorized::DataTypeObject>(
-                             column.variant_max_subcolumns_count()))
-                   : std::make_shared<vectorized::DataTypeObject>(
-                             column.variant_max_subcolumns_count());
+
+    // not a compaction read; return the default type from the column.
+    return vectorized::DataTypeFactory::instance().create_data_type(column);
 }
 
 Status Segment::_create_column_meta_once(OlapReaderStatistics* stats) {
diff --git a/be/test/vec/common/schema_util_rowset_test.cpp 
b/be/test/vec/common/schema_util_rowset_test.cpp
index 7d2ac9f1912..5dbc03ee8d2 100644
--- a/be/test/vec/common/schema_util_rowset_test.cpp
+++ b/be/test/vec/common/schema_util_rowset_test.cpp
@@ -306,6 +306,7 @@ TEST_F(SchemaUtilRowsetTest, 
collect_path_stats_and_get_compaction_schema) {
 
     // 2. create tablet
     TabletMetaSharedPtr tablet_meta(new TabletMeta(tablet_schema));
+    tablet_meta->_tablet_id = 12345;
     _tablet = std::make_shared<Tablet>(*_engine_ref, tablet_meta, 
_data_dir.get());
     EXPECT_TRUE(_tablet->init().ok());
     
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
@@ -378,7 +379,7 @@ TEST_F(SchemaUtilRowsetTest, 
collect_path_stats_and_get_compaction_schema) {
     auto create_rowset_writer_context = [this](TabletSchemaSPtr tablet_schema,
                                                const SegmentsOverlapPB& 
overlap,
                                                uint32_t max_rows_per_segment, 
Version version) {
-        static int64_t inc_id = 1000;
+        static int64_t inc_id = 12345;
         RowsetWriterContext rowset_writer_context;
         RowsetId rowset_id;
         rowset_id.init(inc_id);
@@ -386,7 +387,7 @@ TEST_F(SchemaUtilRowsetTest, 
collect_path_stats_and_get_compaction_schema) {
         rowset_writer_context.rowset_type = BETA_ROWSET;
         rowset_writer_context.rowset_state = VISIBLE;
         rowset_writer_context.tablet_schema = tablet_schema;
-        rowset_writer_context.tablet_path = _absolute_dir + "/../";
+        rowset_writer_context.tablet_path = _tablet->tablet_path();
         rowset_writer_context.version = version;
         rowset_writer_context.segments_overlap = overlap;
         rowset_writer_context.max_rows_per_segment = max_rows_per_segment;
@@ -411,6 +412,61 @@ TEST_F(SchemaUtilRowsetTest, 
collect_path_stats_and_get_compaction_schema) {
 
     // 7. check output rowset
     EXPECT_TRUE(schema_util::check_path_stats(rowsets, out_rowset, 
_tablet).ok());
+
+    // get_data_type_of check
+    auto file_path =
+            local_segment_path(_tablet->tablet_path(), 
out_rowset->rowset_id().to_string(), 0);
+    OlapReaderStatistics olap_reader_stats;
+    std::shared_ptr<Segment> segment;
+    st = Segment::open(io::global_local_filesystem(), file_path, 
_tablet->tablet_id(), 0,
+                       out_rowset->rowset_id(), out_rowset->tablet_schema(),
+                       io::FileReaderOptions(), &segment, 
InvertedIndexFileInfo(),
+                       &olap_reader_stats);
+    EXPECT_TRUE(st.ok()) << st.msg();
+    TabletColumn subcolumn_in_sparse;
+    subcolumn_in_sparse.set_name("v1.key3");
+    subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    subcolumn_in_sparse.set_unique_id(-1);
+    subcolumn_in_sparse.set_parent_unique_id(1);
+    subcolumn_in_sparse.set_path_info(PathInData("v1.key3"));
+    subcolumn_in_sparse.set_variant_max_subcolumns_count(3);
+    subcolumn_in_sparse.set_is_nullable(true);
+    st = segment->_create_column_meta_once(&olap_reader_stats);
+    EXPECT_TRUE(st.ok()) << st.msg();
+
+    // key3 is in the sparse column, return variant type
+    auto data_type = segment->get_data_type_of(subcolumn_in_sparse, true);
+    EXPECT_TRUE(data_type != nullptr);
+    EXPECT_TRUE(data_type->get_storage_field_type() == 
FieldType::OLAP_FIELD_TYPE_VARIANT);
+
+    subcolumn_in_sparse.set_name("v1.keya");
+    subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    subcolumn_in_sparse.set_path_info(PathInData("v1.keya"));
+
+    // keya is not in the segment, return string type;
+    data_type = segment->get_data_type_of(subcolumn_in_sparse, true);
+    EXPECT_TRUE(data_type != nullptr);
+    EXPECT_TRUE(data_type->get_storage_field_type() == 
FieldType::OLAP_FIELD_TYPE_STRING);
+
+    subcolumn_in_sparse.set_name("v1.keyb");
+    subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_INT);
+    subcolumn_in_sparse.set_path_info(PathInData("v1.keyb"));
+    subcolumn_in_sparse._column_path->has_nested = true;
+
+    // keyb has nested part, return int type;
+    data_type = segment->get_data_type_of(subcolumn_in_sparse, true);
+    EXPECT_TRUE(data_type != nullptr);
+    EXPECT_TRUE(data_type->get_storage_field_type() == 
FieldType::OLAP_FIELD_TYPE_INT);
+
+    // key1 is in the subcolumns, return string type;
+    subcolumn_in_sparse.set_name("v1.key1");
+    subcolumn_in_sparse.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    subcolumn_in_sparse.set_path_info(PathInData("v1.key1"));
+    data_type = segment->get_data_type_of(subcolumn_in_sparse, true);
+    EXPECT_TRUE(data_type != nullptr);
+    EXPECT_TRUE(data_type->get_storage_field_type() == 
FieldType::OLAP_FIELD_TYPE_STRING);
+
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
 }
 
 TabletSchemaSPtr create_compaction_schema_common(StorageEngine* _engine_ref,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to