This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d7f9fa57f2a [refactor](be)simplify interface in schema and rowcursor 
(#63925)
d7f9fa57f2a is described below

commit d7f9fa57f2a51867117a6e0987db6546a0029f37
Author: yiguolei <[email protected]>
AuthorDate: Mon Jun 1 15:08:22 2026 +0800

    [refactor](be)simplify interface in schema and rowcursor (#63925)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
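    Problem Summary:
    
    Simplify the Schema and RowCursor interfaces by removing redundant entry
    points:
    - RowCursor: remove init(schema, num_columns) and the init() overload that
      takes a shared Schema; rename the private from_tuple() to _from_tuple().
    - Schema: remove the TabletSchemaSPtr constructor, the UT-only
      (columns, num_key_columns) constructor, and the _unique_ids member
      together with its unique_id() accessor.
    - TabletReader no longer builds a shared Schema for start/end keys, and
      SegmentIterator and the unit tests now construct Schema from an explicit
      list of column ids.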
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
          https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/storage/row_cursor.cpp                      | 34 +----------
 be/src/storage/row_cursor.h                        |  9 +--
 be/src/storage/schema.cpp                          |  2 -
 be/src/storage/schema.h                            | 57 ------------------
 be/src/storage/segment/segment_iterator.cpp        |  4 +-
 be/src/storage/tablet/tablet_reader.cpp            | 10 +---
 be/test/exec/scan/vgeneric_iterators_test.cpp      | 21 ++++---
 .../delta_writer/delta_writer_cluster_key_test.cpp | 15 ++++-
 be/test/load/delta_writer/delta_writer_test.cpp    | 19 +++++-
 .../compaction/ordered_data_compaction_test.cpp    | 10 +++-
 .../compaction/vertical_compaction_test.cpp        | 10 +++-
 .../delete/delete_bitmap_calculator_test.cpp       |  3 +-
 be/test/storage/row_cursor_test.cpp                | 69 ++++++++++++----------
 be/test/storage/segment/segment_cache_test.cpp     | 15 ++++-
 .../storage/segment/segment_corruption_test.cpp    |  6 +-
 .../segment_iterator_apply_index_expr_test.cpp     |  6 +-
 .../segment/segment_iterator_limit_opt_test.cpp    |  6 +-
 .../segment_iterator_no_need_read_data_test.cpp    |  6 +-
 18 files changed, 145 insertions(+), 157 deletions(-)

diff --git a/be/src/storage/row_cursor.cpp b/be/src/storage/row_cursor.cpp
index 9e09593991f..7194718afc6 100644
--- a/be/src/storage/row_cursor.cpp
+++ b/be/src/storage/row_cursor.cpp
@@ -48,23 +48,6 @@ void RowCursor::_init_schema(TabletSchemaSPtr schema, 
uint32_t column_count) {
     _schema.reset(new Schema(schema->columns(), columns));
 }
 
-void RowCursor::_init_schema(const std::shared_ptr<Schema>& shared_schema, 
uint32_t column_count) {
-    _schema.reset(new Schema(*shared_schema));
-}
-
-Status RowCursor::init(TabletSchemaSPtr schema, size_t num_columns) {
-    if (num_columns > schema->num_columns()) {
-        return Status::Error<INVALID_ARGUMENT>(
-                "Input param are invalid. Column count is bigger than 
num_columns of schema. "
-                "column_count={}, schema.num_columns={}",
-                num_columns, schema->num_columns());
-    }
-    _init_schema(schema, cast_set<uint32_t>(num_columns));
-    // Initialize all fields as null (TYPE_NULL).
-    _fields.resize(num_columns);
-    return Status::OK();
-}
-
 Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple) {
     size_t key_size = tuple.size();
     if (key_size > schema->num_columns()) {
@@ -74,20 +57,7 @@ Status RowCursor::init(TabletSchemaSPtr schema, const 
OlapTuple& tuple) {
                 key_size, schema->num_columns());
     }
     _init_schema(schema, cast_set<uint32_t>(key_size));
-    return from_tuple(tuple);
-}
-
-Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple,
-                       const std::shared_ptr<Schema>& shared_schema) {
-    size_t key_size = tuple.size();
-    if (key_size > schema->num_columns()) {
-        return Status::Error<INVALID_ARGUMENT>(
-                "Input param are invalid. Column count is bigger than 
num_columns of schema. "
-                "column_count={}, schema.num_columns={}",
-                key_size, schema->num_columns());
-    }
-    _init_schema(shared_schema, cast_set<uint32_t>(key_size));
-    return from_tuple(tuple);
+    return _from_tuple(tuple);
 }
 
 Status RowCursor::init_scan_key(TabletSchemaSPtr schema, std::vector<Field> 
fields) {
@@ -103,7 +73,7 @@ Status RowCursor::init_scan_key(TabletSchemaSPtr schema, 
std::vector<Field> fiel
     return Status::OK();
 }
 
-Status RowCursor::from_tuple(const OlapTuple& tuple) {
+Status RowCursor::_from_tuple(const OlapTuple& tuple) {
     if (tuple.size() != _schema->num_column_ids()) {
         return Status::Error<INVALID_ARGUMENT>(
                 "column count does not match. tuple_size={}, field_count={}", 
tuple.size(),
diff --git a/be/src/storage/row_cursor.h b/be/src/storage/row_cursor.h
index 19850f9604b..f0f484d3a70 100644
--- a/be/src/storage/row_cursor.h
+++ b/be/src/storage/row_cursor.h
@@ -49,12 +49,6 @@ public:
     // Initialize from OlapTuple (which now stores Fields).
     // Sets up the schema and copies Fields from the tuple.
     Status init(TabletSchemaSPtr schema, const OlapTuple& tuple);
-    Status init(TabletSchemaSPtr schema, const OlapTuple& tuple,
-                const std::shared_ptr<Schema>& shared_schema);
-
-    // Initialize with schema and num_columns, creating null Fields.
-    // Caller sets individual fields via mutable_field().
-    Status init(TabletSchemaSPtr schema, size_t num_columns);
 
     // Initialize from typed Fields directly.
     Status init_scan_key(TabletSchemaSPtr schema, std::vector<Field> fields);
@@ -94,10 +88,9 @@ public:
 
 private:
     // Copy Fields from an OlapTuple into this cursor.
-    Status from_tuple(const OlapTuple& tuple);
+    Status _from_tuple(const OlapTuple& tuple);
 
     void _init_schema(TabletSchemaSPtr schema, uint32_t column_count);
-    void _init_schema(const std::shared_ptr<Schema>& shared_schema, uint32_t 
column_count);
 
     // Helper: encode a single non-null field for the given column.
     // Converts the core::Field to storage format and calls KeyCoder.
diff --git a/be/src/storage/schema.cpp b/be/src/storage/schema.cpp
index 12904338fde..99088f93878 100644
--- a/be/src/storage/schema.cpp
+++ b/be/src/storage/schema.cpp
@@ -53,7 +53,6 @@ Schema& Schema::operator=(const Schema& other) {
 
 void Schema::_copy_from(const Schema& other) {
     _col_ids = other._col_ids;
-    _unique_ids = other._unique_ids;
     _num_key_columns = other._num_key_columns;
     _delete_sign_idx = other._delete_sign_idx;
     _has_sequence_col = other._has_sequence_col;
@@ -61,7 +60,6 @@ void Schema::_copy_from(const Schema& other) {
     _version_col_idx = other._version_col_idx;
     _lsn_col_idx = other._lsn_col_idx;
     _tso_col_idx = other._tso_col_idx;
-    _mem_size = other._mem_size;
 
     _cols.resize(other._cols.size());
     for (auto cid : _col_ids) {
diff --git a/be/src/storage/schema.h b/be/src/storage/schema.h
index 88504400674..fd442cc16cc 100644
--- a/be/src/storage/schema.h
+++ b/be/src/storage/schema.h
@@ -49,51 +49,9 @@ class Schema;
 using SchemaSPtr = std::shared_ptr<const Schema>;
 class Schema {
 public:
-    Schema(TabletSchemaSPtr tablet_schema) {
-        size_t num_columns = tablet_schema->num_columns();
-        // ignore this column
-        if (tablet_schema->columns().back()->name() == 
BeConsts::ROW_STORE_COL) {
-            --num_columns;
-        }
-        std::vector<ColumnId> col_ids(num_columns);
-        _unique_ids.resize(num_columns);
-        std::vector<TabletColumnPtr> columns;
-        columns.reserve(num_columns);
-
-        size_t num_key_columns = 0;
-        for (uint32_t cid = 0; cid < num_columns; ++cid) {
-            col_ids[cid] = cid;
-            const TabletColumn& column = tablet_schema->column(cid);
-            _unique_ids[cid] = column.unique_id();
-            if (column.is_key()) {
-                ++num_key_columns;
-            }
-            if (column.name() == BeConsts::ROWID_COL ||
-                column.name().starts_with(BeConsts::GLOBAL_ROWID_COL)) {
-                _rowid_col_idx = cid;
-            }
-            if (column.name() == VERSION_COL) {
-                _version_col_idx = cid;
-            }
-            if (column.name() == std::string(kRowBinlogLsnColName)) {
-                _lsn_col_idx = cid;
-            }
-            if (column.name() == std::string(kRowBinlogTimestampColName)) {
-                _tso_col_idx = cid;
-            }
-            columns.push_back(std::make_shared<TabletColumn>(column));
-        }
-        _delete_sign_idx = tablet_schema->delete_sign_idx();
-        if (tablet_schema->has_sequence_col() || tablet_schema->has_seq_map()) 
{
-            _has_sequence_col = true;
-        }
-        _init(columns, col_ids, num_key_columns);
-    }
-
     // All the columns of one table may exist in the columns param, but 
col_ids is only a subset.
     Schema(const std::vector<TabletColumnPtr>& columns, const 
std::vector<ColumnId>& col_ids) {
         size_t num_key_columns = 0;
-        _unique_ids.resize(columns.size());
         for (int i = 0; i < columns.size(); ++i) {
             if (columns[i]->is_key()) {
                 ++num_key_columns;
@@ -114,23 +72,10 @@ public:
             if (columns[i]->name() == std::string(kRowBinlogTimestampColName)) 
{
                 _tso_col_idx = i;
             }
-            _unique_ids[i] = columns[i]->unique_id();
         }
         _init(columns, col_ids, num_key_columns);
     }
 
-    // Only for UT
-    Schema(const std::vector<TabletColumnPtr>& columns, size_t 
num_key_columns) {
-        std::vector<ColumnId> col_ids(columns.size());
-        _unique_ids.resize(columns.size());
-        for (uint32_t cid = 0; cid < columns.size(); ++cid) {
-            col_ids[cid] = cid;
-            _unique_ids[cid] = columns[cid]->unique_id();
-        }
-
-        _init(columns, col_ids, num_key_columns);
-    }
-
     Schema(const Schema&);
     Schema& operator=(const Schema& other);
 
@@ -151,7 +96,6 @@ public:
     size_t num_column_ids() const { return _col_ids.size(); }
     const std::vector<ColumnId>& column_ids() const { return _col_ids; }
     ColumnId column_id(size_t index) const { return _col_ids[index]; }
-    int32_t unique_id(size_t index) const { return _unique_ids[index]; }
     int32_t delete_sign_idx() const { return _delete_sign_idx; }
     bool has_sequence_col() const { return _has_sequence_col; }
     int32_t rowid_col_idx() const { return _rowid_col_idx; }
@@ -172,7 +116,6 @@ private:
     // NOTE: The ColumnId here represents the sequential index number 
(starting from 0) of
     // a column in current row, not the unique id-identifier of each column
     std::vector<ColumnId> _col_ids;
-    std::vector<int32_t> _unique_ids;
     // NOTE: _cols[cid] can only be accessed when the cid is
     // contained in _col_ids
     std::vector<TabletColumnPtr> _cols;
diff --git a/be/src/storage/segment/segment_iterator.cpp 
b/be/src/storage/segment/segment_iterator.cpp
index 7e9be2653a6..e81b523a94c 100644
--- a/be/src/storage/segment/segment_iterator.cpp
+++ b/be/src/storage/segment/segment_iterator.cpp
@@ -859,7 +859,9 @@ Status SegmentIterator::_prepare_seek(const 
StorageReadOptions::KeyRange& key_ra
         for (const TabletColumn* col : key_columns) {
             cols.emplace_back(std::make_shared<TabletColumn>(*col));
         }
-        _seek_schema = std::make_unique<Schema>(cols, cols.size());
+        std::vector<uint32_t> column_ids(cols.size());
+        std::iota(column_ids.begin(), column_ids.end(), 0);
+        _seek_schema = std::make_unique<Schema>(cols, column_ids);
     }
     // todo(wb) need refactor here, when using pk to search, _seek_block is 
useless
     if (_seek_block.size() == 0) {
diff --git a/be/src/storage/tablet/tablet_reader.cpp 
b/be/src/storage/tablet/tablet_reader.cpp
index ffb98c46981..1cf4435ba1b 100644
--- a/be/src/storage/tablet/tablet_reader.cpp
+++ b/be/src/storage/tablet/tablet_reader.cpp
@@ -348,11 +348,6 @@ Status TabletReader::_init_keys_param(const ReaderParams& 
read_params) {
                 scan_key_size, _tablet_schema->num_columns());
     }
 
-    std::vector<uint32_t> columns(scan_key_size);
-    std::iota(columns.begin(), columns.end(), 0);
-
-    std::shared_ptr<Schema> schema = 
std::make_shared<Schema>(_tablet_schema->columns(), columns);
-
     for (size_t i = 0; i < start_key_size; ++i) {
         if (read_params.start_key[i].size() != scan_key_size) {
             return Status::Error<INVALID_ARGUMENT>(
@@ -360,8 +355,7 @@ Status TabletReader::_init_keys_param(const ReaderParams& 
read_params) {
                     read_params.start_key[i].size(), scan_key_size);
         }
 
-        Status res =
-                _keys_param.start_keys[i].init(_tablet_schema, 
read_params.start_key[i], schema);
+        Status res = _keys_param.start_keys[i].init(_tablet_schema, 
read_params.start_key[i]);
         if (!res.ok()) {
             LOG(WARNING) << "fail to init row cursor. res = " << res;
             return res;
@@ -378,7 +372,7 @@ Status TabletReader::_init_keys_param(const ReaderParams& 
read_params) {
                     read_params.end_key[i].size(), scan_key_size);
         }
 
-        Status res = _keys_param.end_keys[i].init(_tablet_schema, 
read_params.end_key[i], schema);
+        Status res = _keys_param.end_keys[i].init(_tablet_schema, 
read_params.end_key[i]);
         if (!res.ok()) {
             LOG(WARNING) << "fail to init row cursor. res = " << res;
             return res;
diff --git a/be/test/exec/scan/vgeneric_iterators_test.cpp 
b/be/test/exec/scan/vgeneric_iterators_test.cpp
index 34349b8bd36..fae7b164f8c 100644
--- a/be/test/exec/scan/vgeneric_iterators_test.cpp
+++ b/be/test/exec/scan/vgeneric_iterators_test.cpp
@@ -44,19 +44,26 @@ public:
 
 static Schema create_schema() {
     std::vector<TabletColumnPtr> col_schemas;
-    col_schemas.emplace_back(
-            
std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
-                                           
FieldType::OLAP_FIELD_TYPE_SMALLINT, true));
+    auto c1 = 
std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
+                                             
FieldType::OLAP_FIELD_TYPE_SMALLINT, true);
+    c1->set_is_key(true);
+    col_schemas.emplace_back(c1);
     // c2: int
-    col_schemas.emplace_back(
-            
std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
-                                           FieldType::OLAP_FIELD_TYPE_INT, 
true));
+    auto c2 = 
std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
+                                             FieldType::OLAP_FIELD_TYPE_INT, 
true);
+    c2->set_is_key(true);
+    col_schemas.emplace_back(c2);
     // c3: big int
     col_schemas.emplace_back(
             
std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM,
                                            FieldType::OLAP_FIELD_TYPE_BIGINT, 
true));
 
-    Schema schema(col_schemas, 2);
+    std::vector<ColumnId> column_ids(col_schemas.size());
+    for (uint32_t cid = 0; cid < column_ids.size(); ++cid) {
+        column_ids[cid] = cid;
+    }
+
+    Schema schema(col_schemas, column_ids);
     return schema;
 }
 
diff --git a/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp 
b/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp
index 5590ecaa93a..ee360c54d22 100644
--- a/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp
+++ b/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp
@@ -70,6 +70,19 @@ class OlapMeta;
 static const uint32_t MAX_PATH_LEN = 1024;
 static StorageEngine* engine_ref = nullptr;
 
+static std::shared_ptr<Schema> create_full_schema(const TabletSchemaSPtr& 
tablet_schema) {
+    size_t num_columns = tablet_schema->num_columns();
+    if (num_columns > 0 && tablet_schema->columns().back()->name() == 
BeConsts::ROW_STORE_COL) {
+        --num_columns;
+    }
+
+    std::vector<ColumnId> column_ids(num_columns);
+    for (uint32_t cid = 0; cid < num_columns; ++cid) {
+        column_ids[cid] = cid;
+    }
+    return std::make_shared<Schema>(tablet_schema->columns(), column_ids);
+}
+
 static void set_up() {
     char buffer[MAX_PATH_LEN];
     EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
@@ -340,7 +353,7 @@ TEST_F(TestDeltaWriterClusterKey, vec_sequence_col) {
     opts.tablet_schema = rowset->tablet_schema();
 
     std::unique_ptr<RowwiseIterator> iter;
-    std::shared_ptr<Schema> schema = 
std::make_shared<Schema>(rowset->tablet_schema());
+    std::shared_ptr<Schema> schema = 
create_full_schema(rowset->tablet_schema());
     auto s = segments[0]->new_iterator(schema, opts, &iter);
     ASSERT_TRUE(s.ok());
     auto read_block = rowset->tablet_schema()->create_block();
diff --git a/be/test/load/delta_writer/delta_writer_test.cpp 
b/be/test/load/delta_writer/delta_writer_test.cpp
index 5d3aebc4e23..22928ab30fe 100644
--- a/be/test/load/delta_writer/delta_writer_test.cpp
+++ b/be/test/load/delta_writer/delta_writer_test.cpp
@@ -71,6 +71,19 @@ class OlapMeta;
 static const uint32_t MAX_PATH_LEN = 1024;
 static StorageEngine* engine_ref = nullptr;
 
+static std::shared_ptr<Schema> create_full_schema(const TabletSchemaSPtr& 
tablet_schema) {
+    size_t num_columns = tablet_schema->num_columns();
+    if (num_columns > 0 && tablet_schema->columns().back()->name() == 
BeConsts::ROW_STORE_COL) {
+        --num_columns;
+    }
+
+    std::vector<ColumnId> column_ids(num_columns);
+    for (uint32_t cid = 0; cid < num_columns; ++cid) {
+        column_ids[cid] = cid;
+    }
+    return std::make_shared<Schema>(tablet_schema->columns(), column_ids);
+}
+
 static void set_up() {
     char buffer[MAX_PATH_LEN];
     EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
@@ -824,7 +837,7 @@ TEST_F(TestDeltaWriter, vec_sequence_col) {
     opts.tablet_schema = rowset->tablet_schema();
 
     std::unique_ptr<RowwiseIterator> iter;
-    std::shared_ptr<Schema> schema = 
std::make_shared<Schema>(rowset->tablet_schema());
+    std::shared_ptr<Schema> schema = 
create_full_schema(rowset->tablet_schema());
     auto s = segments[0]->new_iterator(schema, opts, &iter);
     ASSERT_TRUE(s.ok());
     auto read_block = rowset->tablet_schema()->create_block();
@@ -1032,7 +1045,7 @@ TEST_F(TestDeltaWriter, 
vec_sequence_col_concurrent_write) {
         opts.delete_bitmap.emplace(0, 
tablet->tablet_meta()->delete_bitmap().get_agg(
                                               {rowset1->rowset_id(), 0, 
cur_version}));
         std::unique_ptr<RowwiseIterator> iter;
-        std::shared_ptr<Schema> schema = 
std::make_shared<Schema>(rowset1->tablet_schema());
+        std::shared_ptr<Schema> schema = 
create_full_schema(rowset1->tablet_schema());
         std::vector<segment_v2::SegmentSharedPtr> segments;
         
static_cast<void>(((BetaRowset*)rowset1.get())->load_segments(&segments));
         auto s = segments[0]->new_iterator(schema, opts, &iter);
@@ -1060,7 +1073,7 @@ TEST_F(TestDeltaWriter, 
vec_sequence_col_concurrent_write) {
         opts.delete_bitmap.emplace(0, 
tablet->tablet_meta()->delete_bitmap().get_agg(
                                               {rowset2->rowset_id(), 0, 
cur_version}));
         std::unique_ptr<RowwiseIterator> iter;
-        std::shared_ptr<Schema> schema = 
std::make_shared<Schema>(rowset2->tablet_schema());
+        std::shared_ptr<Schema> schema = 
create_full_schema(rowset2->tablet_schema());
         std::vector<segment_v2::SegmentSharedPtr> segments;
         
static_cast<void>(((BetaRowset*)rowset2.get())->load_segments(&segments));
         auto s = segments[0]->new_iterator(schema, opts, &iter);
diff --git a/be/test/storage/compaction/ordered_data_compaction_test.cpp 
b/be/test/storage/compaction/ordered_data_compaction_test.cpp
index 333cad0cdfb..43efedc13c1 100644
--- a/be/test/storage/compaction/ordered_data_compaction_test.cpp
+++ b/be/test/storage/compaction/ordered_data_compaction_test.cpp
@@ -452,7 +452,15 @@ protected:
 
     void block_create(TabletSchemaSPtr tablet_schema, Block* block) {
         block->clear();
-        Schema schema(tablet_schema);
+        size_t num_columns = tablet_schema->num_columns();
+        if (num_columns > 0 && tablet_schema->columns().back()->name() == 
BeConsts::ROW_STORE_COL) {
+            --num_columns;
+        }
+        std::vector<ColumnId> schema_column_ids(num_columns);
+        for (uint32_t cid = 0; cid < num_columns; ++cid) {
+            schema_column_ids[cid] = cid;
+        }
+        Schema schema(tablet_schema->columns(), schema_column_ids);
         const auto& column_ids = schema.column_ids();
         for (size_t i = 0; i < schema.num_column_ids(); ++i) {
             auto column_desc = schema.column(column_ids[i]);
diff --git a/be/test/storage/compaction/vertical_compaction_test.cpp 
b/be/test/storage/compaction/vertical_compaction_test.cpp
index 542c12fd919..66890e97668 100644
--- a/be/test/storage/compaction/vertical_compaction_test.cpp
+++ b/be/test/storage/compaction/vertical_compaction_test.cpp
@@ -368,7 +368,15 @@ protected:
 
     void block_create(TabletSchemaSPtr tablet_schema, Block* block) {
         block->clear();
-        Schema schema(tablet_schema);
+        size_t num_columns = tablet_schema->num_columns();
+        if (num_columns > 0 && tablet_schema->columns().back()->name() == 
BeConsts::ROW_STORE_COL) {
+            --num_columns;
+        }
+        std::vector<ColumnId> schema_column_ids(num_columns);
+        for (uint32_t cid = 0; cid < num_columns; ++cid) {
+            schema_column_ids[cid] = cid;
+        }
+        Schema schema(tablet_schema->columns(), schema_column_ids);
         const auto& column_ids = schema.column_ids();
         for (size_t i = 0; i < schema.num_column_ids(); ++i) {
             auto column_desc = schema.column(column_ids[i]);
diff --git a/be/test/storage/delete/delete_bitmap_calculator_test.cpp 
b/be/test/storage/delete/delete_bitmap_calculator_test.cpp
index 81b16cc8b3d..0d2ab81f1c1 100644
--- a/be/test/storage/delete/delete_bitmap_calculator_test.cpp
+++ b/be/test/storage/delete/delete_bitmap_calculator_test.cpp
@@ -83,7 +83,8 @@ void build_segment(SegmentWriterOptions opts, 
TabletSchemaSPtr build_schema, siz
     EXPECT_TRUE(st.ok());
 
     RowCursor row;
-    auto olap_st = row.init(build_schema, build_schema->num_columns());
+    std::vector<Field> fields(build_schema->num_columns(), 
Field(PrimitiveType::TYPE_NULL));
+    auto olap_st = row.init_scan_key(build_schema, std::move(fields));
     EXPECT_EQ(Status::OK(), olap_st);
 
     for (size_t rid = 0; rid < nrows; ++rid) {
diff --git a/be/test/storage/row_cursor_test.cpp 
b/be/test/storage/row_cursor_test.cpp
index 26b1204a681..31e88ab2b4f 100644
--- a/be/test/storage/row_cursor_test.cpp
+++ b/be/test/storage/row_cursor_test.cpp
@@ -251,6 +251,15 @@ void set_tablet_schema_for_scan_key(TabletSchemaSPtr 
tablet_schema) {
     tablet_schema->init_from_pb(tablet_schema_pb);
 }
 
+void init_row_cursor_with_nulls(RowCursor& row, const TabletSchemaSPtr& 
tablet_schema,
+                                size_t num_columns) {
+    OlapTuple tuple;
+    for (size_t i = 0; i < num_columns; ++i) {
+        tuple.add_null();
+    }
+    ASSERT_TRUE(row.init(tablet_schema, tuple).ok());
+}
+
 class TestRowCursor : public testing::Test {
 public:
     TestRowCursor() { _arena.reset(new Arena()); }
@@ -288,7 +297,7 @@ TEST_F(TestRowCursor, encode_key) {
     // test encoding with padding
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 2));
+        init_row_cursor_with_nulls(row, tablet_schema, 2);
 
         row.mutable_field(0) = Field::create_field<TYPE_INT>(int32_t(12345));
         row.mutable_field(1) = Field::create_field<TYPE_INT>(int32_t(54321));
@@ -348,7 +357,7 @@ TEST_F(TestRowCursor, encode_key_int_float_char) {
     // _encode_field for CHAR pads to col_length=8 with \0
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 3));
+        init_row_cursor_with_nulls(row, tablet_schema, 3);
 
         row.mutable_field(0) = Field::create_field<TYPE_INT>(int32_t(12345));
         row.mutable_field(1) = Field::create_field<TYPE_FLOAT>(3.14f);
@@ -373,7 +382,7 @@ TEST_F(TestRowCursor, encode_key_int_float_char) {
     // Padding: only 2 keys initialized, 3rd padded as KEY_MINIMAL_MARKER(0x00)
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 2));
+        init_row_cursor_with_nulls(row, tablet_schema, 2);
 
         row.mutable_field(0) = Field::create_field<TYPE_INT>(int32_t(12345));
         row.mutable_field(1) = Field::create_field<TYPE_FLOAT>(3.14f);
@@ -395,7 +404,7 @@ TEST_F(TestRowCursor, encode_key_int_float_char) {
     // Null: INT(12345) present, FLOAT null, CHAR('ab') present
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 3));
+        init_row_cursor_with_nulls(row, tablet_schema, 3);
 
         row.mutable_field(0) = Field::create_field<TYPE_INT>(int32_t(12345));
         // field(1) stays null (default from init)
@@ -420,7 +429,7 @@ TEST_F(TestRowCursor, encode_key_int_date_varchar) {
     tablet_schema->_num_short_key_columns = 3;
 
     RowCursor row;
-    static_cast<void>(row.init(tablet_schema, 3));
+    init_row_cursor_with_nulls(row, tablet_schema, 3);
 
     row.mutable_field(0) = Field::create_field<TYPE_INT>(int32_t(12345));
     // DATE(2020-01-01): raw uint24_t = 2020*512 + 1*32 + 1 = 1034273
@@ -454,7 +463,7 @@ TEST_F(TestRowCursor, encode_key_double_nan_decimal_string) 
{
     tablet_schema->_num_short_key_columns = 3;
 
     RowCursor row;
-    static_cast<void>(row.init(tablet_schema, 3));
+    init_row_cursor_with_nulls(row, tablet_schema, 3);
 
     row.mutable_field(0) =
             
Field::create_field<TYPE_DOUBLE>(std::numeric_limits<double>::quiet_NaN());
@@ -491,7 +500,7 @@ TEST_F(TestRowCursor, encode_key_bigint_datev2_char) {
     tablet_schema->_num_short_key_columns = 3;
 
     RowCursor row;
-    static_cast<void>(row.init(tablet_schema, 3));
+    init_row_cursor_with_nulls(row, tablet_schema, 3);
 
     row.mutable_field(0) = 
Field::create_field<TYPE_BIGINT>(int64_t(9999999999LL));
     // DATEV2(2024-12-31): packed = (2024<<9)|(12<<5)|31 = 1036703
@@ -528,7 +537,7 @@ TEST_F(TestRowCursor, 
encode_key_float_inf_datetime_varchar) {
     tablet_schema->_num_short_key_columns = 3;
 
     RowCursor row;
-    static_cast<void>(row.init(tablet_schema, 3));
+    init_row_cursor_with_nulls(row, tablet_schema, 3);
 
     row.mutable_field(0) = 
Field::create_field<TYPE_FLOAT>(std::numeric_limits<float>::infinity());
     // DATETIME(2020-01-01 12:00:00) = 20200101120000 in olap datetime format
@@ -564,7 +573,7 @@ TEST_F(TestRowCursor, encode_key_with_padding_mow) {
     tablet_schema->_num_short_key_columns = 3;
 
     RowCursor row;
-    static_cast<void>(row.init(tablet_schema, 2));
+    init_row_cursor_with_nulls(row, tablet_schema, 2);
 
     row.mutable_field(0) = Field::create_field<TYPE_INT>(int32_t(12345));
     row.mutable_field(1) = Field::create_field<TYPE_INT>(int32_t(54321));
@@ -595,7 +604,7 @@ TEST_F(TestRowCursor, encode_key_negative_values) {
     tablet_schema->_num_short_key_columns = 3;
 
     RowCursor row;
-    static_cast<void>(row.init(tablet_schema, 3));
+    init_row_cursor_with_nulls(row, tablet_schema, 3);
 
     row.mutable_field(0) = Field::create_field<TYPE_INT>(int32_t(-12345));
     row.mutable_field(1) = Field::create_field<TYPE_DOUBLE>(-1.0);
@@ -623,7 +632,7 @@ TEST_F(TestRowCursor, encode_key_float_special_values) {
     // Test -infinity
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 2));
+        init_row_cursor_with_nulls(row, tablet_schema, 2);
         row.mutable_field(0) =
                 
Field::create_field<TYPE_FLOAT>(-std::numeric_limits<float>::infinity());
         row.mutable_field(1) = Field::create_field<TYPE_INT>(int32_t(0));
@@ -636,7 +645,7 @@ TEST_F(TestRowCursor, encode_key_float_special_values) {
     // Test INT boundary values
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 2));
+        init_row_cursor_with_nulls(row, tablet_schema, 2);
         row.mutable_field(0) =
                 
Field::create_field<TYPE_FLOAT>(std::numeric_limits<float>::quiet_NaN());
         row.mutable_field(1) = 
Field::create_field<TYPE_INT>(std::numeric_limits<int32_t>::max());
@@ -655,7 +664,7 @@ TEST_F(TestRowCursor, encode_key_float_special_values) {
         schema2->_num_short_key_columns = 1;
 
         RowCursor row2;
-        static_cast<void>(row2.init(schema2, 1));
+        init_row_cursor_with_nulls(row2, schema2, 1);
         row2.mutable_field(0) = 
Field::create_field<TYPE_INT>(std::numeric_limits<int32_t>::min());
 
         std::string buf;
@@ -675,7 +684,7 @@ TEST_F(TestRowCursor, encode_key_all_null) {
     tablet_schema->_num_short_key_columns = 3;
 
     RowCursor row;
-    static_cast<void>(row.init(tablet_schema, 3));
+    init_row_cursor_with_nulls(row, tablet_schema, 3);
     // All fields stay null (default from init)
 
     {
@@ -696,7 +705,7 @@ TEST_F(TestRowCursor, encode_key_datetime_v1) {
 
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         row.mutable_field(0) =
                 
Field::create_field_from_olap_value<TYPE_DATETIME>(uint64_t(20241231123045ULL));
 
@@ -717,7 +726,7 @@ TEST_F(TestRowCursor, encode_key_datetimev2) {
         tablet_schema->_num_short_key_columns = 1;
 
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         uint64_t packed = (uint64_t(2024) << 46) | (uint64_t(12) << 42) | 
(uint64_t(31) << 37) |
                           (uint64_t(12) << 32) | (uint64_t(30) << 26) | 
(uint64_t(45) << 20) |
                           uint64_t(0);
@@ -737,7 +746,7 @@ TEST_F(TestRowCursor, encode_key_datetimev2) {
         tablet_schema->_num_short_key_columns = 1;
 
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         uint64_t packed = (uint64_t(2024) << 46) | (uint64_t(12) << 42) | 
(uint64_t(31) << 37) |
                           (uint64_t(12) << 32) | (uint64_t(30) << 26) | 
(uint64_t(45) << 20) |
                           uint64_t(123000);
@@ -757,7 +766,7 @@ TEST_F(TestRowCursor, encode_key_datetimev2) {
         tablet_schema->_num_short_key_columns = 1;
 
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         uint64_t packed = (uint64_t(2024) << 46) | (uint64_t(12) << 42) | 
(uint64_t(31) << 37) |
                           (uint64_t(12) << 32) | (uint64_t(30) << 26) | 
(uint64_t(45) << 20) |
                           uint64_t(123456);
@@ -777,7 +786,7 @@ TEST_F(TestRowCursor, encode_key_datetimev2) {
         tablet_schema->_num_short_key_columns = 1;
 
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         // field(0) stays null
 
         std::string buf;
@@ -796,7 +805,7 @@ TEST_F(TestRowCursor, encode_key_timestamptz) {
 
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         // TimestampTzValue wraps DateV2Value<DateTimeV2ValueType> internally.
         // The uint64_t is the packed DateTimeV2 representation.
         row.mutable_field(0) =
@@ -819,7 +828,7 @@ TEST_F(TestRowCursor, encode_key_decimal32) {
     // Positive: 12345
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         row.mutable_field(0) = Field::create_field<TYPE_DECIMAL32>(Decimal32(int32_t(12345)));
 
         std::string buf;
@@ -830,7 +839,7 @@ TEST_F(TestRowCursor, encode_key_decimal32) {
     // Negative: -12345
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         row.mutable_field(0) = Field::create_field<TYPE_DECIMAL32>(Decimal32(int32_t(-12345)));
 
         std::string buf;
@@ -841,7 +850,7 @@ TEST_F(TestRowCursor, encode_key_decimal32) {
     // Zero
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         row.mutable_field(0) = Field::create_field<TYPE_DECIMAL32>(Decimal32(int32_t(0)));
 
         std::string buf;
@@ -861,7 +870,7 @@ TEST_F(TestRowCursor, encode_key_decimal64) {
     // Positive: 9999999999
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         row.mutable_field(0) =
                 Field::create_field<TYPE_DECIMAL64>(Decimal64(int64_t(9999999999LL)));
 
@@ -873,7 +882,7 @@ TEST_F(TestRowCursor, encode_key_decimal64) {
     // Negative: -123456789
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         row.mutable_field(0) =
                 Field::create_field<TYPE_DECIMAL64>(Decimal64(int64_t(-123456789LL)));
 
@@ -894,7 +903,7 @@ TEST_F(TestRowCursor, encode_key_decimal128i) {
     // Positive: 123456789012345678
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         int128_t val = static_cast<int128_t>(123456789012345678LL);
         row.mutable_field(0) = Field::create_field<TYPE_DECIMAL128I>(Decimal128V3(val));
 
@@ -907,7 +916,7 @@ TEST_F(TestRowCursor, encode_key_decimal128i) {
     // Negative: -123456789012345678
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         int128_t val = static_cast<int128_t>(-123456789012345678LL);
         row.mutable_field(0) = Field::create_field<TYPE_DECIMAL128I>(Decimal128V3(val));
 
@@ -929,7 +938,7 @@ TEST_F(TestRowCursor, encode_key_decimal256) {
     // Positive: 123456789012345678901234567890
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         wide::Int256 val({0xC373E0EE4E3F0AD2ULL, 0x000000018EE90FF6ULL, 0ULL, 0ULL});
         row.mutable_field(0) = Field::create_field<TYPE_DECIMAL256>(Decimal256(val));
 
@@ -942,7 +951,7 @@ TEST_F(TestRowCursor, encode_key_decimal256) {
     // Negative: -123456789012345678901234567890
     {
         RowCursor row;
-        static_cast<void>(row.init(tablet_schema, 1));
+        init_row_cursor_with_nulls(row, tablet_schema, 1);
         wide::Int256 val({0x3C8C1F11B1C0F52EULL, 0xFFFFFFFE7116F009ULL, 0xFFFFFFFFFFFFFFFFULL,
                           0xFFFFFFFFFFFFFFFFULL});
         row.mutable_field(0) = Field::create_field<TYPE_DECIMAL256>(Decimal256(val));
@@ -965,7 +974,7 @@ TEST_F(TestRowCursor, encode_key_datetimev2_decimal128i_decimal32) {
     tablet_schema->_num_short_key_columns = 3;
 
     RowCursor row;
-    static_cast<void>(row.init(tablet_schema, 3));
+    init_row_cursor_with_nulls(row, tablet_schema, 3);
 
     // DATETIMEV2(3): 2024-12-31 12:30:45.123
     {
diff --git a/be/test/storage/segment/segment_cache_test.cpp b/be/test/storage/segment/segment_cache_test.cpp
index a48c50eaa02..0c2dae24f57 100644
--- a/be/test/storage/segment/segment_cache_test.cpp
+++ b/be/test/storage/segment/segment_cache_test.cpp
@@ -69,6 +69,19 @@ class OlapMeta;
 static const uint32_t MAX_PATH_LEN = 1024;
 static StorageEngine* engine_ref = nullptr;
 
+static std::shared_ptr<Schema> create_full_schema(const TabletSchemaSPtr& tablet_schema) {
+    size_t num_columns = tablet_schema->num_columns();
+    if (num_columns > 0 && tablet_schema->columns().back()->name() == BeConsts::ROW_STORE_COL) {
+        --num_columns;
+    }
+
+    std::vector<ColumnId> column_ids(num_columns);
+    for (uint32_t cid = 0; cid < num_columns; ++cid) {
+        column_ids[cid] = cid;
+    }
+    return std::make_shared<Schema>(tablet_schema->columns(), column_ids);
+}
+
 static void set_up() {
     char buffer[MAX_PATH_LEN];
     EXPECT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
@@ -351,7 +364,7 @@ TEST_F(SegmentCacheTest, vec_sequence_col) {
     opts.tablet_schema = rowset->tablet_schema();
 
     std::unique_ptr<RowwiseIterator> iter;
-    std::shared_ptr<Schema> schema = std::make_shared<Schema>(rowset->tablet_schema());
+    std::shared_ptr<Schema> schema = create_full_schema(rowset->tablet_schema());
     auto s = segments[0]->new_iterator(schema, opts, &iter);
     ASSERT_TRUE(s.ok());
     auto read_block = rowset->tablet_schema()->create_block();
diff --git a/be/test/storage/segment/segment_corruption_test.cpp b/be/test/storage/segment/segment_corruption_test.cpp
index 0212fd90d62..fee14a9b924 100644
--- a/be/test/storage/segment/segment_corruption_test.cpp
+++ b/be/test/storage/segment/segment_corruption_test.cpp
@@ -225,7 +225,11 @@ public:
 
         // Write rows
         RowCursor row;
-        auto olap_st = row.init(schema, schema->num_columns());
+        OlapTuple tuple;
+        for (size_t i = 0; i < schema->num_columns(); ++i) {
+            tuple.add_null();
+        }
+        auto olap_st = row.init(schema, tuple);
         EXPECT_EQ(Status::OK(), olap_st);
 
         // Write one row: (1, "hello")
diff --git a/be/test/storage/segment/segment_iterator_apply_index_expr_test.cpp b/be/test/storage/segment/segment_iterator_apply_index_expr_test.cpp
index b5c723065a3..9d550bb4b8f 100644
--- a/be/test/storage/segment/segment_iterator_apply_index_expr_test.cpp
+++ b/be/test/storage/segment/segment_iterator_apply_index_expr_test.cpp
@@ -121,7 +121,11 @@ protected:
         _tablet_schema = make_tablet_schema();
         _segment = make_stub_segment(100, _tablet_schema);
 
-        _read_schema = std::make_shared<Schema>(_tablet_schema);
+        std::vector<ColumnId> read_column_ids(_tablet_schema->num_columns());
+        for (uint32_t cid = 0; cid < read_column_ids.size(); ++cid) {
+            read_column_ids[cid] = cid;
+        }
+        _read_schema = std::make_shared<Schema>(_tablet_schema->columns(), read_column_ids);
         _iter = std::make_unique<SegmentIterator>(_segment, _read_schema);
 
         // Set up RuntimeState with fallback enabled so _downgrade_without_index works
diff --git a/be/test/storage/segment/segment_iterator_limit_opt_test.cpp 
b/be/test/storage/segment/segment_iterator_limit_opt_test.cpp
index 4d8d84e6ab8..858947436a8 100644
--- a/be/test/storage/segment/segment_iterator_limit_opt_test.cpp
+++ b/be/test/storage/segment/segment_iterator_limit_opt_test.cpp
@@ -74,7 +74,11 @@ protected:
 
         _tablet_schema = std::make_shared<TabletSchema>();
         _tablet_schema->init_from_pb(schema_pb);
-        _read_schema = std::make_shared<Schema>(_tablet_schema);
+        std::vector<ColumnId> read_column_ids(_tablet_schema->num_columns());
+        for (uint32_t cid = 0; cid < read_column_ids.size(); ++cid) {
+            read_column_ids[cid] = cid;
+        }
+        _read_schema = std::make_shared<Schema>(_tablet_schema->columns(), read_column_ids);
     }
 
     // Build a SegmentIterator with minimal opts for _can_opt_limit_reads() testing.
diff --git a/be/test/storage/segment/segment_iterator_no_need_read_data_test.cpp b/be/test/storage/segment/segment_iterator_no_need_read_data_test.cpp
index f5119208e13..0bd73f93c97 100644
--- a/be/test/storage/segment/segment_iterator_no_need_read_data_test.cpp
+++ b/be/test/storage/segment/segment_iterator_no_need_read_data_test.cpp
@@ -44,7 +44,11 @@ TEST(SegmentIteratorNoNeedReadDataTest, extracted_variant_count_on_index) {
     const ColumnId subcol_cid = tablet_schema->field_index(*subcol.path_info_ptr());
     ASSERT_GE(subcol_cid, 0);
 
-    auto read_schema = std::make_shared<Schema>(tablet_schema);
+    std::vector<ColumnId> read_column_ids(tablet_schema->num_columns());
+    for (uint32_t cid = 0; cid < read_column_ids.size(); ++cid) {
+        read_column_ids[cid] = cid;
+    }
+    auto read_schema = std::make_shared<Schema>(tablet_schema->columns(), read_column_ids);
     SegmentIterator iter(nullptr, read_schema);
     iter._opts.tablet_schema = tablet_schema;
     iter._opts.push_down_agg_type_opt = TPushAggOp::COUNT_ON_INDEX;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to