github-actions[bot] commented on code in PR #24788:
URL: https://github.com/apache/doris/pull/24788#discussion_r1334075068


##########
be/src/olap/memtable.cpp:
##########
@@ -287,6 +287,56 @@
     return same_keys_num;
 }
 
+void MemTable::_sort_by_cluster_keys() {
+    SCOPED_RAW_TIMER(&_stat.sort_ns);
+    _stat.sort_times++;
+    // sort all rows
+    vectorized::Block in_block = _output_mutable_block.to_block();
+    auto cloneBlock = in_block.clone_without_columns();
+    _output_mutable_block = 
vectorized::MutableBlock::build_mutable_block(&cloneBlock);
+    vectorized::MutableBlock mutable_block =
+            vectorized::MutableBlock::build_mutable_block(&in_block);
+
+    std::vector<RowInBlock*> row_in_blocks;
+    std::unique_ptr<int, std::function<void(int*)>> 
row_in_blocks_deleter((int*)0x01, [&](int*) {
+        std::for_each(row_in_blocks.begin(), row_in_blocks.end(),
+                      std::default_delete<RowInBlock>());
+    });
+    row_in_blocks.reserve(mutable_block.rows());
+    for (size_t i = 0; i < mutable_block.rows(); i++) {
+        row_in_blocks.emplace_back(new RowInBlock {i});
+    }
+    Tie tie = Tie(0, mutable_block.rows());
+
+    for (auto i : _tablet_schema->cluster_key_idxes()) {
+        auto cmp = [&](const RowInBlock* lhs, const RowInBlock* rhs) -> int {
+            return mutable_block.compare_one_column(lhs->_row_pos, 
rhs->_row_pos, i, -1);
+        };
+        _sort_one_column(row_in_blocks, tie, cmp);
+    }
+
+    // sort extra round by _row_pos to make the sort stable
+    auto iter = tie.iter();
+    while (iter.next()) {
+        pdqsort(std::next(row_in_blocks.begin(), iter.left()),
+                std::next(row_in_blocks.begin(), iter.right()),
+                [](const RowInBlock* lhs, const RowInBlock* rhs) -> bool {
+                    return lhs->_row_pos < rhs->_row_pos;
+                });
+    }
+
+    in_block = mutable_block.to_block();
+    SCOPED_RAW_TIMER(&_stat.put_into_output_ns);
+    std::vector<int> row_pos_vec;

Review Comment:
   warning: variable 'row_pos_vec' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       std::vector<int> row_pos_vec = 0;
   ```
   



##########
be/src/olap/memtable.cpp:
##########
@@ -287,6 +287,56 @@ size_t MemTable::_sort() {
     return same_keys_num;
 }
 
+void MemTable::_sort_by_cluster_keys() {
+    SCOPED_RAW_TIMER(&_stat.sort_ns);
+    _stat.sort_times++;
+    // sort all rows
+    vectorized::Block in_block = _output_mutable_block.to_block();
+    auto cloneBlock = in_block.clone_without_columns();
+    _output_mutable_block = 
vectorized::MutableBlock::build_mutable_block(&cloneBlock);
+    vectorized::MutableBlock mutable_block =
+            vectorized::MutableBlock::build_mutable_block(&in_block);
+
+    std::vector<RowInBlock*> row_in_blocks;

Review Comment:
   warning: variable 'row_in_blocks' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       std::vector<RowInBlock*> row_in_blocks = 0;
   ```
   



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -350,13 +350,18 @@ Status Segment::new_inverted_index_iterator(const 
TabletColumn& tablet_column,
 Status Segment::lookup_row_key(const Slice& key, bool with_seq_col, 
RowLocation* row_location) {
     RETURN_IF_ERROR(load_pk_index_and_bf());
     bool has_seq_col = _tablet_schema->has_sequence_col();
+    bool has_rowid = !_tablet_schema->cluster_key_idxes().empty();

Review Comment:
   warning: variable 'has_rowid' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       bool has_rowid = false = !_tablet_schema->cluster_key_idxes().empty();
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -709,22 +738,100 @@
                                                     
converted_result.second->get_data(), num_rows));
     }
     if (_has_key) {
-        if (_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write) {
+        bool need_primary_key_indexes = (_tablet_schema->keys_type() == 
UNIQUE_KEYS &&
+                                         
_opts.enable_unique_key_merge_on_write);
+        bool need_short_key_indexes =
+                !need_primary_key_indexes ||
+                (need_primary_key_indexes && 
_tablet_schema->cluster_key_idxes().size() > 0);
+        if (need_primary_key_indexes) {
             // create primary indexes
-            std::string last_key;
-            for (size_t pos = 0; pos < num_rows; pos++) {
-                std::string key = _full_encode_keys(key_columns, pos);
-                if (_tablet_schema->has_sequence_col()) {
-                    _encode_seq_column(seq_column, pos, &key);
+            if (!need_short_key_indexes) {
+                std::string last_key;
+                for (size_t pos = 0; pos < num_rows; pos++) {
+                    std::string key = _full_encode_keys(key_columns, pos);
+                    if (_tablet_schema->has_sequence_col()) {
+                        _encode_seq_column(seq_column, pos, &key);
+                    }
+                    DCHECK(key.compare(last_key) > 0)
+                            << "found duplicate key or key is not sorted! 
current key: " << key
+                            << ", last key" << last_key;
+                    RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
+                    _maybe_invalid_row_cache(key);
+                    last_key = std::move(key);
+                }
+            } else {
+                std::vector<vectorized::IOlapColumnDataAccessor*> 
primary_key_columns;
+                primary_key_columns.swap(key_columns);
+                key_columns.clear();
+                for (const auto& cid : _tablet_schema->cluster_key_idxes()) {
+                    for (size_t id = 0; id < _column_writers.size(); ++id) {
+                        // olap data convertor alway start from id = 0
+                        auto converted_result = 
_olap_data_convertor->convert_column_data(id);
+                        if (cid == _column_ids[id]) {
+                            key_columns.push_back(converted_result.second);
+                            break;
+                        }
+                    }
+                }
+                std::vector<std::string> primary_keys;
+                // keep primary keys in memory
+                for (uint32_t pos = 0; pos < num_rows; pos++) {
+                    std::string key =
+                            _full_encode_keys(_primary_key_coders, 
primary_key_columns, pos);
+                    Slice slice(key);
+                    if (_tablet_schema->has_sequence_col()) {
+                        _encode_seq_column(seq_column, pos, &key);
+                    }
+                    _encode_rowid(pos, &key);
+                    primary_keys.emplace_back(std::move(key));
+                }
+                // sort primary keys
+                std::sort(primary_keys.begin(), primary_keys.end());
+                // write primary keys
+                std::string last_key;

Review Comment:
   warning: variable 'last_key' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
                   std::string last_key = 0;
   ```
   



##########
be/test/olap/primary_key_index_test.cpp:
##########
@@ -57,7 +57,7 @@ TEST_F(PrimaryKeyIndexTest, builder) {
     auto fs = io::global_local_filesystem();
     EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
 
-    PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+    PrimaryKeyIndexBuilder builder(file_writer.get(), 0, 0);

Review Comment:
   warning: variable 'builder' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       PrimaryKeyIndexBuilder builder = 0(file_writer.get(), 0, 0);
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -709,22 +738,100 @@
                                                     
converted_result.second->get_data(), num_rows));
     }
     if (_has_key) {
-        if (_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write) {
+        bool need_primary_key_indexes = (_tablet_schema->keys_type() == 
UNIQUE_KEYS &&
+                                         
_opts.enable_unique_key_merge_on_write);
+        bool need_short_key_indexes =
+                !need_primary_key_indexes ||
+                (need_primary_key_indexes && 
_tablet_schema->cluster_key_idxes().size() > 0);
+        if (need_primary_key_indexes) {
             // create primary indexes
-            std::string last_key;
-            for (size_t pos = 0; pos < num_rows; pos++) {
-                std::string key = _full_encode_keys(key_columns, pos);
-                if (_tablet_schema->has_sequence_col()) {
-                    _encode_seq_column(seq_column, pos, &key);
+            if (!need_short_key_indexes) {
+                std::string last_key;

Review Comment:
   warning: variable 'last_key' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
                   std::string last_key = 0;
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -709,22 +738,100 @@
                                                     
converted_result.second->get_data(), num_rows));
     }
     if (_has_key) {
-        if (_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write) {
+        bool need_primary_key_indexes = (_tablet_schema->keys_type() == 
UNIQUE_KEYS &&
+                                         
_opts.enable_unique_key_merge_on_write);
+        bool need_short_key_indexes =

Review Comment:
   warning: variable 'need_short_key_indexes' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
           bool need_short_key_indexes = false =
   ```
   



##########
be/src/olap/tablet_schema.h:
##########
@@ -248,6 +248,7 @@ class TabletSchema {
     std::vector<TabletColumn>& mutable_columns();
     size_t num_columns() const { return _num_columns; }
     size_t num_key_columns() const { return _num_key_columns; }
+    std::vector<uint32_t> cluster_key_idxes() const { return 
_cluster_key_idxes; }

Review Comment:
   warning: function 'cluster_key_idxes' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] std::vector<uint32_t> cluster_key_idxes() const { return 
_cluster_key_idxes; }
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -709,22 +738,100 @@ Status SegmentWriter::append_block(const 
vectorized::Block* block, size_t row_po
                                                     
converted_result.second->get_data(), num_rows));
     }
     if (_has_key) {
-        if (_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write) {
+        bool need_primary_key_indexes = (_tablet_schema->keys_type() == 
UNIQUE_KEYS &&

Review Comment:
   warning: variable 'need_primary_key_indexes' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
           bool need_primary_key_indexes = false = (_tablet_schema->keys_type() 
== UNIQUE_KEYS &&
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -709,22 +738,100 @@
                                                     
converted_result.second->get_data(), num_rows));
     }
     if (_has_key) {
-        if (_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write) {
+        bool need_primary_key_indexes = (_tablet_schema->keys_type() == 
UNIQUE_KEYS &&
+                                         
_opts.enable_unique_key_merge_on_write);
+        bool need_short_key_indexes =
+                !need_primary_key_indexes ||
+                (need_primary_key_indexes && 
_tablet_schema->cluster_key_idxes().size() > 0);
+        if (need_primary_key_indexes) {
             // create primary indexes
-            std::string last_key;
-            for (size_t pos = 0; pos < num_rows; pos++) {
-                std::string key = _full_encode_keys(key_columns, pos);
-                if (_tablet_schema->has_sequence_col()) {
-                    _encode_seq_column(seq_column, pos, &key);
+            if (!need_short_key_indexes) {
+                std::string last_key;
+                for (size_t pos = 0; pos < num_rows; pos++) {
+                    std::string key = _full_encode_keys(key_columns, pos);

Review Comment:
   warning: variable 'key' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
                       std::string key = 0 = _full_encode_keys(key_columns, 
pos);
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -709,22 +738,100 @@
                                                     
converted_result.second->get_data(), num_rows));
     }
     if (_has_key) {
-        if (_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write) {
+        bool need_primary_key_indexes = (_tablet_schema->keys_type() == 
UNIQUE_KEYS &&
+                                         
_opts.enable_unique_key_merge_on_write);
+        bool need_short_key_indexes =
+                !need_primary_key_indexes ||
+                (need_primary_key_indexes && 
_tablet_schema->cluster_key_idxes().size() > 0);
+        if (need_primary_key_indexes) {
             // create primary indexes
-            std::string last_key;
-            for (size_t pos = 0; pos < num_rows; pos++) {
-                std::string key = _full_encode_keys(key_columns, pos);
-                if (_tablet_schema->has_sequence_col()) {
-                    _encode_seq_column(seq_column, pos, &key);
+            if (!need_short_key_indexes) {
+                std::string last_key;
+                for (size_t pos = 0; pos < num_rows; pos++) {
+                    std::string key = _full_encode_keys(key_columns, pos);
+                    if (_tablet_schema->has_sequence_col()) {
+                        _encode_seq_column(seq_column, pos, &key);
+                    }
+                    DCHECK(key.compare(last_key) > 0)
+                            << "found duplicate key or key is not sorted! 
current key: " << key
+                            << ", last key" << last_key;
+                    RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
+                    _maybe_invalid_row_cache(key);
+                    last_key = std::move(key);
+                }
+            } else {
+                std::vector<vectorized::IOlapColumnDataAccessor*> 
primary_key_columns;
+                primary_key_columns.swap(key_columns);
+                key_columns.clear();
+                for (const auto& cid : _tablet_schema->cluster_key_idxes()) {
+                    for (size_t id = 0; id < _column_writers.size(); ++id) {
+                        // olap data convertor alway start from id = 0
+                        auto converted_result = 
_olap_data_convertor->convert_column_data(id);
+                        if (cid == _column_ids[id]) {
+                            key_columns.push_back(converted_result.second);
+                            break;
+                        }
+                    }
+                }
+                std::vector<std::string> primary_keys;
+                // keep primary keys in memory
+                for (uint32_t pos = 0; pos < num_rows; pos++) {
+                    std::string key =

Review Comment:
   warning: variable 'key' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
                       std::string key = 0 =
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to