Re: [PR] [feature-wip](merge-on-write) MOW table support different primary keys and sort keys [doris]

via GitHub Fri, 03 Nov 2023 01:12:52 -0700


zhannngchen commented on code in PR #24788:
URL: https://github.com/apache/doris/pull/24788#discussion_r1378535603



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -284,13 +284,17 @@ Status Segment::load_index() {
 
 Status Segment::_load_index_impl() {
     return _load_index_once.call([this] {
+        bool load_short_key_index = _tablet_schema->keys_type() != UNIQUE_KEYS 
||

Review Comment:
   Seems no need to change here?
   MoW table will not load short key index, and `CLUSTER BY` now only works on 
MoW table



##########
be/src/olap/rowset/segment_v2/segment.cpp:
##########
@@ -463,6 +475,25 @@ Status Segment::lookup_row_key(const Slice& key, bool 
with_seq_col, RowLocation*
                     "key with higher sequence id exists");
         }
     }
+    if (!has_seq_col && has_rowid) {
+        Slice sought_key_without_rowid =
+                Slice(sought_key.get_data(), sought_key.get_size() - 
rowid_length);
+        // compare key
+        if (key_without_seq.compare(sought_key_without_rowid) != 0) {
+            return Status::Error<ErrorCode::KEY_NOT_FOUND>("Can't find key in 
the segment");
+        }
+    }
+    if (has_rowid) {

Review Comment:
   Add comment here: 
   ```
   // found the key, use rowid in pk index if necessary.
   ```



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -816,6 +904,33 @@ std::string SegmentWriter::_full_encode_keys(
     return encoded_keys;
 }
 
+std::string SegmentWriter::_full_encode_keys(
+        std::vector<const KeyCoder*>& key_coders,

Review Comment:
   should be a const reference type



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -816,6 +904,33 @@ std::string SegmentWriter::_full_encode_keys(
     return encoded_keys;
 }
 
+std::string SegmentWriter::_full_encode_keys(

Review Comment:
   you can rewrite the previous method in this way:
   ```
   std::string SegmentWriter::_full_encode_keys(
           const std::vector<vectorized::IOlapColumnDataAccessor*>& 
key_columns, size_t pos,
           bool null_first) {
       _full_encode_keys(_key_coders, key_columns, pos, null_first);
   }
   ```



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1212,6 +1213,12 @@ Status 
SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
 
     // The sequence column needs to be removed from primary key index when 
comparing key
     bool has_seq_col = _segment->_tablet_schema->has_sequence_col();
+    bool has_rowid = !_segment->_tablet_schema->cluster_key_idxes().empty();

Review Comment:
   Is it possiable to add a new page type for IndexedColumnWrter that can write 
key:value into it, using the rowid as value.
   In this case, we don't need to process logics of rowid here.



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -1228,13 +1235,31 @@ Status 
SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
         Slice sought_key =
                 Slice(index_column->get_data_at(0).data, 
index_column->get_data_at(0).size);
         Slice sought_key_without_seq =
-                Slice(sought_key.get_data(), sought_key.get_size() - 
seq_col_length);
+                Slice(sought_key.get_data(), sought_key.get_size() - 
seq_col_length - rowid_length);
 
         // compare key
         if (Slice(index_key).compare(sought_key_without_seq) == 0) {
             exact_match = true;
         }
     }
+    if (!has_seq_col && has_rowid) {

Review Comment:
   else if (has_rowid)



##########
be/src/olap/rowset/segment_v2/segment.h:
##########
@@ -95,16 +95,17 @@ class Segment : public 
std::enable_shared_from_this<Segment> {
                                        std::unique_ptr<InvertedIndexIterator>* 
iter);
 
     const ShortKeyIndexDecoder* get_short_key_index() const {
-        DCHECK(_load_index_once.has_called() && 
_load_index_once.stored_result().ok());
+        // DCHECK(_load_index_once.has_called() && 
_load_index_once.stored_result().ok());

Review Comment:
   why remove the DCHECK?



##########
be/src/olap/tablet.h:
##########
@@ -423,7 +423,7 @@ class Tablet final : public BaseTablet {
                           const std::vector<RowsetSharedPtr>& 
specified_rowsets,
                           RowLocation* row_location, uint32_t version,
                           std::vector<std::unique_ptr<SegmentCacheHandle>>& 
segment_caches,
-                          RowsetSharedPtr* rowset = nullptr);
+                          RowsetSharedPtr* rowset = nullptr, bool with_rowid = 
true);

Review Comment:
   why default value is true?



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -745,22 +774,81 @@ Status SegmentWriter::append_block(const 
vectorized::Block* block, size_t row_po
                                                     
converted_result.second->get_data(), num_rows));
     }
     if (_has_key) {
-        if (_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write) {
+        bool need_primary_key_indexes = (_tablet_schema->keys_type() == 
UNIQUE_KEYS &&
+                                         
_opts.enable_unique_key_merge_on_write);
+        bool need_short_key_indexes =
+                !need_primary_key_indexes ||
+                (need_primary_key_indexes && 
_tablet_schema->cluster_key_idxes().size() > 0);
+        if (need_primary_key_indexes) {
             // create primary indexes
-            std::string last_key;
-            for (size_t pos = 0; pos < num_rows; pos++) {
-                std::string key = _full_encode_keys(key_columns, pos);
-                if (_tablet_schema->has_sequence_col()) {
-                    _encode_seq_column(seq_column, pos, &key);
+            if (!need_short_key_indexes) {

Review Comment:
   Maybe you need to add some member function to make the logic here more 
clear. Such as:
   ```
   _append_for_pk()
   _append_for_short_key()
   _append_for_cluster_key()
   ```



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -745,22 +774,81 @@ Status SegmentWriter::append_block(const 
vectorized::Block* block, size_t row_po
                                                     
converted_result.second->get_data(), num_rows));
     }
     if (_has_key) {
-        if (_tablet_schema->keys_type() == UNIQUE_KEYS && 
_opts.enable_unique_key_merge_on_write) {
+        bool need_primary_key_indexes = (_tablet_schema->keys_type() == 
UNIQUE_KEYS &&
+                                         
_opts.enable_unique_key_merge_on_write);

Review Comment:
   Add comments here, for now we don't need to query short key index for 
`CLUSTER BY` feature, but we still write the index for future usage.



##########
be/src/olap/memtable.cpp:
##########
@@ -290,6 +290,56 @@ size_t MemTable::_sort() {
     return same_keys_num;
 }
 
+void MemTable::_sort_by_cluster_keys() {
+    SCOPED_RAW_TIMER(&_stat.sort_ns);
+    _stat.sort_times++;
+    // sort all rows
+    vectorized::Block in_block = _output_mutable_block.to_block();
+    auto cloneBlock = in_block.clone_without_columns();
+    _output_mutable_block = 
vectorized::MutableBlock::build_mutable_block(&cloneBlock);
+    vectorized::MutableBlock mutable_block =
+            vectorized::MutableBlock::build_mutable_block(&in_block);
+
+    std::vector<RowInBlock*> row_in_blocks;
+    std::unique_ptr<int, std::function<void(int*)>> 
row_in_blocks_deleter((int*)0x01, [&](int*) {
+        std::for_each(row_in_blocks.begin(), row_in_blocks.end(),
+                      std::default_delete<RowInBlock>());
+    });
+    row_in_blocks.reserve(mutable_block.rows());
+    for (size_t i = 0; i < mutable_block.rows(); i++) {
+        row_in_blocks.emplace_back(new RowInBlock {i});
+    }
+    Tie tie = Tie(0, mutable_block.rows());
+
+    for (auto i : _tablet_schema->cluster_key_idxes()) {
+        auto cmp = [&](const RowInBlock* lhs, const RowInBlock* rhs) -> int {
+            return mutable_block.compare_one_column(lhs->_row_pos, 
rhs->_row_pos, i, -1);
+        };
+        _sort_one_column(row_in_blocks, tie, cmp);
+    }
+
+    // sort extra round by _row_pos to make the sort stable
+    auto iter = tie.iter();
+    while (iter.next()) {
+        pdqsort(std::next(row_in_blocks.begin(), iter.left()),
+                std::next(row_in_blocks.begin(), iter.right()),
+                [](const RowInBlock* lhs, const RowInBlock* rhs) -> bool {
+                    return lhs->_row_pos < rhs->_row_pos;
+                });
+    }
+
+    in_block = mutable_block.to_block();

Review Comment:
   It's confusing for the usage of `in_block`
   ```
   vectorized::Block in_block = _output_mutable_block.to_block();
   vectorized::MutableBlock mutable_block =
               vectorized::MutableBlock::build_mutable_block(&in_block);
   in_block = mutable_block.to_block();
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] [feature-wip](merge-on-write) MOW table support different primary keys and sort keys [doris]

Reply via email to