This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 82579126cf [fix](Dictionary-codec) heap overflow with in-predicate on 
nullable columns (#14319) (#14641)
82579126cf is described below

commit 82579126cff3c225d0a796c17b84b99a8705e377
Author: zhengyu <[email protected]>
AuthorDate: Tue Nov 29 21:22:18 2022 +0800

    [fix](Dictionary-codec) heap overflow with in-predicate on nullable columns 
(#14319) (#14641)
    
    Losing the segment id info corrupts the _segment_id_to_value_in_dict_flags
    map in InListPredicate, causing two distinct segments to collide and
    eventually crash the BE.
    
    Signed-off-by: freemandealer <[email protected]>
    
    Signed-off-by: freemandealer <[email protected]>
---
 be/src/olap/in_list_predicate.h                    | 11 ++++++++--
 be/src/olap/rowset/segment_v2/segment.h            |  2 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  3 ++-
 be/src/vec/columns/column_dictionary.h             | 25 ++++++++++++++++++++++
 be/src/vec/columns/column_nullable.h               |  8 +++++++
 5 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index cac644040f..1a9fac9dcd 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -458,12 +458,19 @@ private:
                 auto* nested_col_ptr = vectorized::check_and_get_column<
                         
vectorized::ColumnDictionary<vectorized::Int32>>(column);
                 auto& data_array = nested_col_ptr->get_data();
-                auto& value_in_dict_flags =
-                        
_segment_id_to_value_in_dict_flags[column->get_rowset_segment_id()];
+                auto segid = column->get_rowset_segment_id();
+                DCHECK((segid.first.hi | segid.first.mi | segid.first.lo) != 
0);
+                auto& value_in_dict_flags = 
_segment_id_to_value_in_dict_flags[segid];
                 if (value_in_dict_flags.empty()) {
                     nested_col_ptr->find_codes(*_values, value_in_dict_flags);
                 }
 
+                CHECK(value_in_dict_flags.size() == 
nested_col_ptr->dict_size())
+                        << 
"value_in_dict_flags.size()!=nested_col_ptr->dict_size(), "
+                        << value_in_dict_flags.size() << " vs " << 
nested_col_ptr->dict_size()
+                        << " rowsetid=" << segid.first << " segmentid=" << 
segid.second
+                        << "dict_info" << nested_col_ptr->dict_debug_string();
+
                 for (uint16_t i = 0; i < size; i++) {
                     uint16_t idx = sel[i];
                     if constexpr (is_nullable) {
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index dd2457ef51..b0dea5aeb8 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -71,7 +71,7 @@ public:
     Status new_iterator(const Schema& schema, const StorageReadOptions& 
read_options,
                         std::unique_ptr<RowwiseIterator>* iter);
 
-    uint64_t id() const { return _segment_id; }
+    uint32_t id() const { return _segment_id; }
 
     RowsetId rowset_id() const { return _rowset_id; }
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e6eff66685..1caa62f638 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1113,6 +1113,8 @@ Status SegmentIterator::next_batch(vectorized::Block* 
block) {
             if (_is_pred_column[cid]) {
                 _current_return_columns[cid] =
                         
Schema::get_predicate_column_nullable_ptr(*column_desc);
+                _current_return_columns[cid]->set_rowset_segment_id(
+                        {_segment->rowset_id(), _segment->id()});
                 _current_return_columns[cid]->reserve(_opts.block_row_max);
             } else if (i >= block->columns()) {
                 // if i >= block->columns means the column and not the 
pred_column means `column i` is
@@ -1259,7 +1261,6 @@ void 
SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl(
         ColumnPredicate* predicate) {
     auto& column = _current_return_columns[predicate->column_id()];
     auto* col_ptr = column.get();
-    column->set_rowset_segment_id({_segment->rowset_id(), _segment->id()});
 
     if (PredicateTypeTraits::is_range(predicate->type())) {
         col_ptr->convert_dict_codes_if_necessary();
diff --git a/be/src/vec/columns/column_dictionary.h 
b/be/src/vec/columns/column_dictionary.h
index 681ed20852..97a2b5c98f 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -300,6 +300,10 @@ public:
         return result;
     }
 
+    size_t dict_size() const { return _dict.size(); }
+
+    std::string dict_debug_string() const { return _dict.debug_string(); }
+
     class Dictionary {
     public:
         Dictionary() : _dict_data(new DictContainer()), _total_str_len(0) {};
@@ -436,6 +440,27 @@ public:
 
         size_t avg_str_len() { return empty() ? 0 : _total_str_len / 
_dict_data->size(); }
 
+        size_t size() const {
+            if (!_dict_data) {
+                return 0;
+            }
+            return _dict_data->size();
+        }
+
+        std::string debug_string() const {
+            std::string str = "[";
+            if (_dict_data) {
+                for (size_t i = 0; i < _dict_data->size(); i++) {
+                    if (i) {
+                        str += ',';
+                    }
+                    str += (*_dict_data)[i].to_string();
+                }
+            }
+            str += ']';
+            return str;
+        }
+
     private:
         StringValue _null_value = StringValue();
         StringValue::Comparator _comparator;
diff --git a/be/src/vec/columns/column_nullable.h 
b/be/src/vec/columns/column_nullable.h
index cf5414540f..9e3b480c15 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -314,6 +314,14 @@ public:
     void sort_column(const ColumnSorter* sorter, EqualFlags& flags, 
IColumn::Permutation& perms,
                      EqualRange& range, bool last_column) const override;
 
+    void set_rowset_segment_id(std::pair<RowsetId, uint32_t> 
rowset_segment_id) override {
+        nested_column->set_rowset_segment_id(rowset_segment_id);
+    }
+
+    std::pair<RowsetId, uint32_t> get_rowset_segment_id() const override {
+        return nested_column->get_rowset_segment_id();
+    }
+
 private:
     // the two functions will not update `_need_update_has_null`
     ColumnUInt8& _get_null_map_column() { return 
assert_cast<ColumnUInt8&>(*null_map); }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to