This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 82579126cf [fix](Dictionary-codec) heap overflow with in-predicate on
nullable columns (#14319) (#14641)
82579126cf is described below
commit 82579126cff3c225d0a796c17b84b99a8705e377
Author: zhengyu <[email protected]>
AuthorDate: Tue Nov 29 21:22:18 2022 +0800
[fix](Dictionary-codec) heap overflow with in-predicate on nullable columns
(#14319) (#14641)
Losing the segment id info messes up the _segment_id_to_value_in_dict_flags
map in InListPredicate, causing two distinct segments to collide and
eventually crash the BE.
Signed-off-by: freemandealer <[email protected]>
Signed-off-by: freemandealer <[email protected]>
---
be/src/olap/in_list_predicate.h | 11 ++++++++--
be/src/olap/rowset/segment_v2/segment.h | 2 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 3 ++-
be/src/vec/columns/column_dictionary.h | 25 ++++++++++++++++++++++
be/src/vec/columns/column_nullable.h | 8 +++++++
5 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index cac644040f..1a9fac9dcd 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -458,12 +458,19 @@ private:
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);
auto& data_array = nested_col_ptr->get_data();
- auto& value_in_dict_flags =
- _segment_id_to_value_in_dict_flags[column->get_rowset_segment_id()];
+ auto segid = column->get_rowset_segment_id();
+ DCHECK((segid.first.hi | segid.first.mi | segid.first.lo) != 0);
+ auto& value_in_dict_flags = _segment_id_to_value_in_dict_flags[segid];
if (value_in_dict_flags.empty()) {
nested_col_ptr->find_codes(*_values, value_in_dict_flags);
}
+ CHECK(value_in_dict_flags.size() == nested_col_ptr->dict_size())
+ << "value_in_dict_flags.size()!=nested_col_ptr->dict_size(), "
+ << value_in_dict_flags.size() << " vs " << nested_col_ptr->dict_size()
+ << " rowsetid=" << segid.first << " segmentid=" << segid.second
+ << "dict_info" << nested_col_ptr->dict_debug_string();
+
for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
if constexpr (is_nullable) {
diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h
index dd2457ef51..b0dea5aeb8 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -71,7 +71,7 @@ public:
Status new_iterator(const Schema& schema, const StorageReadOptions&
read_options,
std::unique_ptr<RowwiseIterator>* iter);
- uint64_t id() const { return _segment_id; }
+ uint32_t id() const { return _segment_id; }
RowsetId rowset_id() const { return _rowset_id; }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e6eff66685..1caa62f638 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1113,6 +1113,8 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
if (_is_pred_column[cid]) {
_current_return_columns[cid] =
Schema::get_predicate_column_nullable_ptr(*column_desc);
+ _current_return_columns[cid]->set_rowset_segment_id(
+ {_segment->rowset_id(), _segment->id()});
_current_return_columns[cid]->reserve(_opts.block_row_max);
} else if (i >= block->columns()) {
// if i >= block->columns means the column and not the pred_column means `column i` is
@@ -1259,7 +1261,6 @@ void SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl(
ColumnPredicate* predicate) {
auto& column = _current_return_columns[predicate->column_id()];
auto* col_ptr = column.get();
- column->set_rowset_segment_id({_segment->rowset_id(), _segment->id()});
if (PredicateTypeTraits::is_range(predicate->type())) {
col_ptr->convert_dict_codes_if_necessary();
diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 681ed20852..97a2b5c98f 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -300,6 +300,10 @@ public:
return result;
}
+ size_t dict_size() const { return _dict.size(); }
+
+ std::string dict_debug_string() const { return _dict.debug_string(); }
+
class Dictionary {
public:
Dictionary() : _dict_data(new DictContainer()), _total_str_len(0) {};
@@ -436,6 +440,27 @@ public:
size_t avg_str_len() { return empty() ? 0 : _total_str_len /
_dict_data->size(); }
+ size_t size() const {
+ if (!_dict_data) {
+ return 0;
+ }
+ return _dict_data->size();
+ }
+
+ std::string debug_string() const {
+ std::string str = "[";
+ if (_dict_data) {
+ for (size_t i = 0; i < _dict_data->size(); i++) {
+ if (i) {
+ str += ',';
+ }
+ str += (*_dict_data)[i].to_string();
+ }
+ }
+ str += ']';
+ return str;
+ }
+
private:
StringValue _null_value = StringValue();
StringValue::Comparator _comparator;
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index cf5414540f..9e3b480c15 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -314,6 +314,14 @@ public:
void sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms,
EqualRange& range, bool last_column) const override;
+ void set_rowset_segment_id(std::pair<RowsetId, uint32_t> rowset_segment_id) override {
+ nested_column->set_rowset_segment_id(rowset_segment_id);
+ }
+
+ std::pair<RowsetId, uint32_t> get_rowset_segment_id() const override {
+ return nested_column->get_rowset_segment_id();
+ }
+
private:
// the two functions will not update `_need_update_has_null`
ColumnUInt8& _get_null_map_column() { return
assert_cast<ColumnUInt8&>(*null_map); }
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]