This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new d711d64 [fix](vectorization)Some small fix for SegmentIter
Vectorization (#8267)
d711d64 is described below
commit d711d64dda3b92f3b51154ee585bf0c92b9aed0d
Author: wangbo <[email protected]>
AuthorDate: Tue Mar 8 13:13:17 2022 +0800
[fix](vectorization)Some small fix for SegmentIter Vectorization (#8267)
1. No longer use short-circuit evaluation for the date type, because the
cost of reading the date type is small and lazy materialization has a
higher cost.
2. Fix incorrect results when reading hll/bitmap/date types.
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 5 +----
be/src/vec/columns/column_complex.h | 16 ++++++++++++++++
be/src/vec/columns/column_vector.h | 4 ++--
3 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 2883e63..6a8febb 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -613,10 +613,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
_is_pred_column[cid] = true;
pred_column_ids.insert(cid);
- // for date type which can not be executed in a vectorized way,
using short circuit execution
- if (type == OLAP_FIELD_TYPE_VARCHAR || type ==
OLAP_FIELD_TYPE_CHAR ||
- type == OLAP_FIELD_TYPE_DECIMAL || type ==
OLAP_FIELD_TYPE_DATE ||
- predicate->is_in_predicate()) {
+ if (type == OLAP_FIELD_TYPE_VARCHAR || type ==
OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_STRING ||
predicate->is_in_predicate()) {
short_cir_pred_col_id_set.insert(cid);
_short_cir_eval_predicate.push_back(predicate);
_is_all_column_basic_type = false;
diff --git a/be/src/vec/columns/column_complex.h
b/be/src/vec/columns/column_complex.h
index cd26c7f..df2b582 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -63,6 +63,7 @@ public:
}
void insert_many_binary_data(char* data_array, uint32_t* len_array,
uint32_t* start_offset_array, size_t num) override {
+ resize(num);
if constexpr (std::is_same_v<T, BitmapValue>) {
for (size_t i = 0; i < num; i++) {
uint32_t len = len_array[i];
@@ -76,6 +77,21 @@ public:
*pvalue =
std::move(*reinterpret_cast<BitmapValue*>(data_array + start_offset));
}
}
+ } else if constexpr (std::is_same_v<T, HyperLogLog>) {
+ for (size_t i = 0; i < num; i++) {
+ uint32_t len = len_array[i];
+ uint32_t start_offset = start_offset_array[i];
+ HyperLogLog* pvalue = &get_element(size() - 1);
+ if (len != 0) {
+ HyperLogLog value;
+ value.deserialize(Slice(data_array + start_offset, len));
+ *pvalue = std::move(value);
+ } else {
+ *pvalue =
std::move(*reinterpret_cast<HyperLogLog*>(data_array + start_offset));
+ }
+ }
+ } else {
+ LOG(FATAL) << "Unexpected type in column complex";
}
}
diff --git a/be/src/vec/columns/column_vector.h
b/be/src/vec/columns/column_vector.h
index c6b2311..216e8fd 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -179,7 +179,7 @@ public:
value |= *(unsigned char*)(cur_ptr);
vectorized::VecDateTimeValue date;
date.from_olap_date(value);
- data.push_back_without_reserve(date);
+ this->insert_data(reinterpret_cast<char*>(&date), 0);
}
}
@@ -189,7 +189,7 @@ public:
const char* cur_ptr = data_ptr + value_size * i;
uint64_t value = *reinterpret_cast<const uint64_t*>(cur_ptr);
vectorized::VecDateTimeValue date(value);
- data.push_back_without_reserve(date);
+ this->insert_data(reinterpret_cast<char*>(&date), 0);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]