This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b473b98d58a [Improvement](join) optimization for
build_side_output_column (#30826)
b473b98d58a is described below
commit b473b98d58acdc7955dffd28f5f43d9615c5f06f
Author: Pxl <[email protected]>
AuthorDate: Mon Feb 19 14:08:09 2024 +0800
[Improvement](join) optimization for build_side_output_column (#30826)
optimization for build_side_output_column
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 20 +++----
be/src/vec/columns/column.h | 7 ---
be/src/vec/columns/column_array.cpp | 31 ----------
be/src/vec/columns/column_array.h | 2 +-
be/src/vec/columns/column_complex.h | 14 -----
be/src/vec/columns/column_const.cpp | 6 --
be/src/vec/columns/column_const.h | 2 +-
be/src/vec/columns/column_decimal.cpp | 12 ----
be/src/vec/columns/column_decimal.h | 2 -
be/src/vec/columns/column_dictionary.h | 4 --
be/src/vec/columns/column_dummy.h | 4 --
be/src/vec/columns/column_fixed_length_object.h | 4 --
be/src/vec/columns/column_map.cpp | 20 -------
be/src/vec/columns/column_map.h | 1 -
be/src/vec/columns/column_nullable.cpp | 15 +++--
be/src/vec/columns/column_nullable.h | 3 +-
be/src/vec/columns/column_object.cpp | 12 ----
be/src/vec/columns/column_object.h | 2 -
be/src/vec/columns/column_string.cpp | 28 ---------
be/src/vec/columns/column_string.h | 2 -
be/src/vec/columns/column_struct.cpp | 9 ---
be/src/vec/columns/column_struct.h | 1 -
be/src/vec/columns/column_vector.cpp | 18 +-----
be/src/vec/columns/column_vector.h | 2 -
be/src/vec/columns/predicate_column.h | 4 --
be/src/vec/exec/join/process_hash_table_probe.h | 3 +
.../vec/exec/join/process_hash_table_probe_impl.h | 70 ++++++++++++++++------
.../array/function_array_with_constant.cpp | 4 +-
be/test/vec/core/column_array_test.cpp | 48 ---------------
29 files changed, 82 insertions(+), 268 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 4c659e92df8..97fbaf5e0e1 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1526,21 +1526,21 @@ Status
SegmentIterator::_vec_init_lazy_materialization() {
std::set<ColumnId> short_cir_pred_col_id_set; // using set for
distinct cid
std::set<ColumnId> vec_pred_col_id_set;
- for (auto predicate : _col_predicates) {
+ for (auto* predicate : _col_predicates) {
auto cid = predicate->column_id();
_is_pred_column[cid] = true;
pred_column_ids.insert(cid);
// check pred using short eval or vec eval
if (_can_evaluated_by_vectorized(predicate)) {
- vec_pred_col_id_set.insert(predicate->column_id());
+ vec_pred_col_id_set.insert(cid);
_pre_eval_block_predicate.push_back(predicate);
} else {
short_cir_pred_col_id_set.insert(cid);
_short_cir_eval_predicate.push_back(predicate);
- if (predicate->is_filter()) {
- _filter_info_id.push_back(predicate);
- }
+ }
+ if (predicate->is_filter()) {
+ _filter_info_id.push_back(predicate);
}
}
@@ -1959,17 +1959,17 @@ uint16_t
SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_
bool ret_flags[original_size];
DCHECK(!_pre_eval_block_predicate.empty());
bool is_first = true;
- for (int i = 0; i < _pre_eval_block_predicate.size(); i++) {
- if (_pre_eval_block_predicate[i]->always_true()) {
+ for (auto& pred : _pre_eval_block_predicate) {
+ if (pred->always_true()) {
continue;
}
- auto column_id = _pre_eval_block_predicate[i]->column_id();
+ auto column_id = pred->column_id();
auto& column = _current_return_columns[column_id];
if (is_first) {
- _pre_eval_block_predicate[i]->evaluate_vec(*column, original_size,
ret_flags);
+ pred->evaluate_vec(*column, original_size, ret_flags);
is_first = false;
} else {
- _pre_eval_block_predicate[i]->evaluate_and_vec(*column,
original_size, ret_flags);
+ pred->evaluate_and_vec(*column, original_size, ret_flags);
}
}
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 23f9073eff5..322456a8f77 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -483,13 +483,6 @@ public:
*/
virtual Ptr replicate(const Offsets& offsets) const = 0;
- /** Copies each element according offsets parameter.
- * (i-th element should be copied counts[i] times.)
- * If `begin` and `count_sz` specified, it means elements in range
[`begin`, `begin` + `count_sz`) will be replicated.
- * If `count_sz` is -1, `begin` must be 0.
- */
- virtual void replicate(const uint32_t* indexs, size_t target_size,
IColumn& column) const = 0;
-
/// Appends one field multiple times. Can be optimized in inherited
classes.
virtual void insert_many(const Field& field, size_t length) {
for (size_t i = 0; i < length; ++i) {
diff --git a/be/src/vec/columns/column_array.cpp
b/be/src/vec/columns/column_array.cpp
index 8d1cbdd69ac..6d2914d8053 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -856,37 +856,6 @@ ColumnPtr ColumnArray::replicate(const IColumn::Offsets&
replicate_offsets) cons
return replicate_generic(replicate_offsets);
}
-void ColumnArray::replicate(const uint32_t* indices, size_t target_size,
IColumn& column) const {
- if (target_size == 0) {
- return;
- }
-
- auto& dst_col = assert_cast<ColumnArray&>(column);
- auto& dst_data_col = dst_col.get_data();
- auto& dst_offsets = dst_col.get_offsets();
- dst_offsets.reserve(target_size);
-
- PODArray<uint32> data_indices_to_replicate;
-
- for (size_t i = 0; i < target_size; ++i) {
- const auto index = indices[i];
- const auto start = offset_at(index);
- const auto length = size_at(index);
- dst_offsets.push_back(dst_offsets.back() + length);
- if (UNLIKELY(length == 0)) {
- continue;
- }
-
- data_indices_to_replicate.reserve(data_indices_to_replicate.size() +
length);
- for (size_t j = start; j != start + length; ++j) {
- data_indices_to_replicate.push_back(j);
- }
- }
-
- get_data().replicate(data_indices_to_replicate.data(),
data_indices_to_replicate.size(),
- dst_data_col);
-}
-
template <typename T>
ColumnPtr ColumnArray::replicate_number(const IColumn::Offsets&
replicate_offsets) const {
size_t col_size = size();
diff --git a/be/src/vec/columns/column_array.h
b/be/src/vec/columns/column_array.h
index f2f187a7236..046bc22ac48 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -174,7 +174,7 @@ public:
size_t byte_size() const override;
size_t allocated_bytes() const override;
ColumnPtr replicate(const IColumn::Offsets& replicate_offsets) const
override;
- void replicate(const uint32_t* counts, size_t target_size, IColumn&
column) const override;
+
ColumnPtr convert_to_full_column_if_const() const override;
/** More efficient methods of manipulation */
diff --git a/be/src/vec/columns/column_complex.h
b/be/src/vec/columns/column_complex.h
index b004d946aae..8e9686cb2a5 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -265,8 +265,6 @@ public:
ColumnPtr replicate(const IColumn::Offsets& replicate_offsets) const
override;
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override;
-
[[noreturn]] MutableColumns scatter(IColumn::ColumnIndex num_columns,
const IColumn::Selector& selector)
const override {
LOG(FATAL) << "scatter not implemented";
@@ -403,18 +401,6 @@ ColumnPtr ColumnComplexType<T>::replicate(const
IColumn::Offsets& offsets) const
return res;
}
-template <typename T>
-void ColumnComplexType<T>::replicate(const uint32_t* indexs, size_t
target_size,
- IColumn& column) const {
- auto& res = reinterpret_cast<ColumnComplexType<T>&>(column);
- typename Self::Container& res_data = res.get_data();
- res_data.resize(target_size);
-
- for (size_t i = 0; i < target_size; ++i) {
- res_data[i] = data[indexs[i]];
- }
-}
-
using ColumnBitmap = ColumnComplexType<BitmapValue>;
using ColumnHLL = ColumnComplexType<HyperLogLog>;
using ColumnQuantileState = ColumnComplexType<QuantileState>;
diff --git a/be/src/vec/columns/column_const.cpp
b/be/src/vec/columns/column_const.cpp
index 3fb851b2a9c..f7efec1f72a 100644
--- a/be/src/vec/columns/column_const.cpp
+++ b/be/src/vec/columns/column_const.cpp
@@ -78,12 +78,6 @@ ColumnPtr ColumnConst::replicate(const Offsets& offsets)
const {
return ColumnConst::create(data, replicated_size);
}
-void ColumnConst::replicate(const uint32_t* counts, size_t target_size,
IColumn& column) const {
- if (s == 0) return;
- auto& res = reinterpret_cast<ColumnConst&>(column);
- res.s = target_size;
-}
-
ColumnPtr ColumnConst::permute(const Permutation& perm, size_t limit) const {
if (limit == 0) {
limit = s;
diff --git a/be/src/vec/columns/column_const.h
b/be/src/vec/columns/column_const.h
index c832c02bcbd..5498fbf7c20 100644
--- a/be/src/vec/columns/column_const.h
+++ b/be/src/vec/columns/column_const.h
@@ -192,7 +192,7 @@ public:
size_t filter(const Filter& filter) override;
ColumnPtr replicate(const Offsets& offsets) const override;
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override;
+
ColumnPtr permute(const Permutation& perm, size_t limit) const override;
// ColumnPtr index(const IColumn & indexes, size_t limit) const override;
void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
diff --git a/be/src/vec/columns/column_decimal.cpp
b/be/src/vec/columns/column_decimal.cpp
index dd42b3563a8..95b247fc668 100644
--- a/be/src/vec/columns/column_decimal.cpp
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -437,18 +437,6 @@ ColumnPtr ColumnDecimal<T>::replicate(const
IColumn::Offsets& offsets) const {
return res;
}
-template <typename T>
-void ColumnDecimal<T>::replicate(const uint32_t* __restrict indexs, size_t
target_size,
- IColumn& column) const {
- auto& res = reinterpret_cast<ColumnDecimal<T>&>(column);
- typename Self::Container& res_data = res.get_data();
- res_data.resize(target_size);
-
- for (size_t i = 0; i < target_size; ++i) {
- res_data[i] = data[indexs[i]];
- }
-}
-
template <typename T>
void ColumnDecimal<T>::sort_column(const ColumnSorter* sorter, EqualFlags&
flags,
IColumn::Permutation& perms, EqualRange&
range,
diff --git a/be/src/vec/columns/column_decimal.h
b/be/src/vec/columns/column_decimal.h
index 3681f9190ae..49b58ebaa4f 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -230,8 +230,6 @@ public:
ColumnPtr replicate(const IColumn::Offsets& offsets) const override;
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override;
-
MutableColumns scatter(IColumn::ColumnIndex num_columns,
const IColumn::Selector& selector) const override {
return this->template scatter_impl<Self>(num_columns, selector);
diff --git a/be/src/vec/columns/column_dictionary.h
b/be/src/vec/columns/column_dictionary.h
index 6de729fd2de..518d8faa008 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -303,10 +303,6 @@ public:
return _rowset_segment_id;
}
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override {
- LOG(FATAL) << "not support";
- }
-
bool is_dict_sorted() const { return _dict_sorted; }
bool is_dict_empty() const { return _dict.empty(); }
diff --git a/be/src/vec/columns/column_dummy.h
b/be/src/vec/columns/column_dummy.h
index b51cd8faa65..b94464be5ba 100644
--- a/be/src/vec/columns/column_dummy.h
+++ b/be/src/vec/columns/column_dummy.h
@@ -116,10 +116,6 @@ public:
return clone_dummy(offsets.back());
}
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override {
- LOG(FATAL) << "Not implemented";
- }
-
MutableColumns scatter(ColumnIndex num_columns, const Selector& selector)
const override {
if (s != selector.size()) {
LOG(FATAL) << "Size of selector doesn't match size of column.";
diff --git a/be/src/vec/columns/column_fixed_length_object.h
b/be/src/vec/columns/column_fixed_length_object.h
index 6eefc789aa0..2e67b00d312 100644
--- a/be/src/vec/columns/column_fixed_length_object.h
+++ b/be/src/vec/columns/column_fixed_length_object.h
@@ -200,10 +200,6 @@ public:
LOG(FATAL) << "get_permutation not supported";
}
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override {
- LOG(FATAL) << "not support";
- }
-
ColumnPtr index(const IColumn& indexes, size_t limit) const override {
LOG(FATAL) << "index not supported";
}
diff --git a/be/src/vec/columns/column_map.cpp
b/be/src/vec/columns/column_map.cpp
index 53d23df8ae3..dcfcdb46d70 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -458,26 +458,6 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets)
const {
return res;
}
-void ColumnMap::replicate(const uint32_t* indices, size_t target_size,
IColumn& column) const {
- auto& res = reinterpret_cast<ColumnMap&>(column);
-
- auto keys_array =
- ColumnArray::create(keys_column->assume_mutable(),
offsets_column->assume_mutable());
-
- auto result_array = ColumnArray::create(res.keys_column->assume_mutable(),
-
res.offsets_column->assume_mutable());
- keys_array->replicate(indices, target_size,
result_array->assume_mutable_ref());
-
- result_array = ColumnArray::create(res.values_column->assume_mutable(),
- res.offsets_column->clone_empty());
-
- auto values_array =
- ColumnArray::create(values_column->assume_mutable(),
offsets_column->assume_mutable());
-
- /// FIXME: To reuse the replicate of ColumnArray, the offsets column was
replicated twice
- values_array->replicate(indices, target_size,
result_array->assume_mutable_ref());
-}
-
MutableColumnPtr ColumnMap::get_shrinked_column() {
MutableColumns new_columns(2);
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index fe1ccfb6f82..206660d6a06 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -116,7 +116,6 @@ public:
size_t filter(const Filter& filter) override;
ColumnPtr permute(const Permutation& perm, size_t limit) const override;
ColumnPtr replicate(const Offsets& offsets) const override;
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector& selector)
const override {
return scatter_impl<ColumnMap>(num_columns, selector);
}
diff --git a/be/src/vec/columns/column_nullable.cpp
b/be/src/vec/columns/column_nullable.cpp
index 8b0008d6e2e..0cd671eb110 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -313,6 +313,15 @@ void ColumnNullable::insert_indices_from(const IColumn&
src, const uint32_t* ind
_need_update_has_null = true;
}
+void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
+ const uint32_t*
indices_begin,
+ const uint32_t*
indices_end) {
+ const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
+ get_nested_column().insert_indices_from(src_concrete.get_nested_column(),
indices_begin,
+ indices_end);
+ _get_null_map_column().insert_many_defaults(indices_end - indices_begin);
+}
+
void ColumnNullable::insert(const Field& x) {
if (x.is_null()) {
get_nested_column().insert_default();
@@ -508,12 +517,6 @@ ColumnPtr ColumnNullable::replicate(const Offsets&
offsets) const {
return ColumnNullable::create(replicated_data, replicated_null_map);
}
-void ColumnNullable::replicate(const uint32_t* counts, size_t target_size,
IColumn& column) const {
- auto& res = reinterpret_cast<ColumnNullable&>(column);
- get_nested_column().replicate(counts, target_size,
res.get_nested_column());
- get_null_map_column().replicate(counts, target_size,
res.get_null_map_column());
-}
-
template <bool negative>
void ColumnNullable::apply_null_map_impl(const ColumnUInt8& map) {
NullMap& arr1 = get_null_map_data();
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index 91128fb69a8..eca4c57fceb 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -121,6 +121,8 @@ public:
void insert_range_from(const IColumn& src, size_t start, size_t length)
override;
void insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
const uint32_t* indices_end) override;
+ void insert_indices_from_not_has_null(const IColumn& src, const uint32_t*
indices_begin,
+ const uint32_t* indices_end);
void insert(const Field& x) override;
void insert_from(const IColumn& src, size_t n) override;
@@ -211,7 +213,6 @@ public:
size_t byte_size() const override;
size_t allocated_bytes() const override;
ColumnPtr replicate(const Offsets& replicate_offsets) const override;
- void replicate(const uint32_t* counts, size_t target_size, IColumn&
column) const override;
void update_xxHash_with_value(size_t start, size_t end, uint64_t& hash,
const uint8_t* __restrict null_data) const
override;
void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index 552ad31809a..33ce8fad6aa 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1267,18 +1267,6 @@ void ColumnObject::strip_outer_array() {
std::swap(subcolumns, new_subcolumns);
}
-void ColumnObject::replicate(const uint32_t* indexs, size_t target_size,
IColumn& column) const {
- if (!is_finalized()) {
- const_cast<ColumnObject*>(this)->finalize();
- }
- auto& var = assert_cast<ColumnObject&>(column);
- for (auto& entry : subcolumns) {
- auto replica = entry->data.get_finalized_column().clone_empty();
- entry->data.get_finalized_column().replicate(indexs, target_size,
*replica);
- var.add_sub_column(entry->path, std::move(replica),
entry->data.get_least_common_type());
- }
-}
-
ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
if (!is_finalized()) {
const_cast<ColumnObject*>(this)->finalize();
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index 7e50e34bc9d..c460bb813d7 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -473,8 +473,6 @@ public:
LOG(FATAL) << "should not call the method in column object";
}
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override;
-
template <typename Func>
MutableColumnPtr apply_for_subcolumns(Func&& func) const;
diff --git a/be/src/vec/columns/column_string.cpp
b/be/src/vec/columns/column_string.cpp
index 337f5e5663a..5c9b1361ac7 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -472,34 +472,6 @@ ColumnPtr ColumnString::replicate(const Offsets&
replicate_offsets) const {
return res;
}
-void ColumnString::replicate(const uint32_t* indexs, size_t target_size,
IColumn& column) const {
- auto& res = reinterpret_cast<ColumnString&>(column);
-
- Chars& res_chars = res.chars;
- Offsets& res_offsets = res.offsets;
-
- size_t byte_size = 0;
- res_offsets.resize(target_size);
- for (size_t i = 0; i < target_size; ++i) {
- long row_idx = indexs[i];
- auto str_size = offsets[row_idx] - offsets[row_idx - 1];
- res_offsets[i] = res_offsets[i - 1] + str_size;
- byte_size += str_size;
- }
-
- res_chars.resize(byte_size);
- auto* __restrict dest = res.chars.data();
- auto* __restrict src = chars.data();
- for (size_t i = 0; i < target_size; ++i) {
- long row_idx = indexs[i];
- auto str_size = offsets[row_idx] - offsets[row_idx - 1];
- memcpy_small_allow_read_write_overflow15(dest + res_offsets[i - 1],
- src + offsets[row_idx - 1],
str_size);
- }
-
- check_chars_length(res_chars.size(), res_offsets.size());
-}
-
void ColumnString::reserve(size_t n) {
offsets.reserve(n);
chars.reserve(n);
diff --git a/be/src/vec/columns/column_string.h
b/be/src/vec/columns/column_string.h
index e6b27f20054..5c50f5ed2f1 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -520,8 +520,6 @@ public:
ColumnPtr replicate(const Offsets& replicate_offsets) const override;
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override;
-
MutableColumns scatter(ColumnIndex num_columns, const Selector& selector)
const override {
return scatter_impl<ColumnString>(num_columns, selector);
}
diff --git a/be/src/vec/columns/column_struct.cpp
b/be/src/vec/columns/column_struct.cpp
index 4e69db0a545..f6ab9c9604f 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -300,15 +300,6 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets)
const {
return ColumnStruct::create(new_columns);
}
-void ColumnStruct::replicate(const uint32_t* indexs, size_t target_size,
IColumn& column) const {
- auto& res = reinterpret_cast<ColumnStruct&>(column);
- res.columns.resize(columns.size());
-
- for (size_t i = 0; i != columns.size(); ++i) {
- columns[i]->replicate(indexs, target_size, *res.columns[i]);
- }
-}
-
MutableColumnPtr ColumnStruct::get_shrinked_column() {
const size_t tuple_size = columns.size();
MutableColumns new_columns(tuple_size);
diff --git a/be/src/vec/columns/column_struct.h
b/be/src/vec/columns/column_struct.h
index 73f40713729..1b1daee4452 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -154,7 +154,6 @@ public:
size_t filter(const Filter& filter) override;
ColumnPtr permute(const Permutation& perm, size_t limit) const override;
ColumnPtr replicate(const Offsets& offsets) const override;
- void replicate(const uint32_t* counts, size_t target_size, IColumn&
column) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector& selector)
const override;
// ColumnPtr index(const IColumn & indexes, size_t limit) const override;
diff --git a/be/src/vec/columns/column_vector.cpp
b/be/src/vec/columns/column_vector.cpp
index 05dd3d2ddeb..71f55af0a79 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -374,7 +374,7 @@ void ColumnVector<T>::insert_indices_from(const IColumn&
src, const uint32_t* in
auto copy = [](const T* __restrict src, T* __restrict dest, const
uint32_t* __restrict begin,
const uint32_t* __restrict end) {
- for (auto it = begin; it != end; ++it) {
+ for (const auto* it = begin; it != end; ++it) {
*dest = src[*it];
++dest;
}
@@ -541,22 +541,6 @@ ColumnPtr ColumnVector<T>::replicate(const
IColumn::Offsets& offsets) const {
return res;
}
-template <typename T>
-void ColumnVector<T>::replicate(const uint32_t* __restrict indexs, size_t
target_size,
- IColumn& column) const {
- auto& res = reinterpret_cast<ColumnVector<T>&>(column);
- typename Self::Container& res_data = res.get_data();
- DCHECK(res_data.empty());
- res_data.resize(target_size);
- auto* __restrict left = res_data.data();
- auto* __restrict right = data.data();
- auto* __restrict idxs = indexs;
-
- for (size_t i = 0; i < target_size; ++i) {
- left[i] = right[idxs[i]];
- }
-}
-
template <typename T>
ColumnPtr ColumnVector<T>::index(const IColumn& indexes, size_t limit) const {
return select_index_impl(*this, indexes, limit);
diff --git a/be/src/vec/columns/column_vector.h
b/be/src/vec/columns/column_vector.h
index d9aab68697d..dbc7524eaac 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -412,8 +412,6 @@ public:
ColumnPtr replicate(const IColumn::Offsets& offsets) const override;
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override;
-
MutableColumns scatter(IColumn::ColumnIndex num_columns,
const IColumn::Selector& selector) const override {
return this->template scatter_impl<Self>(num_columns, selector);
diff --git a/be/src/vec/columns/predicate_column.h
b/be/src/vec/columns/predicate_column.h
index 95a8a84327b..198c7ee9cd4 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -373,10 +373,6 @@ public:
__builtin_unreachable();
}
- void replicate(const uint32_t* indexs, size_t target_size, IColumn&
column) const override {
- LOG(FATAL) << "not support";
- }
-
// it's impossible to use ComplexType as key , so we don't have to
implement them
[[noreturn]] StringRef serialize_value_into_arena(size_t n, Arena& arena,
char const*& begin)
const override {
diff --git a/be/src/vec/exec/join/process_hash_table_probe.h
b/be/src/vec/exec/join/process_hash_table_probe.h
index 924974ca915..ee46197617a 100644
--- a/be/src/vec/exec/join/process_hash_table_probe.h
+++ b/be/src/vec/exec/join/process_hash_table_probe.h
@@ -120,6 +120,9 @@ struct ProcessHashTableProbe {
bool _is_right_semi_anti;
std::vector<bool>* _left_output_slot_flags = nullptr;
std::vector<bool>* _right_output_slot_flags = nullptr;
+ // despite the name, true means: nullable column with no nulls other than the first row
+ std::vector<bool> _build_column_has_null;
+ bool _need_calculate_build_index_has_zero = true;
bool* _has_null_in_build_side;
RuntimeProfile::Counter* _rows_returned_counter = nullptr;
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h
b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index d7b47bed9c0..b4212405aed 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -17,12 +17,15 @@
#pragma once
+#include <gen_cpp/PlanNodes_types.h>
+
#include "common/status.h"
#include "pipeline/exec/hashjoin_probe_operator.h"
#include "process_hash_table_probe.h"
#include "runtime/thread_context.h" // IWYU pragma: keep
#include "util/simd/bits.h"
#include "vec/columns/column_filter_helper.h"
+#include "vec/columns/column_nullable.h"
#include "vec/exprs/vexpr_context.h"
#include "vhash_join_node.h"
@@ -73,20 +76,13 @@ void ProcessHashTableProbe<JoinOpType,
Parent>::build_side_output_column(
constexpr auto probe_all =
JoinOpType == TJoinOp::LEFT_OUTER_JOIN || JoinOpType ==
TJoinOp::FULL_OUTER_JOIN;
- if ((!is_semi_anti_join || have_other_join_conjunct ||
- (is_mark_join && !_parent->_mark_join_conjuncts.empty())) &&
- size) {
- for (int i = 0; i < _right_col_len; i++) {
- const auto& column = *_build_block->safe_get_by_position(i).column;
- if (output_slot_flags[i]) {
- mcol[i + _right_col_idx]->insert_indices_from(column,
_build_indexs.data(),
-
_build_indexs.data() + size);
- } else {
- mcol[i + _right_col_idx]->insert_many_defaults(size);
- }
- }
- }
-
+ // indicates whether build_indexs contains 0
+ bool build_index_has_zero =
+ (JoinOpType != TJoinOp::INNER_JOIN && JoinOpType !=
TJoinOp::RIGHT_OUTER_JOIN) ||
+ have_other_join_conjunct || is_mark_join;
+ bool need_output = (!is_semi_anti_join || have_other_join_conjunct ||
+ (is_mark_join &&
!_parent->_mark_join_conjuncts.empty())) &&
+ size;
// Dispose right tuple is null flags columns
if (probe_all && !have_other_join_conjunct) {
_tuple_is_null_right_flags->resize(size);
@@ -94,6 +90,42 @@ void ProcessHashTableProbe<JoinOpType,
Parent>::build_side_output_column(
for (int i = 0; i < size; ++i) {
null_data[i] = _build_indexs[i] == 0;
}
+ if (need_output && _need_calculate_build_index_has_zero) {
+ build_index_has_zero = simd::contain_byte(null_data, size, 1);
+ }
+ }
+
+ if (need_output) {
+ if (!build_index_has_zero && _build_column_has_null.empty()) {
+ _need_calculate_build_index_has_zero = false;
+ _build_column_has_null.resize(output_slot_flags.size());
+ for (int i = 0; i < _right_col_len; i++) {
+ const auto& column =
*_build_block->safe_get_by_position(i).column;
+ _build_column_has_null[i] = false;
+ if (output_slot_flags[i] && column.is_nullable()) {
+ const auto& nullable = assert_cast<const
ColumnNullable&>(column);
+ _build_column_has_null[i] = !simd::contain_byte(
+ nullable.get_null_map_data().data() + 1,
nullable.size() - 1, 1);
+ _need_calculate_build_index_has_zero |=
_build_column_has_null[i];
+ }
+ }
+ }
+
+ for (int i = 0; i < _right_col_len; i++) {
+ const auto& column = *_build_block->safe_get_by_position(i).column;
+ if (output_slot_flags[i]) {
+ if (!build_index_has_zero && _build_column_has_null[i]) {
+ assert_cast<ColumnNullable*>(mcol[i +
_right_col_idx].get())
+ ->insert_indices_from_not_has_null(column,
_build_indexs.data(),
+
_build_indexs.data() + size);
+ } else {
+ mcol[i + _right_col_idx]->insert_indices_from(column,
_build_indexs.data(),
+
_build_indexs.data() + size);
+ }
+ } else {
+ mcol[i + _right_col_idx]->insert_many_defaults(size);
+ }
+ }
}
}
@@ -109,7 +141,8 @@ void ProcessHashTableProbe<JoinOpType,
Parent>::probe_side_output_column(
if (all_match_one) {
mcol[i]->insert_range_from(*column, last_probe_index, size);
} else {
- column->replicate(_probe_indexs.data(), size, *mcol[i]);
+ mcol[i]->insert_indices_from(*column, _probe_indexs.data(),
+ _probe_indexs.data() + size);
}
} else {
mcol[i]->insert_many_defaults(size);
@@ -147,6 +180,7 @@ typename HashTableType::State
ProcessHashTableProbe<JoinOpType, Parent>::_init_p
COUNTER_SET(_parent->_probe_arena_memory_usage,
(int64_t)hash_table_ctx.serialized_keys_size(false));
}
+
return typename HashTableType::State(_parent->_probe_columns);
}
@@ -237,9 +271,11 @@ Status ProcessHashTableProbe<JoinOpType,
Parent>::do_process(HashTableType& hash
JoinOpType !=
TJoinOp::RIGHT_ANTI_JOIN)) {
auto check_all_match_one = [](const std::vector<uint32_t>& vecs,
uint32_t probe_idx,
int size) {
- if (size < 1 || vecs[0] != probe_idx) return false;
+ if (!size || vecs[0] != probe_idx || vecs[size - 1] != probe_idx +
size - 1) {
+ return false;
+ }
for (int i = 1; i < size; i++) {
- if (vecs[i] - vecs[i - 1] != 1) {
+ if (vecs[i] == vecs[i - 1]) {
return false;
}
}
diff --git a/be/src/vec/functions/array/function_array_with_constant.cpp
b/be/src/vec/functions/array/function_array_with_constant.cpp
index f9a8981a335..bb3cbb53e41 100644
--- a/be/src/vec/functions/array/function_array_with_constant.cpp
+++ b/be/src/vec/functions/array/function_array_with_constant.cpp
@@ -101,8 +101,8 @@ public:
}
auto clone = value->clone_empty();
clone->reserve(input_rows_count);
- RETURN_IF_CATCH_EXCEPTION(
- value->replicate(array_sizes.data(), offset,
*clone->assume_mutable().get()));
+ clone->assume_mutable()->insert_indices_from(*value,
array_sizes.data(),
+ array_sizes.data() +
offset);
if (!clone->is_nullable()) {
clone = ColumnNullable::create(std::move(clone),
ColumnUInt8::create(clone->size(), 0));
}
diff --git a/be/test/vec/core/column_array_test.cpp
b/be/test/vec/core/column_array_test.cpp
index fd2ed212730..c371dda25e8 100644
--- a/be/test/vec/core/column_array_test.cpp
+++ b/be/test/vec/core/column_array_test.cpp
@@ -186,52 +186,4 @@ TEST(ColumnArrayTest, EmptyArrayPermuteTest) {
check_array_data<int32_t>(*res2, {});
}
-TEST(ColumnArrayTest, IntArrayReplicateTest) {
- auto off_column = ColumnVector<ColumnArray::Offset64>::create();
- auto data_column = ColumnVector<int32_t>::create();
- // init column array with [[1,2,3],[],[4],[5,6]]
- std::vector<ColumnArray::Offset64> offs = {0, 3, 3, 4, 6};
- std::vector<int32_t> vals = {1, 2, 3, 4, 5, 6};
- for (size_t i = 1; i < offs.size(); ++i) {
- off_column->insert_data((const char*)(&offs[i]), 0);
- }
- for (auto& v : vals) {
- data_column->insert_data((const char*)(&v), 0);
- }
- ColumnArray array_column(std::move(data_column), std::move(off_column));
-
- uint32_t counts[] = {0, 0, 1, 3, 3, 3}; // size should be equal
array_column.size()
- size_t target_size = 6; // sum(counts)
-
- // return array column: [[1,2,3],[1,2,3],[],[5,6],[5,6],[5,6]];
- auto res1 = array_column.clone_empty();
- array_column.replicate(counts, target_size, *res1);
- check_array_offsets(*res1, {3, 6, 6, 8, 10, 12});
- check_array_data<int32_t>(*res1, {1, 2, 3, 1, 2, 3, 5, 6, 5, 6, 5, 6});
-}
-
-TEST(ColumnArrayTest, StringArrayReplicateTest) {
- auto off_column = ColumnVector<ColumnArray::Offset64>::create();
- auto data_column = ColumnString::create();
- // init column array with [["abc","d"],["ef"],[], [""]];
- std::vector<ColumnArray::Offset64> offs = {0, 2, 3, 3, 4};
- std::vector<std::string> vals = {"abc", "d", "ef", ""};
- for (size_t i = 1; i < offs.size(); ++i) {
- off_column->insert_data((const char*)(&offs[i]), 0);
- }
- for (auto& v : vals) {
- data_column->insert_data(v.data(), v.size());
- }
- ColumnArray array_column(std::move(data_column), std::move(off_column));
-
- uint32_t counts[] = {0, 0, 1, 3, 3, 3}; // size should be equal
array_column.size()
- size_t target_size = 6; // sum(counts)
-
- // return array column: [["abc","d"],["abc","d"],["ef"],[""],[""],[""]];
- auto res1 = array_column.clone_empty();
- array_column.replicate(counts, target_size, *res1);
- check_array_offsets(*res1, {2, 4, 5, 6, 7, 8});
- check_array_data<std::string>(*res1, {"abc", "d", "abc", "d", "ef", "",
"", ""});
-}
-
} // namespace doris::vectorized
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]