This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new ae9c231925 [Enhancement][Storage] refactor
InListPredicate/NotInListPredicate (#10139)
ae9c231925 is described below
commit ae9c231925d7b4dd76acac2f924b550217f64b28
Author: Pxl <[email protected]>
AuthorDate: Thu Jun 16 18:09:29 2022 +0800
[Enhancement][Storage] refactor InListPredicate/NotInListPredicate (#10139)
* refactor in_list_pred
* update
---
be/src/olap/CMakeLists.txt | 1 -
be/src/olap/in_list_predicate.cpp | 391 ---------------------------------
be/src/olap/in_list_predicate.h | 313 +++++++++++++++++++++++---
be/src/vec/columns/column_dictionary.h | 18 +-
4 files changed, 289 insertions(+), 434 deletions(-)
diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt
index 2c02122883..615a9fff44 100644
--- a/be/src/olap/CMakeLists.txt
+++ b/be/src/olap/CMakeLists.txt
@@ -47,7 +47,6 @@ add_library(Olap STATIC
file_stream.cpp
generic_iterators.cpp
hll.cpp
- in_list_predicate.cpp
bloom_filter_predicate.cpp
in_stream.cpp
key_coder.cpp
diff --git a/be/src/olap/in_list_predicate.cpp
b/be/src/olap/in_list_predicate.cpp
deleted file mode 100644
index 1b2ab20f3b..0000000000
--- a/be/src/olap/in_list_predicate.cpp
+++ /dev/null
@@ -1,391 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/in_list_predicate.h"
-
-#include "olap/field.h"
-#include "runtime/string_value.hpp"
-#include "runtime/vectorized_row_batch.h"
-#include "vec/columns/column_dictionary.h"
-#include "vec/columns/column_nullable.h"
-#include "vec/columns/predicate_column.h"
-
-namespace doris {
-
-#define IN_LIST_PRED_CONSTRUCTOR(CLASS)
\
- template <class T>
\
- CLASS<T>::CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool
opposite) \
- : ColumnPredicate(column_id, opposite), _values(std::move(values))
{}
-
-IN_LIST_PRED_CONSTRUCTOR(InListPredicate)
-IN_LIST_PRED_CONSTRUCTOR(NotInListPredicate)
-
-#define IN_LIST_PRED_EVALUATE(CLASS, OP)
\
- template <class T>
\
- void CLASS<T>::evaluate(VectorizedRowBatch* batch) const {
\
- uint16_t n = batch->size();
\
- if (n == 0) {
\
- return;
\
- }
\
- uint16_t* sel = batch->selected();
\
- const T* col_vector = reinterpret_cast<const
T*>(batch->column(_column_id)->col_data()); \
- uint16_t new_size = 0;
\
- if (batch->column(_column_id)->no_nulls()) {
\
- if (batch->selected_in_use()) {
\
- for (uint16_t j = 0; j != n; ++j) {
\
- uint16_t i = sel[j];
\
- sel[new_size] = i;
\
- new_size += (_values.find(col_vector[i]) OP
_values.end()); \
- }
\
- batch->set_size(new_size);
\
- } else {
\
- for (uint16_t i = 0; i != n; ++i) {
\
- sel[new_size] = i;
\
- new_size += (_values.find(col_vector[i]) OP
_values.end()); \
- }
\
- if (new_size < n) {
\
- batch->set_size(new_size);
\
- batch->set_selected_in_use(true);
\
- }
\
- }
\
- } else {
\
- bool* is_null = batch->column(_column_id)->is_null();
\
- if (batch->selected_in_use()) {
\
- for (uint16_t j = 0; j != n; ++j) {
\
- uint16_t i = sel[j];
\
- sel[new_size] = i;
\
- new_size += (!is_null[i] && _values.find(col_vector[i]) OP
_values.end()); \
- }
\
- batch->set_size(new_size);
\
- } else {
\
- for (int i = 0; i != n; ++i) {
\
- sel[new_size] = i;
\
- new_size += (!is_null[i] && _values.find(col_vector[i]) OP
_values.end()); \
- }
\
- if (new_size < n) {
\
- batch->set_size(new_size);
\
- batch->set_selected_in_use(true);
\
- }
\
- }
\
- }
\
- }
-
-IN_LIST_PRED_EVALUATE(InListPredicate, !=)
-IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
-
-#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP)
\
- template <class T>
\
- void CLASS<T>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size)
const { \
- uint16_t new_size = 0;
\
- if (block->is_nullable()) {
\
- for (uint16_t i = 0; i < *size; ++i) {
\
- uint16_t idx = sel[i];
\
- sel[new_size] = idx;
\
- const T* cell_value = reinterpret_cast<const
T*>(block->cell(idx).cell_ptr()); \
- auto result = (!block->cell(idx).is_null() &&
_values.find(*cell_value) \
- OP
_values.end()); \
- new_size += _opposite ? !result : result;
\
- }
\
- } else {
\
- for (uint16_t i = 0; i < *size; ++i) {
\
- uint16_t idx = sel[i];
\
- sel[new_size] = idx;
\
- const T* cell_value = reinterpret_cast<const
T*>(block->cell(idx).cell_ptr()); \
- auto result = (_values.find(*cell_value) OP _values.end());
\
- new_size += _opposite ? !result : result;
\
- }
\
- }
\
- *size = new_size;
\
- }
-
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, !=)
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
-
-// todo(zeno) define interface in IColumn to simplify code
-#define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP)
\
- template <class T>
\
- void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel,
uint16_t* size) const { \
- uint16_t new_size = 0;
\
- if (column.is_nullable()) {
\
- auto* nullable_col =
\
-
vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \
- auto& null_bitmap = reinterpret_cast<const
vectorized::ColumnUInt8&>( \
- nullable_col->get_null_map_column())
\
- .get_data();
\
- auto& nested_col = nullable_col->get_nested_column();
\
- if (nested_col.is_column_dictionary()) {
\
- if constexpr (std::is_same_v<T, StringValue>) {
\
- auto* nested_col_ptr = vectorized::check_and_get_column<
\
-
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
- auto& data_array = nested_col_ptr->get_data();
\
- std::vector<bool> selected;
\
- nested_col_ptr->find_codes(_values, selected);
\
- for (uint16_t i = 0; i < *size; i++) {
\
- uint16_t idx = sel[i];
\
- sel[new_size] = idx;
\
- const auto& cell_value = data_array[idx];
\
- DCHECK(cell_value < (int64_t)selected.size());
\
- bool ret = !null_bitmap[idx] && (selected[cell_value]
OP false); \
- new_size += _opposite ? !ret : ret;
\
- }
\
- }
\
- } else {
\
- auto* nested_col_ptr =
\
-
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>( \
- nested_col);
\
- auto& data_array = nested_col_ptr->get_data();
\
- for (uint16_t i = 0; i < *size; i++) {
\
- uint16_t idx = sel[i];
\
- sel[new_size] = idx;
\
- const auto& cell_value = reinterpret_cast<const
T&>(data_array[idx]); \
- bool ret = !null_bitmap[idx] && (_values.find(cell_value)
OP _values.end()); \
- new_size += _opposite ? !ret : ret;
\
- }
\
- }
\
- } else if (column.is_column_dictionary()) {
\
- if constexpr (std::is_same_v<T, StringValue>) {
\
- auto& dict_col =
\
-
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
- column);
\
- auto& data_array = dict_col.get_data();
\
- std::vector<bool> selected;
\
- dict_col.find_codes(_values, selected);
\
- for (uint16_t i = 0; i < *size; i++) {
\
- uint16_t idx = sel[i];
\
- sel[new_size] = idx;
\
- const auto& cell_value = data_array[idx];
\
- DCHECK(cell_value < (int64_t)selected.size());
\
- auto result = (selected[cell_value] OP false);
\
- new_size += _opposite ? !result : result;
\
- }
\
- }
\
- } else {
\
- auto& number_column =
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
- auto& data_array = number_column.get_data();
\
- for (uint16_t i = 0; i < *size; i++) {
\
- uint16_t idx = sel[i];
\
- sel[new_size] = idx;
\
- const auto& cell_value = reinterpret_cast<const
T&>(data_array[idx]); \
- auto result = (_values.find(cell_value) OP _values.end());
\
- new_size += _opposite ? !result : result;
\
- }
\
- }
\
- *size = new_size;
\
- }
-
-IN_LIST_PRED_COLUMN_EVALUATE(InListPredicate, !=)
-IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==)
-
-#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP)
\
- template <class T>
\
- void CLASS<T>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t
size, bool* flags) \
- const {
\
- if (block->is_nullable()) {
\
- for (uint16_t i = 0; i < size; ++i) {
\
- if (flags[i]) continue;
\
- uint16_t idx = sel[i];
\
- const T* cell_value = reinterpret_cast<const
T*>(block->cell(idx).cell_ptr()); \
- auto result = (!block->cell(idx).is_null() &&
_values.find(*cell_value) \
- OP
_values.end()); \
- flags[i] |= _opposite ? !result : result;
\
- }
\
- } else {
\
- for (uint16_t i = 0; i < size; ++i) {
\
- if (flags[i]) continue;
\
- uint16_t idx = sel[i];
\
- const T* cell_value = reinterpret_cast<const
T*>(block->cell(idx).cell_ptr()); \
- auto result = (_values.find(*cell_value) OP _values.end());
\
- flags[i] |= _opposite ? !result : result;
\
- }
\
- }
\
- }
-
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(InListPredicate, !=)
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(NotInListPredicate, ==)
-
-#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP)
\
- template <class T>
\
- void CLASS<T>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t
size, bool* flags) \
- const {
\
- if (block->is_nullable()) {
\
- for (uint16_t i = 0; i < size; ++i) {
\
- if (!flags[i]) continue;
\
- uint16_t idx = sel[i];
\
- const T* cell_value = reinterpret_cast<const
T*>(block->cell(idx).cell_ptr()); \
- auto result = (!block->cell(idx).is_null() &&
_values.find(*cell_value) \
- OP
_values.end()); \
- flags[i] &= _opposite ? !result : result;
\
- }
\
- } else {
\
- for (uint16_t i = 0; i < size; ++i) {
\
- if (!flags[i]) continue;
\
- uint16_t idx = sel[i];
\
- const T* cell_value = reinterpret_cast<const
T*>(block->cell(idx).cell_ptr()); \
- auto result = (_values.find(*cell_value) OP _values.end());
\
- flags[i] &= _opposite ? !result : result;
\
- }
\
- }
\
- }
-
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(InListPredicate, !=)
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==)
-
-#define IN_LIST_PRED_BITMAP_EVALUATE(CLASS, OP)
\
- template <class T>
\
- Status CLASS<T>::evaluate(const Schema& schema,
\
- const std::vector<BitmapIndexIterator*>&
iterators, \
- uint32_t num_rows, roaring::Roaring* result)
const { \
- BitmapIndexIterator* iterator = iterators[_column_id];
\
- if (iterator == nullptr) {
\
- return Status::OK();
\
- }
\
- if (iterator->has_null_bitmap()) {
\
- roaring::Roaring null_bitmap;
\
- RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap));
\
- *result -= null_bitmap;
\
- }
\
- roaring::Roaring indices;
\
- for (auto value : _values) {
\
- bool exact_match;
\
- Status s = iterator->seek_dictionary(&value, &exact_match);
\
- rowid_t seeked_ordinal = iterator->current_ordinal();
\
- if (!s.is_not_found()) {
\
- if (!s.ok()) {
\
- return s;
\
- }
\
- if (exact_match) {
\
- roaring::Roaring index;
\
- RETURN_IF_ERROR(iterator->read_bitmap(seeked_ordinal,
&index)); \
- indices |= index;
\
- }
\
- }
\
- }
\
- *result OP indices;
\
- return Status::OK();
\
- }
-
-IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=)
-IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=)
-
-#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS)
\
- template CLASS<int8_t>::CLASS(uint32_t column_id,
phmap::flat_hash_set<int8_t>&& values, \
- bool opposite);
\
- template CLASS<int16_t>::CLASS(uint32_t column_id,
phmap::flat_hash_set<int16_t>&& values, \
- bool opposite);
\
- template CLASS<int32_t>::CLASS(uint32_t column_id,
phmap::flat_hash_set<int32_t>&& values, \
- bool opposite);
\
- template CLASS<int64_t>::CLASS(uint32_t column_id,
phmap::flat_hash_set<int64_t>&& values, \
- bool opposite);
\
- template CLASS<int128_t>::CLASS(uint32_t column_id,
phmap::flat_hash_set<int128_t>&& values, \
- bool opposite);
\
- template CLASS<float>::CLASS(uint32_t column_id,
phmap::flat_hash_set<float>&& values, \
- bool opposite);
\
- template CLASS<double>::CLASS(uint32_t column_id,
phmap::flat_hash_set<double>&& values, \
- bool opposite);
\
- template CLASS<decimal12_t>::CLASS(uint32_t column_id,
\
- phmap::flat_hash_set<decimal12_t>&&
values, bool opposite); \
- template CLASS<StringValue>::CLASS(uint32_t column_id,
\
- phmap::flat_hash_set<StringValue>&&
values, bool opposite); \
- template CLASS<uint24_t>::CLASS(uint32_t column_id,
phmap::flat_hash_set<uint24_t>&& values, \
- bool opposite);
\
- template CLASS<uint64_t>::CLASS(uint32_t column_id,
phmap::flat_hash_set<uint64_t>&& values, \
- bool opposite);
-
-IN_LIST_PRED_CONSTRUCTOR_DECLARATION(InListPredicate)
-IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate)
-
-#define IN_LIST_PRED_EVALUATE_DECLARATION(CLASS)
\
- template void CLASS<int8_t>::evaluate(VectorizedRowBatch* batch) const;
\
- template void CLASS<int16_t>::evaluate(VectorizedRowBatch* batch) const;
\
- template void CLASS<int32_t>::evaluate(VectorizedRowBatch* batch) const;
\
- template void CLASS<int64_t>::evaluate(VectorizedRowBatch* batch) const;
\
- template void CLASS<int128_t>::evaluate(VectorizedRowBatch* batch) const;
\
- template void CLASS<float>::evaluate(VectorizedRowBatch* batch) const;
\
- template void CLASS<double>::evaluate(VectorizedRowBatch* batch) const;
\
- template void CLASS<decimal12_t>::evaluate(VectorizedRowBatch* batch)
const; \
- template void CLASS<StringValue>::evaluate(VectorizedRowBatch* batch)
const; \
- template void CLASS<uint24_t>::evaluate(VectorizedRowBatch* batch) const;
\
- template void CLASS<uint64_t>::evaluate(VectorizedRowBatch* batch) const;
-
-IN_LIST_PRED_EVALUATE_DECLARATION(InListPredicate)
-IN_LIST_PRED_EVALUATE_DECLARATION(NotInListPredicate)
-
-#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS)
\
- template void CLASS<int8_t>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) \
- const;
\
- template void CLASS<int16_t>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) \
- const;
\
- template void CLASS<int32_t>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) \
- const;
\
- template void CLASS<int64_t>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) \
- const;
\
- template void CLASS<int128_t>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) \
- const;
\
- template void CLASS<float>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) const; \
- template void CLASS<double>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) \
- const;
\
- template void CLASS<decimal12_t>::evaluate(ColumnBlock* block, uint16_t*
sel, uint16_t* size) \
- const;
\
- template void CLASS<StringValue>::evaluate(ColumnBlock* block, uint16_t*
sel, uint16_t* size) \
- const;
\
- template void CLASS<uint24_t>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) \
- const;
\
- template void CLASS<uint64_t>::evaluate(ColumnBlock* block, uint16_t* sel,
uint16_t* size) \
- const;
-
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(InListPredicate)
-IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate)
-
-#define IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(CLASS)
\
- template Status CLASS<int8_t>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const; \
- template Status CLASS<int16_t>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const; \
- template Status CLASS<int32_t>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const; \
- template Status CLASS<int64_t>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const; \
- template Status CLASS<int128_t>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const; \
- template Status CLASS<float>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const; \
- template Status CLASS<double>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const; \
- template Status CLASS<decimal12_t>::evaluate(
\
- const Schema& schema, const std::vector<BitmapIndexIterator*>&
iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const;
\
- template Status CLASS<StringValue>::evaluate(
\
- const Schema& schema, const std::vector<BitmapIndexIterator*>&
iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap) const;
\
- template Status CLASS<uint24_t>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const; \
- template Status CLASS<uint64_t>::evaluate(const Schema& schema,
\
- const
std::vector<BitmapIndexIterator*>& iterators, \
- uint32_t num_rows,
roaring::Roaring* bitmap) const;
-
-IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate)
-IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate)
-
-} //namespace doris
\ No newline at end of file
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index cf2f4b2cdc..f41a7f51c5 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -21,11 +21,15 @@
#include <stdint.h>
#include <roaring/roaring.hh>
+#include <type_traits>
#include "decimal12.h"
#include "olap/column_predicate.h"
#include "runtime/string_value.h"
+#include "runtime/vectorized_row_batch.h"
#include "uint24.h"
+#include "vec/columns/column_dictionary.h"
+#include "vec/core/types.h"
namespace std {
// for string value
@@ -75,34 +79,289 @@ namespace doris {
class VectorizedRowBatch;
-// todo(wb) support evaluate_and,evaluate_or
-
-#define IN_LIST_PRED_CLASS_DEFINE(CLASS, PT)
\
- template <class T>
\
- class CLASS : public ColumnPredicate {
\
- public:
\
- CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool
is_opposite = false); \
- PredicateType type() const override { return PredicateType::PT; }
\
- virtual void evaluate(VectorizedRowBatch* batch) const override;
\
- void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const
override; \
- void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size,
\
- bool* flags) const override;
\
- void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size,
\
- bool* flags) const override;
\
- virtual Status evaluate(const Schema& schema,
\
- const std::vector<BitmapIndexIterator*>&
iterators, \
- uint32_t num_rows, roaring::Roaring* bitmap)
const override; \
- void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t*
size) const override; \
- void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t
size, \
- bool* flags) const override {}
\
- void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t
size, \
- bool* flags) const override {}
\
-
\
- private:
\
- phmap::flat_hash_set<T> _values;
\
+template <class T, PredicateType PT>
+class InListPredicateBase : public ColumnPredicate {
+public:
+ // Builds a predicate on column `column_id`. The candidate value set is moved
+ // into _values; `is_opposite` is forwarded unchanged to ColumnPredicate.
+ InListPredicateBase(uint32_t column_id, phmap::flat_hash_set<T>&& values,
+ bool is_opposite = false)
+ : ColumnPredicate(column_id, is_opposite),
_values(std::move(values)) {}
+
+ // Returns the predicate kind fixed at compile time by the PT template argument.
+ PredicateType type() const override { return PT; }
+
+ // Filters `batch` in place through its selection vector. A row survives when
+ // _operator() accepts the result of looking its value up in _values and, for
+ // columns that may hold nulls, the row is not null.
+ // This overload does not read _opposite.
+ void evaluate(VectorizedRowBatch* batch) const override {
+ uint16_t n = batch->size();
+ if (!n) {
+ return;
+ }
+
+ uint16_t* sel = batch->selected();
+ const T* col_vector = reinterpret_cast<const
T*>(batch->column(_column_id)->col_data());
+ uint16_t new_size = 0;
+ // Every loop below uses the same compaction step: the row index is written to
+ // sel[new_size] unconditionally and new_size only advances for kept rows, so
+ // a rejected index is overwritten by the next candidate.
+ if (batch->column(_column_id)->no_nulls()) {
+ if (batch->selected_in_use()) {
+ // Walk the rows already selected by earlier predicates.
+ for (uint16_t j = 0; j != n; ++j) {
+ uint16_t i = sel[j];
+ sel[new_size] = i;
+ new_size += _operator(_values.find(col_vector[i]),
_values.end());
+ }
+ batch->set_size(new_size);
+ } else {
+ // No selection vector yet: walk all n rows by position.
+ for (uint16_t i = 0; i != n; ++i) {
+ sel[new_size] = i;
+ new_size += _operator(_values.find(col_vector[i]),
_values.end());
+ }
+ // The selection vector is only switched on when a row was dropped.
+ if (new_size < n) {
+ batch->set_size(new_size);
+ batch->set_selected_in_use(true);
+ }
+ }
+ } else {
+ // Same two cases as above, with null rows always rejected.
+ bool* is_null = batch->column(_column_id)->is_null();
+ if (batch->selected_in_use()) {
+ for (uint16_t j = 0; j != n; ++j) {
+ uint16_t i = sel[j];
+ sel[new_size] = i;
+ new_size +=
+ (!is_null[i] &&
_operator(_values.find(col_vector[i]), _values.end()));
+ }
+ batch->set_size(new_size);
+ } else {
+ for (int i = 0; i != n; ++i) {
+ sel[new_size] = i;
+ new_size +=
+ (!is_null[i] &&
_operator(_values.find(col_vector[i]), _values.end()));
+ }
+ if (new_size < n) {
+ batch->set_size(new_size);
+ batch->set_selected_in_use(true);
+ }
+ }
+ }
};
-IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST)
-IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NOT_IN_LIST)
+ // Filters the rows listed in sel[0, *size) against `block`, compacting `sel`
+ // and updating `*size`. Nullability is resolved once here so the helper can
+ // take it as a compile-time flag.
+ void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const
override {
+ if (block->is_nullable()) {
+ _base_evaluate<true>(block, sel, size);
+ } else {
+ _base_evaluate<false>(block, sel, size);
+ }
+ }
+
+ // OR-combines this predicate into flags[0, size) for the rows listed in `sel`.
+ // Delegates to the flag-based _base_evaluate with is_and = false, choosing the
+ // nullable or non-nullable instantiation from the block.
+ void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool*
flags) const override {
+ if (block->is_nullable()) {
+ _base_evaluate<true, false>(block, sel, size, flags);
+ } else {
+ _base_evaluate<false, false>(block, sel, size, flags);
+ }
+ }
+
+ // AND-combines this predicate into flags[0, size) for the rows listed in `sel`.
+ // Delegates to the flag-based _base_evaluate with is_and = true, choosing the
+ // nullable or non-nullable instantiation from the block.
+ void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size,
+ bool* flags) const override {
+ if (block->is_nullable()) {
+ _base_evaluate<true, true>(block, sel, size, flags);
+ } else {
+ _base_evaluate<false, true>(block, sel, size, flags);
+ }
+ }
+
+ // Evaluates the predicate against a bitmap index and narrows `*result`.
+ // Uses iterators[_column_id]; when that entry is null the method returns OK
+ // and leaves `*result` untouched. `schema` and `num_rows` are not read.
+ // Returns the first error reported by the iterator, otherwise Status::OK().
+ Status evaluate(const Schema& schema, const
std::vector<BitmapIndexIterator*>& iterators,
+ uint32_t num_rows, roaring::Roaring* result) const
override {
+ BitmapIndexIterator* iterator = iterators[_column_id];
+ if (iterator == nullptr) {
+ return Status::OK();
+ }
+ // Null rows are removed from the result for both IN and NOT IN.
+ if (iterator->has_null_bitmap()) {
+ roaring::Roaring null_bitmap;
+ RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap));
+ *result -= null_bitmap;
+ }
+ // Union of the row bitmaps of every list value found in the dictionary.
+ roaring::Roaring indices;
+ for (auto value : _values) {
+ bool exact_match;
+ Status s = iterator->seek_dictionary(&value, &exact_match);
+ rowid_t seeked_ordinal = iterator->current_ordinal();
+ // A not-found status just means this value contributes no rows; any
+ // other failure is propagated to the caller.
+ if (!s.is_not_found()) {
+ if (!s.ok()) {
+ return s;
+ }
+ if (exact_match) {
+ roaring::Roaring index;
+ RETURN_IF_ERROR(iterator->read_bitmap(seeked_ordinal,
&index));
+ indices |= index;
+ }
+ }
+ }
+
+ // IN keeps only the matching rows; NOT IN removes them.
+ if constexpr (PT == PredicateType::IN_LIST) {
+ *result &= indices;
+ } else {
+ *result -= indices;
+ }
+
+ return Status::OK();
+ }
+
+ // Filters the rows listed in sel[0, *size) against a vectorized column,
+ // compacting `sel` and updating `*size`. Nullability and _opposite are turned
+ // into template arguments here so the helper's loops branch on neither.
+ void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)
const override {
+ if (column.is_nullable()) {
+ // Unwrap the nullable column into its null map and nested data column.
+ auto* nullable_col =
+
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
+ auto& null_bitmap = reinterpret_cast<const
vectorized::ColumnUInt8&>(
+ nullable_col->get_null_map_column())
+ .get_data();
+ auto& nested_col = nullable_col->get_nested_column();
+
+ if (_opposite) {
+ _base_evaluate<true, true>(&nested_col, &null_bitmap, sel,
size);
+ } else {
+ _base_evaluate<true, false>(&nested_col, &null_bitmap, sel,
size);
+ }
+ } else {
+ // No null map for a non-nullable column; the helper receives nullptr.
+ if (_opposite) {
+ _base_evaluate<false, true>(&column, nullptr, sel, size);
+ } else {
+ _base_evaluate<false, false>(&column, nullptr, sel, size);
+ }
+ }
+ }
+
+ // todo(wb) support evaluate_and,evaluate_or
+ // Not implemented for vectorized columns: logs at FATAL severity when called
+ // and leaves `flags` untouched.
+ void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t
size,
+ bool* flags) const override {
+ LOG(FATAL) << "IColumn not support in_list_predicate.evaluate_and
now.";
+ }
+ // Not implemented for vectorized columns: logs at FATAL severity when called
+ // and leaves `flags` untouched.
+ void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
+ bool* flags) const override {
+ LOG(FATAL) << "IColumn not support in_list_predicate.evaluate_or now.";
+ }
+
+private:
+ // Compile-time comparison selected by PT: `lhs != rhs` for IN_LIST, otherwise
+ // `lhs == rhs`. Callers pass (_values.find(x), _values.end()), so the result is
+ // true when x is in the set for IN_LIST and when x is absent for NOT_IN_LIST.
+ template <typename LeftT, typename RightT>
+ bool _operator(const LeftT& lhs, const RightT& rhs) const {
+ if constexpr (PT == PredicateType::IN_LIST) {
+ return lhs != rhs;
+ }
+ return lhs == rhs;
+ }
+
+ // Shared body of evaluate(ColumnBlock*, sel, size). Compacts `sel` in place and
+ // stores the surviving row count in `*size`. A row is kept when
+ // `_opposite ^ match` is true, where `match` is the _operator() result and,
+ // when is_nullable is set, is forced to false for null cells.
+ template <bool is_nullable>
+ void _base_evaluate(const ColumnBlock* block, uint16_t* sel, uint16_t*
size) const {
+ uint16_t new_size = 0;
+ for (uint16_t i = 0; i < *size; ++i) {
+ uint16_t idx = sel[i];
+ // Written unconditionally; new_size only advances when the row is kept.
+ sel[new_size] = idx;
+ const T* cell_value = reinterpret_cast<const
T*>(block->cell(idx).cell_ptr());
+ if constexpr (is_nullable) {
+ new_size += _opposite ^ (!block->cell(idx).is_null() &&
+ _operator(_values.find(*cell_value),
_values.end()));
+ } else {
+ new_size += _opposite ^ _operator(_values.find(*cell_value),
_values.end());
+ }
+ }
+ *size = new_size;
+ }
+
+    // Shared body of evaluate_and / evaluate_or over a ColumnBlock.
+    //
+    // For each i in [0, size) the predicate is evaluated on row sel[i] and folded
+    // into flags[i] with `&=` (is_and) or `|=` (!is_and). The per-row outcome is
+    // `_opposite ^ match`, where `match` is the _operator() result and, when
+    // is_nullable is set, is false for null cells.
+    //
+    // Rows whose flag is already decided are skipped. The skip condition depends
+    // on the combinator: AND cannot revive a false flag and OR cannot clear a true
+    // one. Skipping on `!flags[i]` for OR as well would leave every false flag
+    // unevaluated and turn evaluate_or into a no-op.
+    template <bool is_nullable, bool is_and>
+    void _base_evaluate(const ColumnBlock* block, const uint16_t* sel, uint16_t size,
+                        bool* flags) const {
+        for (uint16_t i = 0; i < size; ++i) {
+            if constexpr (is_and) {
+                if (!flags[i]) {
+                    continue;
+                }
+            } else {
+                if (flags[i]) {
+                    continue;
+                }
+            }
+
+            uint16_t idx = sel[i];
+            const T* cell_value = reinterpret_cast<const T*>(block->cell(idx).cell_ptr());
+            auto result = true;
+            if constexpr (is_nullable) {
+                result &= !block->cell(idx).is_null();
+            }
+            result &= _operator(_values.find(*cell_value), _values.end());
+
+            if constexpr (is_and) {
+                flags[i] &= _opposite ^ result;
+            } else {
+                flags[i] |= _opposite ^ result;
+            }
+        }
+    }
+
+    // Shared body of evaluate(IColumn&, sel, size). Compacts `sel` in place and
+    // stores the surviving row count in `*size`.
+    //
+    // `column` is the data column to probe; `null_map` is only dereferenced when
+    // is_nullable is true. A null row never matches, so it is kept only when
+    // is_opposite is true. A non-null row is kept when the predicate outcome
+    // differs from is_opposite.
+    //
+    // The two branches express that outcome differently:
+    //  - dictionary columns test the raw `selected[code]` membership flag, so PT
+    //    has to be folded in explicitly;
+    //  - other columns go through _operator(), which already encodes PT, so only
+    //    is_opposite may be applied on top. Folding PT in a second time inverts
+    //    every NOT_IN_LIST result.
+    template <bool is_nullable, bool is_opposite>
+    void _base_evaluate(const vectorized::IColumn* column,
+                        const vectorized::PaddedPODArray<vectorized::UInt8>* null_map,
+                        uint16_t* sel, uint16_t* size) const {
+        uint16_t new_size = 0;
+
+        if (column->is_column_dictionary()) {
+            if constexpr (std::is_same_v<T, StringValue>) {
+                auto* nested_col_ptr = vectorized::check_and_get_column<
+                        vectorized::ColumnDictionary<vectorized::Int32>>(column);
+                auto& data_array = nested_col_ptr->get_data();
+                // find_codes() sizes `selected` to the dictionary and marks the
+                // entries that correspond to values present in _values.
+                std::vector<vectorized::UInt8> selected;
+                nested_col_ptr->find_codes(_values, selected);
+
+                for (uint16_t i = 0; i < *size; i++) {
+                    uint16_t idx = sel[i];
+                    if constexpr (is_nullable) {
+                        if ((*null_map)[idx]) {
+                            if constexpr (is_opposite) {
+                                sel[new_size++] = idx;
+                            }
+                            continue;
+                        }
+                    }
+
+                    // Keep members for (IN, !opposite) and (NOT IN, opposite);
+                    // keep non-members for the other two combinations.
+                    if constexpr (is_opposite != (PT == PredicateType::IN_LIST)) {
+                        if (selected[data_array[idx]]) {
+                            sel[new_size++] = idx;
+                        }
+                    } else {
+                        if (!selected[data_array[idx]]) {
+                            sel[new_size++] = idx;
+                        }
+                    }
+                }
+            } else {
+                LOG(FATAL) << "column_dictionary must use StringValue predicate.";
+            }
+        } else {
+            auto* nested_col_ptr =
+                    vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column);
+            auto& data_array = nested_col_ptr->get_data();
+
+            for (uint16_t i = 0; i < *size; i++) {
+                uint16_t idx = sel[i];
+                if constexpr (is_nullable) {
+                    if ((*null_map)[idx]) {
+                        if constexpr (is_opposite) {
+                            sel[new_size++] = idx;
+                        }
+                        continue;
+                    }
+                }
+
+                // _operator() is already PT-aware, so only the opposite flag is
+                // applied here.
+                const bool matched = _operator(
+                        _values.find(reinterpret_cast<const T&>(data_array[idx])), _values.end());
+                if (matched != is_opposite) {
+                    sel[new_size++] = idx;
+                }
+            }
+        }
+
+        *size = new_size;
+    }
+
+ phmap::flat_hash_set<T> _values;
+};
+
+// `col IN (...)` predicate: InListPredicateBase fixed to PredicateType::IN_LIST.
+template <class T>
+using InListPredicate = InListPredicateBase<T, PredicateType::IN_LIST>;
+
+// `col NOT IN (...)` predicate: InListPredicateBase fixed to PredicateType::NOT_IN_LIST.
+template <class T>
+using NotInListPredicate = InListPredicateBase<T, PredicateType::NOT_IN_LIST>;
} //namespace doris
diff --git a/be/src/vec/columns/column_dictionary.h
b/be/src/vec/columns/column_dictionary.h
index 29db3a334c..d38bc4e049 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -21,21 +21,10 @@
#include <algorithm>
-#include "gutil/hash/string_hash.h"
-#include "olap/column_predicate.h"
-#include "olap/comparison_predicate.h"
-#include "olap/decimal12.h"
-#include "olap/in_list_predicate.h"
-#include "olap/uint24.h"
#include "runtime/string_value.h"
-#include "util/slice.h"
#include "vec/columns/column.h"
-#include "vec/columns/column_decimal.h"
-#include "vec/columns/column_impl.h"
#include "vec/columns/column_string.h"
-#include "vec/columns/column_vector.h"
#include "vec/columns/predicate_column.h"
-#include "vec/common/typeid_cast.h"
#include "vec/core/types.h"
namespace doris::vectorized {
@@ -259,7 +248,7 @@ public:
uint32_t get_hash_value(uint32_t idx) const { return
_dict.get_hash_value(_codes[idx]); }
void find_codes(const phmap::flat_hash_set<StringValue>& values,
- std::vector<bool>& selected) const {
+ std::vector<vectorized::UInt8>& selected) const {
return _dict.find_codes(values, selected);
}
@@ -363,13 +352,12 @@ public:
}
void find_codes(const phmap::flat_hash_set<StringValue>& values,
- std::vector<bool>& selected) const {
+ std::vector<vectorized::UInt8>& selected) const {
size_t dict_word_num = _dict_data.size();
selected.resize(dict_word_num);
selected.assign(dict_word_num, false);
for (const auto& value : values) {
- auto it = _inverted_index.find(value);
- if (it != _inverted_index.end()) {
+ if (auto it = _inverted_index.find(value); it !=
_inverted_index.end()) {
selected[it->second] = true;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]