This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch vectorized in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit ae9d0cff0ad6aa59919eaa138307d619fffa9aeb Author: Zeno Yang <[email protected]> AuthorDate: Sat Jan 8 01:00:43 2022 +0800 [Vectorized] Support bloom filter predicate on vectorized engine storage layer (#7557) --- be/src/olap/bloom_filter_predicate.h | 40 +++++- .../olap/bloom_filter_column_predicate_test.cpp | 36 ++++++ be/test/olap/null_predicate_test.cpp | 144 +++++++++++++++++++++ 3 files changed, 219 insertions(+), 1 deletion(-) diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index b3dcbbb..ff3201c 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -27,6 +27,10 @@ #include "olap/field.h" #include "runtime/string_value.hpp" #include "runtime/vectorized_row_batch.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_vector.h" +#include "vec/columns/predicate_column.h" +#include "vec/utils/util.hpp" namespace doris { @@ -59,12 +63,14 @@ public: return Status::OK(); } + void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; + private: std::shared_ptr<IBloomFilterFuncBase> _filter; SpecificFilter* _specific_filter; // owned by _filter }; -// blomm filter column predicate do not support in segment v1 +// bloom filter column predicate do not support in segment v1 template <PrimitiveType type> void BloomFilterColumnPredicate<type>::evaluate(VectorizedRowBatch* batch) const { uint16_t n = batch->size(); @@ -99,6 +105,38 @@ void BloomFilterColumnPredicate<type>::evaluate(ColumnBlock* block, uint16_t* se *size = new_size; } +template <PrimitiveType type> +void BloomFilterColumnPredicate<type>::evaluate(vectorized::IColumn& column, uint16_t* sel, + uint16_t* size) const { + uint16_t new_size = 0; + using T = typename PrimitiveTypeTraits<type>::CppType; + + if (column.is_nullable()) { + auto* nullable_col = vectorized::check_and_get_column<vectorized::ColumnNullable>(column); + auto& null_map_data = nullable_col->get_null_map_column().get_data(); + auto* pred_col = vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>( + nullable_col->get_nested_column()); + auto& pred_col_data = pred_col->get_data(); + for (uint16_t i = 0; i < *size; i++) { + uint16_t idx = sel[i]; + sel[new_size] = idx; + const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[idx])); + new_size += (!null_map_data[idx]) && _specific_filter->find_olap_engine(cell_value); + } + } else { + auto* pred_col = + vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column); + auto& pred_col_data = pred_col->get_data(); + for (uint16_t i = 0; i < *size; i++) { + uint16_t idx = sel[i]; + sel[new_size] = idx; + const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[idx])); + new_size += _specific_filter->find_olap_engine(cell_value); + } + } + *size = new_size; +} + class BloomFilterColumnPredicateFactory { public: static ColumnPredicate* create_column_predicate( diff --git a/be/test/olap/bloom_filter_column_predicate_test.cpp b/be/test/olap/bloom_filter_column_predicate_test.cpp index 164c51d..24abea1 100644 --- a/be/test/olap/bloom_filter_column_predicate_test.cpp +++ b/be/test/olap/bloom_filter_column_predicate_test.cpp @@ -28,6 +28,11 @@ #include "runtime/string_value.hpp" #include "runtime/vectorized_row_batch.h" #include "util/logging.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/predicate_column.h" +#include "vec/core/block.h" + +using namespace doris::vectorized; namespace doris { @@ -172,6 +177,37 @@ TEST_F(TestBloomFilterColumnPredicate, FLOAT_COLUMN) { ASSERT_EQ(select_size, 1); ASSERT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.1); + // for vectorized::Block no null + auto pred_col = PredicateColumnType<vectorized::Float32>::create(); + pred_col->reserve(size); + for (int i = 0; i < size; ++i) { + *(col_data + i) = i + 0.1f; + pred_col->insert_data(reinterpret_cast<const char*>(col_data + i), 0); + } + _row_block->clear(); + select_size = _row_block->selected_size(); + pred->evaluate(*pred_col, _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 3); + ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[0]], 4.1); + ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[1]], 5.1); + ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[2]], 6.1); + + // for vectorized::Block has nulls + auto null_map = ColumnUInt8::create(size, 0); + auto& null_map_data = null_map->get_data(); + for (int i = 0; i < size; ++i) { + null_map_data[i] = (i % 2 == 0); + } + _row_block->clear(); + select_size = _row_block->selected_size(); + auto nullable_col = + vectorized::ColumnNullable::create(std::move(pred_col), std::move(null_map)); + pred->evaluate(*nullable_col, _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 1); + auto nested_col = check_and_get_column<PredicateColumnType<vectorized::Float32>>( + nullable_col->get_nested_column()); + ASSERT_FLOAT_EQ((float)nested_col->get_data()[_row_block->selection_vector()[0]], 5.1); + delete pred; } diff --git a/be/test/olap/null_predicate_test.cpp b/be/test/olap/null_predicate_test.cpp index bf53ec2..ec870af 100644 --- a/be/test/olap/null_predicate_test.cpp +++ b/be/test/olap/null_predicate_test.cpp @@ -28,6 +28,10 @@ #include "runtime/string_value.hpp" #include "runtime/vectorized_row_batch.h" #include "util/logging.h" +#include "vec/columns/column_nullable.h" +#include "vec/core/block.h" + +using namespace doris::vectorized; namespace doris { @@ -142,6 +146,16 @@ public: pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); \ ASSERT_EQ(select_size, 0); \ \ + /* for vectorized::Block no null */ \ + _row_block->clear(); \ + select_size = _row_block->selected_size(); \ + vectorized::Block vec_block = tablet_schema.create_block(return_columns); \ + _row_block->convert_to_vec_block(&vec_block); \ + ColumnPtr vec_col = vec_block.get_columns()[0]; \ + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), \ + _row_block->selection_vector(), &select_size); \ + ASSERT_EQ(select_size, 0); \ + \ /* for has nulls */ \ col_vector->set_no_nulls(false); \ bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size)); \ @@ -173,6 +187,16 @@ public: select_size = _row_block->selected_size(); \ pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); \ ASSERT_EQ(select_size, 5); \ + \ + /* for vectorized::Block has nulls */ \ + _row_block->clear(); \ + select_size = _row_block->selected_size(); \ + vec_block = tablet_schema.create_block(return_columns); \ + _row_block->convert_to_vec_block(&vec_block); \ + vec_col = vec_block.get_columns()[0]; \ + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), \ + _row_block->selection_vector(), &select_size); \ + ASSERT_EQ(select_size, 5); \ pred.reset(); \ } @@ -216,6 +240,16 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) { pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 0); + // for vectorized::Block no null + _row_block->clear(); + select_size = _row_block->selected_size(); + vectorized::Block vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + ColumnPtr vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 0); + // for VectorizedBatch has nulls col_vector->set_no_nulls(false); bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size)); @@ -247,6 +281,16 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) { select_size = _row_block->selected_size(); pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 5); + + // for vectorized::Block has nulls + _row_block->clear(); + select_size = _row_block->selected_size(); + vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 5); } TEST_F(TestNullPredicate, DOUBLE_COLUMN) { @@ -284,6 +328,16 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) { pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 0); + // for vectorized::Block no null + _row_block->clear(); + select_size = _row_block->selected_size(); + vectorized::Block vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + ColumnPtr vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 0); + // for VectorizedBatch has nulls col_vector->set_no_nulls(false); bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size)); @@ -315,6 +369,16 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) { select_size = _row_block->selected_size(); pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 5); + + // for vectorized::Block has nulls + _row_block->clear(); + select_size = _row_block->selected_size(); + vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 5); } TEST_F(TestNullPredicate, DECIMAL_COLUMN) { @@ -355,6 +419,16 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) { pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 0); + // for vectorized::Block no null + _row_block->clear(); + select_size = _row_block->selected_size(); + vectorized::Block vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + ColumnPtr vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 0); + // for VectorizedBatch has nulls col_vector->set_no_nulls(false); bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size)); @@ -388,6 +462,16 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) { select_size = _row_block->selected_size(); pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 4); + + // for vectorized::Block has nulls + _row_block->clear(); + select_size = _row_block->selected_size(); + vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 4); } TEST_F(TestNullPredicate, STRING_COLUMN) { @@ -440,6 +524,16 @@ TEST_F(TestNullPredicate, STRING_COLUMN) { pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 0); + // for vectorized::Block no null + _row_block->clear(); + select_size = _row_block->selected_size(); + vectorized::Block vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + ColumnPtr vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 0); + // for VectorizedBatch has nulls col_vector->set_no_nulls(false); bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size)); @@ -483,6 +577,16 @@ TEST_F(TestNullPredicate, STRING_COLUMN) { select_size = _row_block->selected_size(); pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 4); + + // for vectorized::Block has nulls + _row_block->clear(); + select_size = _row_block->selected_size(); + vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 4); } TEST_F(TestNullPredicate, DATE_COLUMN) { @@ -529,6 +633,16 @@ TEST_F(TestNullPredicate, DATE_COLUMN) { pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 0); + // for vectorized::Block no null + _row_block->clear(); + select_size = _row_block->selected_size(); + vectorized::Block vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + ColumnPtr vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 0); + // for VectorizedBatch has nulls col_vector->set_no_nulls(false); bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size)); @@ -562,6 +676,16 @@ TEST_F(TestNullPredicate, DATE_COLUMN) { select_size = _row_block->selected_size(); pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 2); + + // for vectorized::Block has nulls + _row_block->clear(); + select_size = _row_block->selected_size(); + vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 2); } TEST_F(TestNullPredicate, DATETIME_COLUMN) { @@ -608,6 +732,16 @@ TEST_F(TestNullPredicate, DATETIME_COLUMN) { pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 0); + // for vectorized::Block no null + _row_block->clear(); + select_size = _row_block->selected_size(); + vectorized::Block vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + ColumnPtr vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 0); + // for VectorizedBatch has nulls col_vector->set_no_nulls(false); bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size)); @@ -641,6 +775,16 @@ TEST_F(TestNullPredicate, DATETIME_COLUMN) { select_size = _row_block->selected_size(); pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); ASSERT_EQ(select_size, 2); + + // for vectorized::Block has nulls + _row_block->clear(); + select_size = _row_block->selected_size(); + vec_block = tablet_schema.create_block(return_columns); + _row_block->convert_to_vec_block(&vec_block); + vec_col = vec_block.get_columns()[0]; + pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), + _row_block->selection_vector(), &select_size); + ASSERT_EQ(select_size, 2); } } // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
