This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch vectorized
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/vectorized by this push:
new f8bf00f [Vectorized] Support bloom filter predicate on vectorized engine storage layer (#7557)
f8bf00f is described below
commit f8bf00f25a59b97be2b5d4fa465c60ced05cbf3b
Author: Zeno Yang <[email protected]>
AuthorDate: Sat Jan 8 01:00:43 2022 +0800
[Vectorized] Support bloom filter predicate on vectorized engine storage layer (#7557)
---
be/src/olap/bloom_filter_predicate.h | 40 +++++-
.../olap/bloom_filter_column_predicate_test.cpp | 36 ++++++
be/test/olap/null_predicate_test.cpp | 144 +++++++++++++++++++++
3 files changed, 219 insertions(+), 1 deletion(-)
diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index b3dcbbb..ff3201c 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -27,6 +27,10 @@
#include "olap/field.h"
#include "runtime/string_value.hpp"
#include "runtime/vectorized_row_batch.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/utils/util.hpp"
namespace doris {
@@ -59,12 +63,14 @@ public:
return Status::OK();
}
+ void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override;
+
private:
std::shared_ptr<IBloomFilterFuncBase> _filter;
SpecificFilter* _specific_filter; // owned by _filter
};
-// blomm filter column predicate do not support in segment v1
+// bloom filter column predicate do not support in segment v1
template <PrimitiveType type>
void BloomFilterColumnPredicate<type>::evaluate(VectorizedRowBatch* batch) const {
uint16_t n = batch->size();
@@ -99,6 +105,38 @@ void BloomFilterColumnPredicate<type>::evaluate(ColumnBlock* block, uint16_t* se
*size = new_size;
}
+template <PrimitiveType type>
+void BloomFilterColumnPredicate<type>::evaluate(vectorized::IColumn& column, uint16_t* sel,
+ uint16_t* size) const {
+ uint16_t new_size = 0;
+ using T = typename PrimitiveTypeTraits<type>::CppType;
+
+ if (column.is_nullable()) {
+ auto* nullable_col = vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
+ auto& null_map_data = nullable_col->get_null_map_column().get_data();
+ auto* pred_col = vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(
+ nullable_col->get_nested_column());
+ auto& pred_col_data = pred_col->get_data();
+ for (uint16_t i = 0; i < *size; i++) {
+ uint16_t idx = sel[i];
+ sel[new_size] = idx;
+ const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[idx]));
+ new_size += (!null_map_data[idx]) && _specific_filter->find_olap_engine(cell_value);
+ }
+ } else {
+ auto* pred_col =
+ vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column);
+ auto& pred_col_data = pred_col->get_data();
+ for (uint16_t i = 0; i < *size; i++) {
+ uint16_t idx = sel[i];
+ sel[new_size] = idx;
+ const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[idx]));
+ new_size += _specific_filter->find_olap_engine(cell_value);
+ }
+ }
+ *size = new_size;
+}
+
class BloomFilterColumnPredicateFactory {
public:
static ColumnPredicate* create_column_predicate(
diff --git a/be/test/olap/bloom_filter_column_predicate_test.cpp b/be/test/olap/bloom_filter_column_predicate_test.cpp
index 164c51d..24abea1 100644
--- a/be/test/olap/bloom_filter_column_predicate_test.cpp
+++ b/be/test/olap/bloom_filter_column_predicate_test.cpp
@@ -28,6 +28,11 @@
#include "runtime/string_value.hpp"
#include "runtime/vectorized_row_batch.h"
#include "util/logging.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/core/block.h"
+
+using namespace doris::vectorized;
namespace doris {
@@ -172,6 +177,37 @@ TEST_F(TestBloomFilterColumnPredicate, FLOAT_COLUMN) {
ASSERT_EQ(select_size, 1);
ASSERT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.1);
+ // for vectorized::Block no null
+ auto pred_col = PredicateColumnType<vectorized::Float32>::create();
+ pred_col->reserve(size);
+ for (int i = 0; i < size; ++i) {
+ *(col_data + i) = i + 0.1f;
+ pred_col->insert_data(reinterpret_cast<const char*>(col_data + i), 0);
+ }
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ pred->evaluate(*pred_col, _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 3);
+ ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[0]], 4.1);
+ ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[1]], 5.1);
+ ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[2]], 6.1);
+
+ // for vectorized::Block has nulls
+ auto null_map = ColumnUInt8::create(size, 0);
+ auto& null_map_data = null_map->get_data();
+ for (int i = 0; i < size; ++i) {
+ null_map_data[i] = (i % 2 == 0);
+ }
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ auto nullable_col =
+ vectorized::ColumnNullable::create(std::move(pred_col), std::move(null_map));
+ pred->evaluate(*nullable_col, _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 1);
+ auto nested_col = check_and_get_column<PredicateColumnType<vectorized::Float32>>(
+ nullable_col->get_nested_column());
+ ASSERT_FLOAT_EQ((float)nested_col->get_data()[_row_block->selection_vector()[0]], 5.1);
+
delete pred;
}
diff --git a/be/test/olap/null_predicate_test.cpp b/be/test/olap/null_predicate_test.cpp
index bf53ec2..ec870af 100644
--- a/be/test/olap/null_predicate_test.cpp
+++ b/be/test/olap/null_predicate_test.cpp
@@ -28,6 +28,10 @@
#include "runtime/string_value.hpp"
#include "runtime/vectorized_row_batch.h"
#include "util/logging.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/block.h"
+
+using namespace doris::vectorized;
namespace doris {
@@ -142,6 +146,16 @@ public:
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); \
ASSERT_EQ(select_size, 0); \
\
+ /* for vectorized::Block no null */ \
+ _row_block->clear(); \
+ select_size = _row_block->selected_size(); \
+ vectorized::Block vec_block = tablet_schema.create_block(return_columns); \
+ _row_block->convert_to_vec_block(&vec_block); \
+ ColumnPtr vec_col = vec_block.get_columns()[0]; \
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), \
+ _row_block->selection_vector(), &select_size); \
+ ASSERT_EQ(select_size, 0); \
+ \
/* for has nulls */ \
col_vector->set_no_nulls(false); \
bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size)); \
@@ -173,6 +187,16 @@ public:
select_size = _row_block->selected_size(); \
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size); \
ASSERT_EQ(select_size, 5); \
+ \
+ /* for vectorized::Block has nulls */ \
+ _row_block->clear(); \
+ select_size = _row_block->selected_size(); \
+ vec_block = tablet_schema.create_block(return_columns); \
+ _row_block->convert_to_vec_block(&vec_block); \
+ vec_col = vec_block.get_columns()[0]; \
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col), \
+ _row_block->selection_vector(), &select_size); \
+ ASSERT_EQ(select_size, 5); \
pred.reset(); \
}
@@ -216,6 +240,16 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) {
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 0);
+ // for vectorized::Block no null
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ ColumnPtr vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 0);
+
// for VectorizedBatch has nulls
col_vector->set_no_nulls(false);
bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -247,6 +281,16 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) {
select_size = _row_block->selected_size();
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 5);
+
+ // for vectorized::Block has nulls
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 5);
}
TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
@@ -284,6 +328,16 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 0);
+ // for vectorized::Block no null
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ ColumnPtr vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 0);
+
// for VectorizedBatch has nulls
col_vector->set_no_nulls(false);
bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -315,6 +369,16 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
select_size = _row_block->selected_size();
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 5);
+
+ // for vectorized::Block has nulls
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 5);
}
TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
@@ -355,6 +419,16 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 0);
+ // for vectorized::Block no null
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ ColumnPtr vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 0);
+
// for VectorizedBatch has nulls
col_vector->set_no_nulls(false);
bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -388,6 +462,16 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
select_size = _row_block->selected_size();
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 4);
+
+ // for vectorized::Block has nulls
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 4);
}
TEST_F(TestNullPredicate, STRING_COLUMN) {
@@ -440,6 +524,16 @@ TEST_F(TestNullPredicate, STRING_COLUMN) {
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 0);
+ // for vectorized::Block no null
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ ColumnPtr vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 0);
+
// for VectorizedBatch has nulls
col_vector->set_no_nulls(false);
bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -483,6 +577,16 @@ TEST_F(TestNullPredicate, STRING_COLUMN) {
select_size = _row_block->selected_size();
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 4);
+
+ // for vectorized::Block has nulls
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 4);
}
TEST_F(TestNullPredicate, DATE_COLUMN) {
@@ -529,6 +633,16 @@ TEST_F(TestNullPredicate, DATE_COLUMN) {
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 0);
+ // for vectorized::Block no null
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ ColumnPtr vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 0);
+
// for VectorizedBatch has nulls
col_vector->set_no_nulls(false);
bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -562,6 +676,16 @@ TEST_F(TestNullPredicate, DATE_COLUMN) {
select_size = _row_block->selected_size();
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 2);
+
+ // for vectorized::Block has nulls
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 2);
}
TEST_F(TestNullPredicate, DATETIME_COLUMN) {
@@ -608,6 +732,16 @@ TEST_F(TestNullPredicate, DATETIME_COLUMN) {
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 0);
+ // for vectorized::Block no null
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ ColumnPtr vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 0);
+
// for VectorizedBatch has nulls
col_vector->set_no_nulls(false);
bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -641,6 +775,16 @@ TEST_F(TestNullPredicate, DATETIME_COLUMN) {
select_size = _row_block->selected_size();
pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
ASSERT_EQ(select_size, 2);
+
+ // for vectorized::Block has nulls
+ _row_block->clear();
+ select_size = _row_block->selected_size();
+ vec_block = tablet_schema.create_block(return_columns);
+ _row_block->convert_to_vec_block(&vec_block);
+ vec_col = vec_block.get_columns()[0];
+ pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+ _row_block->selection_vector(), &select_size);
+ ASSERT_EQ(select_size, 2);
}
} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]