This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch vectorized
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit ae9d0cff0ad6aa59919eaa138307d619fffa9aeb
Author: Zeno Yang <[email protected]>
AuthorDate: Sat Jan 8 01:00:43 2022 +0800

    [Vectorized] Support bloom filter predicate on vectorized engine storage layer (#7557)
---
 be/src/olap/bloom_filter_predicate.h               |  40 +++++-
 .../olap/bloom_filter_column_predicate_test.cpp    |  36 ++++++
 be/test/olap/null_predicate_test.cpp               | 144 +++++++++++++++++++++
 3 files changed, 219 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index b3dcbbb..ff3201c 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -27,6 +27,10 @@
 #include "olap/field.h"
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/utils/util.hpp"
 
 namespace doris {
 
@@ -59,12 +63,14 @@ public:
         return Status::OK();
     }
 
+    void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) 
const override;
+
 private:
     std::shared_ptr<IBloomFilterFuncBase> _filter;
     SpecificFilter* _specific_filter; // owned by _filter
 };
 
-// blomm filter column predicate do not support in segment v1
+// bloom filter column predicate do not support in segment v1
 template <PrimitiveType type>
 void BloomFilterColumnPredicate<type>::evaluate(VectorizedRowBatch* batch) 
const {
     uint16_t n = batch->size();
@@ -99,6 +105,38 @@ void BloomFilterColumnPredicate<type>::evaluate(ColumnBlock* block, uint16_t* se
     *size = new_size;
 }
 
+template <PrimitiveType type>
+void BloomFilterColumnPredicate<type>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,
+                                                uint16_t* size) const {
+    uint16_t new_size = 0;
+    using T = typename PrimitiveTypeTraits<type>::CppType;
+
+    if (column.is_nullable()) {
+        auto* nullable_col = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
+        auto& null_map_data = nullable_col->get_null_map_column().get_data();
+        auto* pred_col = 
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(
+                nullable_col->get_nested_column());
+        auto& pred_col_data = pred_col->get_data();
+        for (uint16_t i = 0; i < *size; i++) {
+            uint16_t idx = sel[i];
+            sel[new_size] = idx;
+            const auto* cell_value = reinterpret_cast<const 
void*>(&(pred_col_data[idx]));
+            new_size += (!null_map_data[idx]) && 
_specific_filter->find_olap_engine(cell_value);
+        }
+    } else {
+        auto* pred_col =
+                
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column);
+        auto& pred_col_data = pred_col->get_data();
+        for (uint16_t i = 0; i < *size; i++) {
+            uint16_t idx = sel[i];
+            sel[new_size] = idx;
+            const auto* cell_value = reinterpret_cast<const 
void*>(&(pred_col_data[idx]));
+            new_size += _specific_filter->find_olap_engine(cell_value);
+        }
+    }
+    *size = new_size;
+}
+
 class BloomFilterColumnPredicateFactory {
 public:
     static ColumnPredicate* create_column_predicate(
diff --git a/be/test/olap/bloom_filter_column_predicate_test.cpp b/be/test/olap/bloom_filter_column_predicate_test.cpp
index 164c51d..24abea1 100644
--- a/be/test/olap/bloom_filter_column_predicate_test.cpp
+++ b/be/test/olap/bloom_filter_column_predicate_test.cpp
@@ -28,6 +28,11 @@
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
 #include "util/logging.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/core/block.h"
+
+using namespace doris::vectorized;
 
 namespace doris {
 
@@ -172,6 +177,37 @@ TEST_F(TestBloomFilterColumnPredicate, FLOAT_COLUMN) {
     ASSERT_EQ(select_size, 1);
     
ASSERT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(),
 5.1);
 
+    // for vectorized::Block no null
+    auto pred_col = PredicateColumnType<vectorized::Float32>::create();
+    pred_col->reserve(size);
+    for (int i = 0; i < size; ++i) {
+        *(col_data + i) = i + 0.1f;
+        pred_col->insert_data(reinterpret_cast<const char*>(col_data + i), 0);
+    }
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    pred->evaluate(*pred_col, _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 3);
+    
ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[0]], 
4.1);
+    
ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[1]], 
5.1);
+    
ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[2]], 
6.1);
+
+    // for vectorized::Block has nulls
+    auto null_map = ColumnUInt8::create(size, 0);
+    auto& null_map_data = null_map->get_data();
+    for (int i = 0; i < size; ++i) {
+        null_map_data[i] = (i % 2 == 0);
+    }
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    auto nullable_col =
+            vectorized::ColumnNullable::create(std::move(pred_col), 
std::move(null_map));
+    pred->evaluate(*nullable_col, _row_block->selection_vector(), 
&select_size);
+    ASSERT_EQ(select_size, 1);
+    auto nested_col = 
check_and_get_column<PredicateColumnType<vectorized::Float32>>(
+            nullable_col->get_nested_column());
+    
ASSERT_FLOAT_EQ((float)nested_col->get_data()[_row_block->selection_vector()[0]],
 5.1);
+
     delete pred;
 }
 
diff --git a/be/test/olap/null_predicate_test.cpp b/be/test/olap/null_predicate_test.cpp
index bf53ec2..ec870af 100644
--- a/be/test/olap/null_predicate_test.cpp
+++ b/be/test/olap/null_predicate_test.cpp
@@ -28,6 +28,10 @@
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
 #include "util/logging.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/block.h"
+
+using namespace doris::vectorized;
 
 namespace doris {
 
@@ -142,6 +146,16 @@ public:
         pred->evaluate(&col_block, _row_block->selection_vector(), 
&select_size);                \
         ASSERT_EQ(select_size, 0);                                             
                  \
                                                                                
                  \
+        /* for vectorized::Block no null */                                    
                  \
+        _row_block->clear();                                                   
                  \
+        select_size = _row_block->selected_size();                             
                  \
+        vectorized::Block vec_block = 
tablet_schema.create_block(return_columns);                \
+        _row_block->convert_to_vec_block(&vec_block);                          
                  \
+        ColumnPtr vec_col = vec_block.get_columns()[0];                        
                  \
+        pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),      
                  \
+                       _row_block->selection_vector(), &select_size);          
                  \
+        ASSERT_EQ(select_size, 0);                                             
                  \
+                                                                               
                  \
         /* for has nulls */                                                    
                  \
         col_vector->set_no_nulls(false);                                       
                  \
         bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));    
                  \
@@ -173,6 +187,16 @@ public:
         select_size = _row_block->selected_size();                             
                  \
         pred->evaluate(&col_block, _row_block->selection_vector(), 
&select_size);                \
         ASSERT_EQ(select_size, 5);                                             
                  \
+                                                                               
                  \
+        /* for vectorized::Block has nulls */                                  
                  \
+        _row_block->clear();                                                   
                  \
+        select_size = _row_block->selected_size();                             
                  \
+        vec_block = tablet_schema.create_block(return_columns);                
                  \
+        _row_block->convert_to_vec_block(&vec_block);                          
                  \
+        vec_col = vec_block.get_columns()[0];                                  
                  \
+        pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),      
                  \
+                       _row_block->selection_vector(), &select_size);          
                  \
+        ASSERT_EQ(select_size, 5);                                             
                  \
         pred.reset();                                                          
                  \
     }
 
@@ -216,6 +240,16 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -247,6 +281,16 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 5);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 5);
 }
 
 TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
@@ -284,6 +328,16 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -315,6 +369,16 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 5);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 5);
 }
 
 TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
@@ -355,6 +419,16 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -388,6 +462,16 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 4);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 4);
 }
 
 TEST_F(TestNullPredicate, STRING_COLUMN) {
@@ -440,6 +524,16 @@ TEST_F(TestNullPredicate, STRING_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -483,6 +577,16 @@ TEST_F(TestNullPredicate, STRING_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 4);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 4);
 }
 
 TEST_F(TestNullPredicate, DATE_COLUMN) {
@@ -529,6 +633,16 @@ TEST_F(TestNullPredicate, DATE_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -562,6 +676,16 @@ TEST_F(TestNullPredicate, DATE_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 2);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 2);
 }
 
 TEST_F(TestNullPredicate, DATETIME_COLUMN) {
@@ -608,6 +732,16 @@ TEST_F(TestNullPredicate, DATETIME_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -641,6 +775,16 @@ TEST_F(TestNullPredicate, DATETIME_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 2);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 2);
 }
 
 } // namespace doris

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to