github-actions[bot] commented on code in PR #26851:
URL: https://github.com/apache/doris/pull/26851#discussion_r1390558784
##########
be/src/exprs/create_predicate_function.h:
##########
@@ -225,6 +225,16 @@ inline auto create_bitmap_filter(PrimitiveType type) {
return create_bitmap_predicate_function<BitmapFilterTraits>(type);
}
+template <PrimitiveType PT>
+ColumnPredicate* create_olap_column_predicate(uint32_t column_id,
+ const
std::shared_ptr<BloomFilterFuncBase>& filter,
+ int be_exec_version, const
TabletColumn*) {
Review Comment:
warning: all parameters should be named in a function
[readability-named-parameter]
```suggestion
int be_exec_version, const TabletColumn* /*unused*/) {
```
##########
be/src/olap/bloom_filter_predicate.h:
##########
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exprs/bloom_filter_func.h"
+#include "exprs/runtime_filter.h"
+#include "olap/column_predicate.h"
+#include "runtime/primitive_type.h"
+#include "vec/columns/column_dictionary.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/exprs/vruntimefilter_wrapper.h"
+
+namespace doris {
+
+// only use in runtime filter and segment v2
+
+template <PrimitiveType T>
+class BloomFilterColumnPredicate : public ColumnPredicate {
+public:
+ using SpecificFilter = BloomFilterFunc<T>;
+
+ BloomFilterColumnPredicate(uint32_t column_id,
+ const std::shared_ptr<BloomFilterFuncBase>&
filter,
+ int be_exec_version)
+ : ColumnPredicate(column_id),
+ _filter(filter),
+
_specific_filter(reinterpret_cast<SpecificFilter*>(_filter.get())),
+ _be_exec_version(be_exec_version) {}
+ ~BloomFilterColumnPredicate() override = default;
+
+ PredicateType type() const override { return PredicateType::BF; }
+
+ Status evaluate(BitmapIndexIterator* iterators, uint32_t num_rows,
+ roaring::Roaring* roaring) const override {
+ return Status::OK();
+ }
+
+ uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel,
+ uint16_t size) const override;
+
+private:
+ template <bool is_nullable>
+ uint16_t evaluate(const vectorized::IColumn& column, const uint8_t*
null_map, uint16_t* sel,
+ uint16_t size) const {
+ if constexpr (is_nullable) {
+ DCHECK(null_map);
+ }
+
+ uint24_t tmp_uint24_value;
+ auto get_cell_value = [&tmp_uint24_value](auto& data) {
+ if constexpr (std::is_same_v<std::decay_t<decltype(data)>,
uint32_t> &&
+ T == PrimitiveType::TYPE_DATE) {
+ memcpy((char*)(&tmp_uint24_value), (char*)(&data),
sizeof(uint24_t));
+ return (const char*)&tmp_uint24_value;
+ } else {
+ return (const char*)&data;
+ }
+ };
+
+ uint16_t new_size = 0;
+ if (column.is_column_dictionary()) {
+ auto* dict_col = reinterpret_cast<const
vectorized::ColumnDictI32*>(&column);
+ if (_be_exec_version >= 2) {
+ for (uint16_t i = 0; i < size; i++) {
+ uint16_t idx = sel[i];
+ sel[new_size] = idx;
+ if constexpr (is_nullable) {
+ new_size += !null_map[idx] &&
_specific_filter->find_uint32_t(
+
dict_col->get_crc32_hash_value(idx));
+ } else {
+ new_size += _specific_filter->find_uint32_t(
+ dict_col->get_crc32_hash_value(idx));
+ }
+ }
+ } else {
+ for (uint16_t i = 0; i < size; i++) {
+ uint16_t idx = sel[i];
+ sel[new_size] = idx;
+ if constexpr (is_nullable) {
+ new_size += !null_map[idx] &&
+
_specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
+ } else {
+ new_size +=
_specific_filter->find_uint32_t(dict_col->get_hash_value(idx));
+ }
+ }
+ }
+ } else if (is_string_type(T) && _be_exec_version >= 2) {
+ auto& pred_col =
+ reinterpret_cast<
+ const
vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(
+ &column)
+ ->get_data();
+
+ auto pred_col_data = pred_col.data();
+ const bool is_dense_column = pred_col.size() == size;
+ for (uint16_t i = 0; i < size; i++) {
+ uint16_t idx = is_dense_column ? i : sel[i];
+ if constexpr (is_nullable) {
+ if (!null_map[idx] &&
+
_specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) {
+ sel[new_size++] = idx;
+ }
+ } else {
+ if
(_specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) {
+ sel[new_size++] = idx;
+ }
+ }
+ }
+ } else if (IRuntimeFilter::enable_use_batch(_be_exec_version > 0, T)) {
+ const auto& data =
+ reinterpret_cast<
+ const
vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(
+ &column)
+ ->get_data();
+ new_size =
_specific_filter->find_fixed_len_olap_engine((char*)data.data(), null_map,
+ sel, size,
data.size() != size);
+ } else {
+ auto& pred_col =
+ reinterpret_cast<
+ const
vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>(
+ &column)
+ ->get_data();
+
+ auto pred_col_data = pred_col.data();
+#define EVALUATE_WITH_NULL_IMPL(IDX) \
+ !null_map[IDX] &&
_specific_filter->find_olap_engine(get_cell_value(pred_col_data[IDX]))
+#define EVALUATE_WITHOUT_NULL_IMPL(IDX) \
+ _specific_filter->find_olap_engine(get_cell_value(pred_col_data[IDX]))
+ EVALUATE_BY_SELECTOR(EVALUATE_WITH_NULL_IMPL,
EVALUATE_WITHOUT_NULL_IMPL)
+#undef EVALUATE_WITH_NULL_IMPL
+#undef EVALUATE_WITHOUT_NULL_IMPL
+ }
+ return new_size;
+ }
+
+ std::string _debug_string() const override {
+ std::string info = "BloomFilterColumnPredicate(" + type_to_string(T) +
")";
+ return info;
+ }
+
+ int get_filter_id() const override {
+ int filter_id = _filter->get_filter_id();
+ DCHECK(filter_id != -1);
+ return filter_id;
+ }
+ bool is_filter() const override { return true; }
+
+ std::shared_ptr<BloomFilterFuncBase> _filter;
+ SpecificFilter* _specific_filter; // owned by _filter
+ mutable bool _always_true = false;
+ mutable bool _has_calculate_filter = false;
+ int _be_exec_version;
+};
+
+template <PrimitiveType T>
+uint16_t BloomFilterColumnPredicate<T>::evaluate(const vectorized::IColumn&
column, uint16_t* sel,
+ uint16_t size) const {
+ uint16_t new_size = 0;
+ if (_always_true) {
+ return size;
+ }
+ if (column.is_nullable()) {
+ auto* nullable_col = reinterpret_cast<const
vectorized::ColumnNullable*>(&column);
Review Comment:
warning: 'auto *nullable_col' can be declared as 'const auto *nullable_col'
[readability-qualified-auto]
```suggestion
const auto* nullable_col = reinterpret_cast<const vectorized::ColumnNullable*>(&column);
```
##########
be/src/olap/reader.cpp:
##########
@@ -562,6 +567,17 @@ void
TabletReader::_init_conditions_param_except_leafnode_of_andnode(
}
}
+ColumnPredicate* TabletReader::_parse_to_predicate(
Review Comment:
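warning: method '_parse_to_predicate' can be made static
[readability-convert-member-functions-to-static]
Note: in C++ the `static` keyword is only permitted on the in-class declaration
(be/src/olap/reader.h); repeating it on this out-of-class definition does not
compile, so the generated suggestion below must not be applied verbatim.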
```suggestion
static ColumnPredicate* TabletReader::_parse_to_predicate(
```
##########
be/src/olap/reader.cpp:
##########
@@ -562,6 +567,17 @@
}
}
+ColumnPredicate* TabletReader::_parse_to_predicate(
+ const std::pair<std::string, std::shared_ptr<BloomFilterFuncBase>>&
bloom_filter) {
Review Comment:
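warning: all parameters should be named in a function
[readability-named-parameter]
Note: this looks like a false positive. The parameter is already named
`bloom_filter` in the hunk above, and the generated suggestion below places
`/*unused*/` inside the template argument list rather than at a parameter
name; do not apply it verbatim.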
```suggestion
const std::pair<std::string, std::shared_ptr<BloomFilterFuncBase>
/*unused*/>& bloom_filter) {
```
##########
be/src/olap/reader.h:
##########
@@ -240,6 +240,9 @@ class TabletReader {
void _init_conditions_param_except_leafnode_of_andnode(const ReaderParams&
read_params);
+ ColumnPredicate* _parse_to_predicate(
+ const std::pair<std::string,
std::shared_ptr<BloomFilterFuncBase>>& bloom_filter);
Review Comment:
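warning: parameter 1 is const-qualified in the function declaration;
const-qualification of parameters only has an effect in function definitions
[readability-avoid-const-params-in-decls]
Note: the `const` here qualifies the referenced `std::pair`, not the parameter
itself, so it is part of the function signature. The generated suggestion
below would make this declaration differ from the definition in reader.cpp and
should not be applied verbatim.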
```suggestion
std::pair<std::string, std::shared_ptr<BloomFilterFuncBase>>&
bloom_filter);
```
##########
be/src/olap/bloom_filter_predicate.h:
##########
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exprs/bloom_filter_func.h"
+#include "exprs/runtime_filter.h"
+#include "olap/column_predicate.h"
+#include "runtime/primitive_type.h"
+#include "vec/columns/column_dictionary.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/exprs/vruntimefilter_wrapper.h"
+
+namespace doris {
+
+// only use in runtime filter and segment v2
+
+template <PrimitiveType T>
+class BloomFilterColumnPredicate : public ColumnPredicate {
+public:
+ using SpecificFilter = BloomFilterFunc<T>;
+
+ BloomFilterColumnPredicate(uint32_t column_id,
+ const std::shared_ptr<BloomFilterFuncBase>&
filter,
+ int be_exec_version)
+ : ColumnPredicate(column_id),
+ _filter(filter),
+
_specific_filter(reinterpret_cast<SpecificFilter*>(_filter.get())),
+ _be_exec_version(be_exec_version) {}
+ ~BloomFilterColumnPredicate() override = default;
+
+ PredicateType type() const override { return PredicateType::BF; }
+
+ Status evaluate(BitmapIndexIterator* iterators, uint32_t num_rows,
+ roaring::Roaring* roaring) const override {
+ return Status::OK();
+ }
+
+ uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel,
+ uint16_t size) const override;
+
+private:
+ template <bool is_nullable>
+ uint16_t evaluate(const vectorized::IColumn& column, const uint8_t*
null_map, uint16_t* sel,
+ uint16_t size) const {
+ if constexpr (is_nullable) {
+ DCHECK(null_map);
+ }
+
+ uint24_t tmp_uint24_value;
+ auto get_cell_value = [&tmp_uint24_value](auto& data) {
+ if constexpr (std::is_same_v<std::decay_t<decltype(data)>,
uint32_t> &&
+ T == PrimitiveType::TYPE_DATE) {
+ memcpy((char*)(&tmp_uint24_value), (char*)(&data),
sizeof(uint24_t));
+ return (const char*)&tmp_uint24_value;
+ } else {
+ return (const char*)&data;
+ }
+ };
+
+ uint16_t new_size = 0;
+ if (column.is_column_dictionary()) {
+ auto* dict_col = reinterpret_cast<const
vectorized::ColumnDictI32*>(&column);
Review Comment:
warning: 'auto *dict_col' can be declared as 'const auto *dict_col'
[readability-qualified-auto]
```suggestion
const auto* dict_col = reinterpret_cast<const vectorized::ColumnDictI32*>(&column);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]