This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new edf8f93075 [GLUTEN-8434][CH] Function bloomFilterContains process
improvement (#8435)
edf8f93075 is described below
commit edf8f93075dd5f0e457f7a69ed6ee0d1b5192be8
Author: zhanglistar <[email protected]>
AuthorDate: Thu Feb 20 11:08:33 2025 +0800
[GLUTEN-8434][CH] Function bloomFilterContains process improvement (#8435)
---
.../local-engine/Functions/FunctionsBloomFilter.h | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/cpp-ch/local-engine/Functions/FunctionsBloomFilter.h
b/cpp-ch/local-engine/Functions/FunctionsBloomFilter.h
index 2546030db5..6a6cdf27e7 100644
--- a/cpp-ch/local-engine/Functions/FunctionsBloomFilter.h
+++ b/cpp-ch/local-engine/Functions/FunctionsBloomFilter.h
@@ -17,6 +17,7 @@
#pragma once
#include <cstddef>
+#include <cstring>
#include <memory>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupBloomFilter.h>
@@ -118,7 +119,7 @@ private:
mutable DB::AggregateFunctionPtr agg_func;
template <typename T>
- typename std::enable_if<std::is_same_v<T, Int64> || std::is_same_v<T,
UInt64>, void>::type internalExecute(
+ std::enable_if_t<std::is_same_v<T, Int64> || std::is_same_v<T, UInt64>,
void> internalExecute(
const DB::ColumnsWithTypeAndName & arguments,
size_t input_rows_count,
typename DB::ColumnVector<UInt8>::Container & vec_to,
@@ -129,16 +130,21 @@ private:
const auto * column_ptr = arguments[1].column.get();
auto second_arg_const = isColumnConst(*column_ptr);
+ AggregateFunctionGroupBloomFilterData & bloom_filter_data_0
+ = *reinterpret_cast<AggregateFunctionGroupBloomFilterData
*>(bloom_filter_state);
if (second_arg_const)
- container_of_int = &typeid_cast<const ColumnType
&>(typeid_cast<const DB::ColumnConst &>(*column_ptr).getDataColumn()).getData();
- else
- container_of_int = &typeid_cast<const ColumnType
&>(*column_ptr).getData();
+ {
+ vec_to[0] =
bloom_filter_data_0.bloom_filter.find(typeid_cast<const DB::ColumnConst
&>(*column_ptr).getDataAt(0).data, sizeof(T));
+ // copy to all rows, better use constant column
+ std::memcpy(&vec_to[1], &vec_to[0], (input_rows_count - 1) *
sizeof(UInt8));
+ return;
+ }
+
+ container_of_int = &typeid_cast<const ColumnType
&>(*column_ptr).getData();
for (size_t i = 0; i < input_rows_count; ++i)
{
- const T v = second_arg_const ? (*container_of_int)[0] :
(*container_of_int)[i];
- AggregateFunctionGroupBloomFilterData & bloom_filter_data_0
- = *reinterpret_cast<AggregateFunctionGroupBloomFilterData
*>(bloom_filter_state);
+ const T v = (*container_of_int)[i];
vec_to[i] =
bloom_filter_data_0.bloom_filter.find(reinterpret_cast<const char *>(&v),
sizeof(T));
}
}
@@ -195,7 +201,6 @@ private:
arguments[0].type->getName());
}
-
const DB::IColumn * second_column_ptr = arguments[1].column.get();
if (isColumnNullable(*second_column_ptr))
second_column_ptr = &typeid_cast<const DB::ColumnNullable
&>(*second_column_ptr).getNestedColumn();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]