This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new edf8f93075 [GLUTEN-8434][CH] Function bloomFilterContains process improvement (#8435)
edf8f93075 is described below

commit edf8f93075dd5f0e457f7a69ed6ee0d1b5192be8
Author: zhanglistar <[email protected]>
AuthorDate: Thu Feb 20 11:08:33 2025 +0800

    [GLUTEN-8434][CH] Function bloomFilterContains process improvement (#8435)
---
 .../local-engine/Functions/FunctionsBloomFilter.h   | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/cpp-ch/local-engine/Functions/FunctionsBloomFilter.h b/cpp-ch/local-engine/Functions/FunctionsBloomFilter.h
index 2546030db5..6a6cdf27e7 100644
--- a/cpp-ch/local-engine/Functions/FunctionsBloomFilter.h
+++ b/cpp-ch/local-engine/Functions/FunctionsBloomFilter.h
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cstddef>
+#include <cstring>
 #include <memory>
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/AggregateFunctionGroupBloomFilter.h>
@@ -118,7 +119,7 @@ private:
     mutable DB::AggregateFunctionPtr agg_func;
 
     template <typename T>
-    typename std::enable_if<std::is_same_v<T, Int64> || std::is_same_v<T, 
UInt64>, void>::type internalExecute(
+    std::enable_if_t<std::is_same_v<T, Int64> || std::is_same_v<T, UInt64>, 
void> internalExecute(
         const DB::ColumnsWithTypeAndName & arguments,
         size_t input_rows_count,
         typename DB::ColumnVector<UInt8>::Container & vec_to,
@@ -129,16 +130,21 @@ private:
         const auto * column_ptr = arguments[1].column.get();
         auto second_arg_const = isColumnConst(*column_ptr);
 
+        AggregateFunctionGroupBloomFilterData & bloom_filter_data_0
+                = *reinterpret_cast<AggregateFunctionGroupBloomFilterData 
*>(bloom_filter_state);
         if (second_arg_const)
-            container_of_int = &typeid_cast<const ColumnType 
&>(typeid_cast<const DB::ColumnConst &>(*column_ptr).getDataColumn()).getData();
-        else
-            container_of_int = &typeid_cast<const ColumnType 
&>(*column_ptr).getData();
+        {
+            vec_to[0] = 
bloom_filter_data_0.bloom_filter.find(typeid_cast<const DB::ColumnConst 
&>(*column_ptr).getDataAt(0).data, sizeof(T));
+            // copy to all rows, better use constant column
+            std::memcpy(&vec_to[1], &vec_to[0], (input_rows_count - 1) * 
sizeof(UInt8));
 
+            return;
+        }
+
+        container_of_int = &typeid_cast<const ColumnType 
&>(*column_ptr).getData();
         for (size_t i = 0; i < input_rows_count; ++i)
         {
-            const T v = second_arg_const ? (*container_of_int)[0] : 
(*container_of_int)[i];
-            AggregateFunctionGroupBloomFilterData & bloom_filter_data_0
-                = *reinterpret_cast<AggregateFunctionGroupBloomFilterData 
*>(bloom_filter_state);
+            const T v = (*container_of_int)[i];
             vec_to[i] = 
bloom_filter_data_0.bloom_filter.find(reinterpret_cast<const char *>(&v), 
sizeof(T));
         }
     }
@@ -195,7 +201,6 @@ private:
                 arguments[0].type->getName());
         }
 
-
         const DB::IColumn * second_column_ptr = arguments[1].column.get();
         if (isColumnNullable(*second_column_ptr))
             second_column_ptr = &typeid_cast<const DB::ColumnNullable 
&>(*second_column_ptr).getNestedColumn();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to