kou commented on code in PR #39795:
URL: https://github.com/apache/arrow/pull/39795#discussion_r1467137700


##########
cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc:
##########
@@ -146,36 +146,40 @@ class DropNullCounter {
 
 /// \brief The Filter implementation for primitive (fixed-width) types does not
 /// use the logical Arrow type but rather the physical C type. This way we only
-/// generate one take function for each byte width. We use the same
-/// implementation here for boolean and fixed-byte-size inputs with some
-/// template specialization.
-template <typename ArrowType>
+/// generate one take function for each byte width.
+///
+/// We use compile-time specialization for two variations:
+/// - operating on boolean data (using kIsBoolean = true)
+/// - operating on fixed-width data of arbitrary width (using kByteWidth = -1),
+///   with the actual width only known at runtime
+template <int32_t kByteWidth, bool kIsBoolean = false>
 class PrimitiveFilterImpl {
  public:
-  using T = typename std::conditional<std::is_same<ArrowType, 
BooleanType>::value,
-                                      uint8_t, typename 
ArrowType::c_type>::type;
-
   PrimitiveFilterImpl(const ArraySpan& values, const ArraySpan& filter,
                       FilterOptions::NullSelectionBehavior null_selection,
                       ArrayData* out_arr)
-      : values_is_valid_(values.buffers[0].data),
-        values_data_(reinterpret_cast<const T*>(values.buffers[1].data)),
+      : byte_width_(values.type->byte_width()),
+        values_is_valid_(values.buffers[0].data),
+        values_data_(values.buffers[1].data),
         values_null_count_(values.null_count),
         values_offset_(values.offset),
         values_length_(values.length),
         filter_(filter),
         null_selection_(null_selection) {
-    if (values.type->id() != Type::BOOL) {
+    if (kByteWidth >= 0 && !kIsBoolean) {

Review Comment:
   Should we add `constexpr` here?



##########
cpp/src/arrow/compute/kernels/vector_selection_test.cc:
##########
@@ -1897,8 +1961,10 @@ TEST(TestFilter, RandomString) {
 }
 
 TEST(TestFilter, RandomFixedSizeBinary) {
-  FilterRandomTest<>::Test(fixed_size_binary(0));
-  FilterRandomTest<>::Test(fixed_size_binary(16));
+  // FixedSizeBinary filter is special-cased for some widths
+  for (int32_t width : {0, 1, 9, 16, 32, 35}) {

Review Comment:
   Do we need to use both of `9` and `35`?
   Is there any difference for the `9` and `35` cases?



##########
cpp/src/arrow/compute/kernels/vector_selection_benchmark.cc:
##########
@@ -315,8 +354,22 @@ void FilterSetArgs(benchmark::internal::Benchmark* bench) {
   }
 }
 
+void FilterFSBSetArgs(benchmark::internal::Benchmark* bench) {
+  for (int64_t size : g_data_sizes) {
+    for (int i = 0; i < static_cast<int>(g_filter_params.size()); ++i) {
+      // FixedSizeBinary of primitive sizes (powers of two up to 32)
+      // have a faster path.

Review Comment:
   Could you add the same comment to `TakeFSBSetArgs()` too?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to