[arrow] branch master updated: ARROW-9398: [C++] Register SIMD sum variants to function instance.

wesm Thu, 30 Jul 2020 16:10:43 -0700

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new 6efba62  ARROW-9398: [C++] Register SIMD sum variants to function instance.
6efba62 is described below

commit 6efba62ee47196e62e3521b07d4c25c092e8910e
Author: Frank Du <[email protected]>
AuthorDate: Thu Jul 30 18:09:06 2020 -0500

    ARROW-9398: [C++] Register SIMD sum variants to function instance.
    
    Enable simd_level feature of kernel and use it in DispatchExactImpl.
    Add simd_level as a parameter of sum template to make sure every simd kernel has its own instantiation instance.
    Also expand sum/mean test case to cover BitBlockCounter method.
    
    Signed-off-by: Frank Du <[email protected]>
    
    Closes #7700 from jianxind/sum_variants_to_function
    
    Authored-by: Frank Du <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 cpp/src/arrow/compute/function.cc                  | 25 +++++++++++++-
 cpp/src/arrow/compute/kernel.h                     |  9 +++--
 cpp/src/arrow/compute/kernels/aggregate_basic.cc   | 40 ++++++++++++++++------
 .../compute/kernels/aggregate_basic_internal.h     | 30 ++++++++++------
 .../arrow/compute/kernels/aggregate_sum_avx2.cc    | 39 ++++++++-------------
 .../arrow/compute/kernels/aggregate_sum_avx512.cc  | 40 ++++++++--------------
 cpp/src/arrow/compute/kernels/aggregate_test.cc    |  8 +++--
 cpp/src/arrow/compute/registry.cc                  | 14 --------
 cpp/src/arrow/compute/registry_internal.h          |  3 --
 9 files changed, 110 insertions(+), 98 deletions(-)

diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc
index 1bce468..41c3e36 100644
--- a/cpp/src/arrow/compute/function.cc
+++ b/cpp/src/arrow/compute/function.cc
@@ -24,6 +24,7 @@
 #include "arrow/compute/exec.h"
 #include "arrow/compute/exec_internal.h"
 #include "arrow/datum.h"
+#include "arrow/util/cpu_info.h"
 
 namespace arrow {
 namespace compute {
@@ -58,6 +59,7 @@ Result<const KernelType*> DispatchExactImpl(const Function& 
func,
                                             const std::vector<KernelType>& 
kernels,
                                             const std::vector<DescrType>& 
values) {
   const int passed_num_args = static_cast<int>(values.size());
+  const KernelType* kernel_matches[SimdLevel::MAX] = {NULL};
 
   // Validate arity
   const Arity arity = func.arity();
@@ -70,9 +72,30 @@ Result<const KernelType*> DispatchExactImpl(const Function& 
func,
   }
   for (const auto& kernel : kernels) {
     if (kernel.signature->MatchesInputs(values)) {
-      return &kernel;
+      kernel_matches[kernel.simd_level] = &kernel;
     }
   }
+
+  // Dispatch as the CPU feature
+  auto cpu_info = arrow::internal::CpuInfo::GetInstance();
+#if defined(ARROW_HAVE_RUNTIME_AVX512)
+  if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX512)) {
+    if (kernel_matches[SimdLevel::AVX512]) {
+      return kernel_matches[SimdLevel::AVX512];
+    }
+  }
+#endif
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+  if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) {
+    if (kernel_matches[SimdLevel::AVX2]) {
+      return kernel_matches[SimdLevel::AVX2];
+    }
+  }
+#endif
+  if (kernel_matches[SimdLevel::NONE]) {
+    return kernel_matches[SimdLevel::NONE];
+  }
+
   return Status::NotImplemented("Function ", func.name(),
                                 " has no kernel matching input types ",
                                 FormatArgTypes(values));
diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h
index c581544..3fb6947 100644
--- a/cpp/src/arrow/compute/kernel.h
+++ b/cpp/src/arrow/compute/kernel.h
@@ -448,7 +448,7 @@ class ARROW_EXPORT KernelSignature {
 /// type combination for different SIMD levels. Based on the active system's
 /// CPU info or the user's preferences, we can elect to use one over the other.
 struct SimdLevel {
-  enum type { NONE, SSE4_2, AVX, AVX2, AVX512, NEON };
+  enum type { NONE = 0, SSE4_2, AVX, AVX2, AVX512, NEON, MAX };
 };
 
 /// \brief The strategy to use for propagating or otherwise populating the
@@ -555,10 +555,9 @@ struct Kernel {
   bool parallelizable = true;
 
   /// \brief Indicates the level of SIMD instruction support in the host CPU is
-  /// required to use the function. Currently this is not used, but the
-  /// intention is for functions to be able to contain multiple kernels with
-  /// the same signature but different levels of SIMD, so that the most
-  /// optimized kernel supported on a host's processor can be chosen.
+  /// required to use the function. The intention is for functions to be able to
+  /// contain multiple kernels with the same signature but different levels of SIMD,
+  /// so that the most optimized kernel supported on a host's processor can be chosen.
   SimdLevel::type simd_level = SimdLevel::NONE;
 };
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 2349360..2f3cdda 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -111,12 +111,12 @@ struct RoundSizeDefault<uint32_t> {
 template <typename ArrowType>
 struct SumImplDefault
     : public SumImpl<RoundSizeDefault<typename 
TypeTraits<ArrowType>::CType>::size,
-                     ArrowType> {};
+                     ArrowType, SimdLevel::NONE> {};
 
 template <typename ArrowType>
 struct MeanImplDefault
     : public MeanImpl<RoundSizeDefault<typename 
TypeTraits<ArrowType>::CType>::size,
-                      ArrowType> {};
+                      ArrowType, SimdLevel::NONE> {};
 
 std::unique_ptr<KernelState> SumInit(KernelContext* ctx, const KernelInitArgs& 
args) {
   SumLikeInit<SumImplDefault> visitor(ctx, *args.inputs[0].type);
@@ -341,29 +341,35 @@ std::unique_ptr<KernelState> MinMaxInit(KernelContext* 
ctx, const KernelInitArgs
 }
 
 void AddAggKernel(std::shared_ptr<KernelSignature> sig, KernelInit init,
-                  ScalarAggregateFunction* func) {
-  DCHECK_OK(func->AddKernel(ScalarAggregateKernel(std::move(sig), init, 
AggregateConsume,
-                                                  AggregateMerge, 
AggregateFinalize)));
+                  ScalarAggregateFunction* func,
+                  SimdLevel::type simd_level = SimdLevel::NONE) {
+  ScalarAggregateKernel kernel(std::move(sig), init, AggregateConsume, 
AggregateMerge,
+                               AggregateFinalize);
+  // Set the simd level
+  kernel.simd_level = simd_level;
+  DCHECK_OK(func->AddKernel(kernel));
 }
 
 void AddBasicAggKernels(KernelInit init,
                         const std::vector<std::shared_ptr<DataType>>& types,
-                        std::shared_ptr<DataType> out_ty, 
ScalarAggregateFunction* func) {
+                        std::shared_ptr<DataType> out_ty, 
ScalarAggregateFunction* func,
+                        SimdLevel::type simd_level) {
   for (const auto& ty : types) {
     // array[InT] -> scalar[OutT]
     auto sig = KernelSignature::Make({InputType::Array(ty)}, 
ValueDescr::Scalar(out_ty));
-    AddAggKernel(std::move(sig), init, func);
+    AddAggKernel(std::move(sig), init, func, simd_level);
   }
 }
 
 void AddMinMaxKernels(KernelInit init,
                       const std::vector<std::shared_ptr<DataType>>& types,
-                      ScalarAggregateFunction* func) {
+                      ScalarAggregateFunction* func,
+                      SimdLevel::type simd_level = SimdLevel::NONE) {
   for (const auto& ty : types) {
     // array[T] -> scalar[struct<min: T, max: T>]
     auto out_ty = struct_({field("min", ty), field("max", ty)});
     auto sig = KernelSignature::Make({InputType::Array(ty)}, 
ValueDescr::Scalar(out_ty));
-    AddAggKernel(std::move(sig), init, func);
+    AddAggKernel(std::move(sig), init, func, simd_level);
   }
 }
 
@@ -375,7 +381,7 @@ void RegisterScalarAggregateBasic(FunctionRegistry* 
registry) {
   auto func = std::make_shared<ScalarAggregateFunction>("count", 
Arity::Unary(),
                                                         
&default_count_options);
 
-  /// Takes any array input, outputs int64 scalar
+  // Takes any array input, outputs int64 scalar
   InputType any_array(ValueDescr::ARRAY);
   aggregate::AddAggKernel(KernelSignature::Make({any_array}, 
ValueDescr::Scalar(int64())),
                           aggregate::CountInit, func.get());
@@ -389,12 +395,26 @@ void RegisterScalarAggregateBasic(FunctionRegistry* 
registry) {
                                 func.get());
   aggregate::AddBasicAggKernels(aggregate::SumInit, FloatingPointTypes(), 
float64(),
                                 func.get());
+  // Add the SIMD variants for sum
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+  aggregate::AddSumAvx2AggKernels(func.get());
+#endif
+#if defined(ARROW_HAVE_RUNTIME_AVX512)
+  aggregate::AddSumAvx512AggKernels(func.get());
+#endif
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
   func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary());
   aggregate::AddBasicAggKernels(aggregate::MeanInit, {boolean()}, float64(), 
func.get());
   aggregate::AddBasicAggKernels(aggregate::MeanInit, NumericTypes(), float64(),
                                 func.get());
+  // Add the SIMD variants for mean
+#if defined(ARROW_HAVE_RUNTIME_AVX2)
+  aggregate::AddMeanAvx2AggKernels(func.get());
+#endif
+#if defined(ARROW_HAVE_RUNTIME_AVX512)
+  aggregate::AddMeanAvx512AggKernels(func.get());
+#endif
   DCHECK_OK(registry->AddFunction(std::move(func)));
 
   static auto default_minmax_options = MinMaxOptions::Defaults();
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
index b99adc3..e301467 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
@@ -34,15 +34,23 @@ struct ScalarAggregator : public KernelState {
 
 void AddBasicAggKernels(KernelInit init,
                         const std::vector<std::shared_ptr<DataType>>& types,
-                        std::shared_ptr<DataType> out_ty, 
ScalarAggregateFunction* func);
+                        std::shared_ptr<DataType> out_ty, 
ScalarAggregateFunction* func,
+                        SimdLevel::type simd_level = SimdLevel::NONE);
+
+// SIMD variants for kernels
+void AddSumAvx2AggKernels(ScalarAggregateFunction* func);
+void AddMeanAvx2AggKernels(ScalarAggregateFunction* func);
+
+void AddSumAvx512AggKernels(ScalarAggregateFunction* func);
+void AddMeanAvx512AggKernels(ScalarAggregateFunction* func);
 
 // ----------------------------------------------------------------------
 // Sum implementation
 
-template <int64_t kRoundSize, typename ArrowType>
+template <int64_t kRoundSize, typename ArrowType, SimdLevel::type simd_level>
 struct SumState {
   using SumType = typename FindAccumulatorType<ArrowType>::Type;
-  using ThisType = SumState<kRoundSize, ArrowType>;
+  using ThisType = SumState<kRoundSize, ArrowType, simd_level>;
   using T = typename TypeTraits<ArrowType>::CType;
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
 
@@ -203,10 +211,10 @@ struct SumState {
   }
 };
 
-template <int64_t kRoundSize>
-struct SumState<kRoundSize, BooleanType> {
+template <int64_t kRoundSize, SimdLevel::type simd_level>
+struct SumState<kRoundSize, BooleanType, simd_level> {
   using SumType = typename FindAccumulatorType<BooleanType>::Type;
-  using ThisType = SumState<kRoundSize, BooleanType>;
+  using ThisType = SumState<kRoundSize, BooleanType, simd_level>;
 
   ThisType& operator+=(const ThisType& rhs) {
     this->count += rhs.count;
@@ -225,10 +233,10 @@ struct SumState<kRoundSize, BooleanType> {
   typename SumType::c_type sum = 0;
 };
 
-template <uint64_t kRoundSize, typename ArrowType>
+template <uint64_t kRoundSize, typename ArrowType, SimdLevel::type simd_level>
 struct SumImpl : public ScalarAggregator {
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
-  using ThisType = SumImpl<kRoundSize, ArrowType>;
+  using ThisType = SumImpl<kRoundSize, ArrowType, simd_level>;
   using SumType = typename FindAccumulatorType<ArrowType>::Type;
   using OutputType = typename TypeTraits<SumType>::ScalarType;
 
@@ -249,11 +257,11 @@ struct SumImpl : public ScalarAggregator {
     }
   }
 
-  SumState<kRoundSize, ArrowType> state;
+  SumState<kRoundSize, ArrowType, simd_level> state;
 };
 
-template <int64_t kRoundSize, typename ArrowType>
-struct MeanImpl : public SumImpl<kRoundSize, ArrowType> {
+template <int64_t kRoundSize, typename ArrowType, SimdLevel::type simd_level>
+struct MeanImpl : public SumImpl<kRoundSize, ArrowType, simd_level> {
   void Finalize(KernelContext*, Datum* out) override {
     const bool is_valid = this->state.count > 0;
     const double divisor = static_cast<double>(is_valid ? this->state.count : 
1UL);
diff --git a/cpp/src/arrow/compute/kernels/aggregate_sum_avx2.cc b/cpp/src/arrow/compute/kernels/aggregate_sum_avx2.cc
index b0c70dc..2811c4c 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_sum_avx2.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_sum_avx2.cc
@@ -49,12 +49,12 @@ struct RoundSizeAvx2<uint32_t> {
 template <typename ArrowType>
 struct SumImplAvx2
     : public SumImpl<RoundSizeAvx2<typename 
TypeTraits<ArrowType>::CType>::size,
-                     ArrowType> {};
+                     ArrowType, SimdLevel::AVX2> {};
 
 template <typename ArrowType>
 struct MeanImplAvx2
     : public MeanImpl<RoundSizeAvx2<typename 
TypeTraits<ArrowType>::CType>::size,
-                      ArrowType> {};
+                      ArrowType, SimdLevel::AVX2> {};
 
 std::unique_ptr<KernelState> SumInitAvx2(KernelContext* ctx, const 
KernelInitArgs& args) {
   SumLikeInit<SumImplAvx2> visitor(ctx, *args.inputs[0].type);
@@ -67,31 +67,20 @@ std::unique_ptr<KernelState> MeanInitAvx2(KernelContext* 
ctx,
   return visitor.Create();
 }
 
-}  // namespace aggregate
-
-namespace internal {
-
-void RegisterScalarAggregateSumAvx2(FunctionRegistry* registry) {
-  auto func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary());
-  aggregate::AddBasicAggKernels(aggregate::SumInitAvx2, {boolean()}, int64(), 
func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInitAvx2, SignedIntTypes(), 
int64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInitAvx2, UnsignedIntTypes(), 
uint64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInitAvx2, FloatingPointTypes(), 
float64(),
-                                func.get());
-  // Register the override AVX2 version
-  DCHECK_OK(registry->AddFunction(std::move(func), /*allow_overwrite=*/true));
+void AddSumAvx2AggKernels(ScalarAggregateFunction* func) {
+  AddBasicAggKernels(SumInitAvx2, internal::SignedIntTypes(), int64(), func,
+                     SimdLevel::AVX2);
+  AddBasicAggKernels(SumInitAvx2, internal::UnsignedIntTypes(), uint64(), func,
+                     SimdLevel::AVX2);
+  AddBasicAggKernels(SumInitAvx2, internal::FloatingPointTypes(), float64(), 
func,
+                     SimdLevel::AVX2);
+}
 
-  func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary());
-  aggregate::AddBasicAggKernels(aggregate::MeanInitAvx2, {boolean()}, 
float64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::MeanInitAvx2, NumericTypes(), 
float64(),
-                                func.get());
-  // Register the override AVX2 version
-  DCHECK_OK(registry->AddFunction(std::move(func), /*allow_overwrite=*/true));
+void AddMeanAvx2AggKernels(ScalarAggregateFunction* func) {
+  AddBasicAggKernels(MeanInitAvx2, internal::NumericTypes(), float64(), func,
+                     SimdLevel::AVX2);
 }
 
-}  // namespace internal
+}  // namespace aggregate
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/aggregate_sum_avx512.cc b/cpp/src/arrow/compute/kernels/aggregate_sum_avx512.cc
index c230df9..0040802 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_sum_avx512.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_sum_avx512.cc
@@ -49,12 +49,12 @@ struct RoundSizeAvx512<uint32_t> {
 template <typename ArrowType>
 struct SumImplAvx512
     : public SumImpl<RoundSizeAvx512<typename 
TypeTraits<ArrowType>::CType>::size,
-                     ArrowType> {};
+                     ArrowType, SimdLevel::AVX512> {};
 
 template <typename ArrowType>
 struct MeanImplAvx512
     : public MeanImpl<RoundSizeAvx512<typename 
TypeTraits<ArrowType>::CType>::size,
-                      ArrowType> {};
+                      ArrowType, SimdLevel::AVX512> {};
 
 std::unique_ptr<KernelState> SumInitAvx512(KernelContext* ctx,
                                            const KernelInitArgs& args) {
@@ -68,32 +68,20 @@ std::unique_ptr<KernelState> MeanInitAvx512(KernelContext* 
ctx,
   return visitor.Create();
 }
 
-}  // namespace aggregate
-
-namespace internal {
-
-void RegisterScalarAggregateSumAvx512(FunctionRegistry* registry) {
-  auto func = std::make_shared<ScalarAggregateFunction>("sum", Arity::Unary());
-  aggregate::AddBasicAggKernels(aggregate::SumInitAvx512, {boolean()}, int64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInitAvx512, SignedIntTypes(), 
int64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInitAvx512, UnsignedIntTypes(), 
uint64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::SumInitAvx512, 
FloatingPointTypes(), float64(),
-                                func.get());
-  // Register the override AVX512 version
-  DCHECK_OK(registry->AddFunction(std::move(func), /*allow_overwrite=*/true));
+void AddSumAvx512AggKernels(ScalarAggregateFunction* func) {
+  AddBasicAggKernels(SumInitAvx512, internal::SignedIntTypes(), int64(), func,
+                     SimdLevel::AVX512);
+  AddBasicAggKernels(SumInitAvx512, internal::UnsignedIntTypes(), uint64(), 
func,
+                     SimdLevel::AVX512);
+  AddBasicAggKernels(SumInitAvx512, internal::FloatingPointTypes(), float64(), 
func,
+                     SimdLevel::AVX512);
+}
 
-  func = std::make_shared<ScalarAggregateFunction>("mean", Arity::Unary());
-  aggregate::AddBasicAggKernels(aggregate::MeanInitAvx512, {boolean()}, 
float64(),
-                                func.get());
-  aggregate::AddBasicAggKernels(aggregate::MeanInitAvx512, NumericTypes(), 
float64(),
-                                func.get());
-  // Register the override AVX512 version
-  DCHECK_OK(registry->AddFunction(std::move(func), /*allow_overwrite=*/true));
+void AddMeanAvx512AggKernels(ScalarAggregateFunction* func) {
+  aggregate::AddBasicAggKernels(MeanInitAvx512, internal::NumericTypes(), 
float64(), func,
+                                SimdLevel::AVX512);
 }
 
-}  // namespace internal
+}  // namespace aggregate
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index 6658a7e..ad669b2 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -206,8 +206,9 @@ class TestRandomNumericSumKernel : public ::testing::Test 
{};
 TYPED_TEST_SUITE(TestRandomNumericSumKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericSumKernel, RandomArraySum) {
   auto rand = random::RandomArrayGenerator(0x5487655);
-  for (size_t i = 3; i < 10; i += 2) {
-    for (auto null_probability : {0.0, 0.1, 0.5, 1.0}) {
+  // Test size up to 1<<13 (8192).
+  for (size_t i = 3; i < 14; i += 2) {
+    for (auto null_probability : {0.0, 0.001, 0.1, 0.5, 0.999, 1.0}) {
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
         auto array = rand.Numeric<TypeParam>(length, 0, 100, null_probability);
@@ -389,8 +390,9 @@ class TestRandomNumericMeanKernel : public ::testing::Test 
{};
 TYPED_TEST_SUITE(TestRandomNumericMeanKernel, NumericArrowTypes);
 TYPED_TEST(TestRandomNumericMeanKernel, RandomArrayMean) {
   auto rand = random::RandomArrayGenerator(0x8afc055);
+  // Test size up to 1<<13 (8192).
   for (size_t i = 3; i < 14; i += 2) {
-    for (auto null_probability : {0.0, 0.1, 0.5, 1.0}) {
+    for (auto null_probability : {0.0, 0.001, 0.1, 0.5, 0.999, 1.0}) {
       for (auto length_adjust : {-2, -1, 0, 1, 2}) {
         int64_t length = (1UL << i) + length_adjust;
         auto array = rand.Numeric<TypeParam>(length, 0, 100, null_probability);
diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc
index d880d97..cb9565e 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -26,7 +26,6 @@
 #include "arrow/compute/function.h"
 #include "arrow/compute/registry_internal.h"
 #include "arrow/status.h"
-#include "arrow/util/cpu_info.h"
 
 namespace arrow {
 namespace compute {
@@ -132,19 +131,6 @@ static std::unique_ptr<FunctionRegistry> 
CreateBuiltInRegistry() {
   RegisterVectorNested(registry.get());
   RegisterVectorSort(registry.get());
 
-  // SIMD functions
-  auto cpu_info = arrow::internal::CpuInfo::GetInstance();
-#if defined(ARROW_HAVE_RUNTIME_AVX2)
-  if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) {
-    RegisterScalarAggregateSumAvx2(registry.get());
-  }
-#endif
-#if defined(ARROW_HAVE_RUNTIME_AVX512)
-  if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX512)) {
-    RegisterScalarAggregateSumAvx512(registry.get());
-  }
-#endif
-
   return registry;
 }
 
diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h
index e6c68ef..d84f85c 100644
--- a/cpp/src/arrow/compute/registry_internal.h
+++ b/cpp/src/arrow/compute/registry_internal.h
@@ -43,9 +43,6 @@ void RegisterVectorSort(FunctionRegistry* registry);
 
 // Aggregate functions
 void RegisterScalarAggregateBasic(FunctionRegistry* registry);
-// SIMD version
-void RegisterScalarAggregateSumAvx2(FunctionRegistry* registry);
-void RegisterScalarAggregateSumAvx512(FunctionRegistry* registry);
 
 }  // namespace internal
 }  // namespace compute

[arrow] branch master updated: ARROW-9398: [C++] Register SIMD sum variants to function instance.

Reply via email to