This is an automated email from the ASF dual-hosted git repository.
westonpace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new be7a763403 GH-34388: [C++] Build core compute kernels unconditionally
(#34295)
be7a763403 is described below
commit be7a76340377cf23d5ee272f05c759160cdeb576
Author: Ben Harkins <[email protected]>
AuthorDate: Fri Mar 3 20:16:20 2023 -0500
GH-34388: [C++] Build core compute kernels unconditionally (#34295)
This includes the core compute machinery in libarrow by default - in
addition to all cast kernels and several other kernels that are either
dependencies of `cast` (`take`) or utilized in libarrow/libparquet (`unique`,
`filter`). The remaining kernels won't be built/registered unless
`ARROW_COMPUTE=ON` (note that this would slightly change the option's meaning,
as currently, nothing in arrow/compute is built unless it's set).
Initially this was more substantial as the original goal was to build the
extra kernels as a shared library (suggested in the original issue). After some
discussion in the issue thread, I opted not to do that - primarily because I
can't personally see the utility of a separate lib here, even ignoring the
complexity it introduces. However, there may be a good reason that simply
hasn't occurred to me.
* Closes: #34388
Lead-authored-by: benibus <[email protected]>
Co-authored-by: Ben Harkins <[email protected]>
Co-authored-by: Weston Pace <[email protected]>
Signed-off-by: Weston Pace <[email protected]>
---
cpp/cmake_modules/DefineOptions.cmake | 3 +-
cpp/src/arrow/CMakeLists.txt | 155 +++++++-------
cpp/src/arrow/array/CMakeLists.txt | 6 +-
cpp/src/arrow/compute/CMakeLists.txt | 20 +-
cpp/src/arrow/compute/exec/CMakeLists.txt | 7 +-
cpp/src/arrow/compute/kernels/CMakeLists.txt | 20 +-
cpp/src/arrow/compute/kernels/scalar_arithmetic.cc | 10 -
cpp/src/arrow/compute/kernels/scalar_round.cc | 225 +--------------------
cpp/src/arrow/compute/kernels/util_internal.cc | 14 ++
cpp/src/arrow/compute/kernels/util_internal.h | 19 ++
cpp/src/arrow/compute/registry.cc | 23 ++-
cpp/src/arrow/csv/CMakeLists.txt | 8 +-
cpp/src/arrow/csv/api.h | 5 -
cpp/src/arrow/public_api_test.cc | 9 +-
cpp/src/arrow/testing/generator.cc | 6 +-
cpp/src/arrow/testing/generator.h | 8 +-
16 files changed, 180 insertions(+), 358 deletions(-)
diff --git a/cpp/cmake_modules/DefineOptions.cmake
b/cpp/cmake_modules/DefineOptions.cmake
index 6700a409e1..1ec1245e7d 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -296,7 +296,7 @@ takes precedence over ccache if a storage backend is
configured" ON)
define_option(ARROW_BUILD_UTILITIES "Build Arrow commandline utilities" OFF)
- define_option(ARROW_COMPUTE "Build the Arrow Compute Modules" OFF)
+ define_option(ARROW_COMPUTE "Build all Arrow Compute kernels" OFF)
define_option(ARROW_CSV "Build the Arrow CSV Parser Module" OFF)
@@ -361,7 +361,6 @@ takes precedence over ccache if a storage backend is
configured" ON)
"Build the Parquet libraries"
OFF
DEPENDS
- ARROW_COMPUTE
ARROW_IPC)
define_option(ARROW_ORC
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index e0931c19ef..721812b4c0 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -375,73 +375,93 @@ if(ARROW_CSV)
csv/column_decoder.cc
csv/options.cc
csv/parser.cc
- csv/reader.cc)
- if(ARROW_COMPUTE)
- list(APPEND ARROW_SRCS csv/writer.cc)
- endif()
+ csv/reader.cc
+ csv/writer.cc)
list(APPEND ARROW_TESTING_SRCS csv/test_common.cc)
endif()
+# Baseline Compute functionality + scalar casts and a few select kernels
+list(APPEND
+ ARROW_SRCS
+ compute/api_aggregate.cc
+ compute/api_scalar.cc
+ compute/api_vector.cc
+ compute/cast.cc
+ compute/exec.cc
+ compute/exec/groupby.cc
+ compute/exec/accumulation_queue.cc
+ compute/exec/aggregate_node.cc
+ compute/exec/asof_join_node.cc
+ compute/exec/bloom_filter.cc
+ compute/exec/exec_plan.cc
+ compute/exec/expression.cc
+ compute/exec/fetch_node.cc
+ compute/exec/filter_node.cc
+ compute/exec/hash_join.cc
+ compute/exec/hash_join_dict.cc
+ compute/exec/hash_join_node.cc
+ compute/exec/key_hash.cc
+ compute/exec/key_map.cc
+ compute/exec/map_node.cc
+ compute/exec/options.cc
+ compute/exec/order_by_impl.cc
+ compute/exec/partition_util.cc
+ compute/exec/project_node.cc
+ compute/exec/query_context.cc
+ compute/exec/sink_node.cc
+ compute/exec/source_node.cc
+ compute/exec/swiss_join.cc
+ compute/exec/task_util.cc
+ compute/exec/tpch_node.cc
+ compute/exec/union_node.cc
+ compute/exec/util.cc
+ compute/function.cc
+ compute/function_internal.cc
+ compute/kernel.cc
+ compute/light_array.cc
+ compute/ordering.cc
+ compute/registry.cc
+ compute/kernels/codegen_internal.cc
+ compute/kernels/row_encoder.cc
+ compute/kernels/scalar_cast_boolean.cc
+ compute/kernels/scalar_cast_dictionary.cc
+ compute/kernels/scalar_cast_extension.cc
+ compute/kernels/scalar_cast_internal.cc
+ compute/kernels/scalar_cast_nested.cc
+ compute/kernels/scalar_cast_numeric.cc
+ compute/kernels/scalar_cast_string.cc
+ compute/kernels/scalar_cast_temporal.cc
+ compute/kernels/util_internal.cc
+ compute/kernels/vector_hash.cc
+ compute/kernels/vector_selection.cc
+ compute/row/encode_internal.cc
+ compute/row/compare_internal.cc
+ compute/row/grouper.cc
+ compute/row/row_internal.cc)
+
+append_avx2_src(compute/exec/bloom_filter_avx2.cc)
+append_avx2_src(compute/exec/key_hash_avx2.cc)
+append_avx2_src(compute/exec/key_map_avx2.cc)
+append_avx2_src(compute/exec/swiss_join_avx2.cc)
+append_avx2_src(compute/exec/util_avx2.cc)
+append_avx2_src(compute/row/compare_internal_avx2.cc)
+append_avx2_src(compute/row/encode_internal_avx2.cc)
+
+list(APPEND ARROW_TESTING_SRCS compute/exec/test_util.cc)
+
if(ARROW_COMPUTE)
+ # Include the remaining kernels
list(APPEND
ARROW_SRCS
- compute/api_aggregate.cc
- compute/api_scalar.cc
- compute/api_vector.cc
- compute/cast.cc
- compute/exec.cc
- compute/exec/groupby.cc
- compute/exec/accumulation_queue.cc
- compute/exec/aggregate_node.cc
- compute/exec/asof_join_node.cc
- compute/exec/bloom_filter.cc
- compute/exec/exec_plan.cc
- compute/exec/expression.cc
- compute/exec/fetch_node.cc
- compute/exec/filter_node.cc
- compute/exec/hash_join.cc
- compute/exec/hash_join_dict.cc
- compute/exec/hash_join_node.cc
- compute/exec/key_hash.cc
- compute/exec/key_map.cc
- compute/exec/map_node.cc
- compute/exec/options.cc
- compute/exec/order_by_impl.cc
- compute/exec/partition_util.cc
- compute/exec/project_node.cc
- compute/exec/query_context.cc
- compute/exec/sink_node.cc
- compute/exec/source_node.cc
- compute/exec/swiss_join.cc
- compute/exec/task_util.cc
- compute/exec/tpch_node.cc
- compute/exec/union_node.cc
- compute/exec/util.cc
- compute/function.cc
- compute/function_internal.cc
- compute/kernel.cc
- compute/light_array.cc
- compute/ordering.cc
- compute/registry.cc
compute/kernels/aggregate_basic.cc
compute/kernels/aggregate_mode.cc
compute/kernels/aggregate_quantile.cc
compute/kernels/aggregate_tdigest.cc
compute/kernels/aggregate_var_std.cc
- compute/kernels/codegen_internal.cc
compute/kernels/hash_aggregate.cc
- compute/kernels/row_encoder.cc
compute/kernels/scalar_arithmetic.cc
compute/kernels/scalar_boolean.cc
- compute/kernels/scalar_cast_boolean.cc
- compute/kernels/scalar_cast_dictionary.cc
- compute/kernels/scalar_cast_extension.cc
- compute/kernels/scalar_cast_internal.cc
- compute/kernels/scalar_cast_nested.cc
- compute/kernels/scalar_cast_numeric.cc
- compute/kernels/scalar_cast_string.cc
- compute/kernels/scalar_cast_temporal.cc
compute/kernels/scalar_compare.cc
compute/kernels/scalar_if_else.cc
compute/kernels/scalar_nested.cc
@@ -453,33 +473,16 @@ if(ARROW_COMPUTE)
compute/kernels/scalar_temporal_binary.cc
compute/kernels/scalar_temporal_unary.cc
compute/kernels/scalar_validity.cc
- compute/kernels/util_internal.cc
compute/kernels/vector_array_sort.cc
compute/kernels/vector_cumulative_ops.cc
- compute/kernels/vector_hash.cc
compute/kernels/vector_nested.cc
compute/kernels/vector_rank.cc
compute/kernels/vector_replace.cc
compute/kernels/vector_select_k.cc
- compute/kernels/vector_selection.cc
- compute/kernels/vector_sort.cc
- compute/row/encode_internal.cc
- compute/row/compare_internal.cc
- compute/row/grouper.cc
- compute/row/row_internal.cc)
+ compute/kernels/vector_sort.cc)
append_avx2_src(compute/kernels/aggregate_basic_avx2.cc)
append_avx512_src(compute/kernels/aggregate_basic_avx512.cc)
-
- append_avx2_src(compute/exec/bloom_filter_avx2.cc)
- append_avx2_src(compute/exec/key_hash_avx2.cc)
- append_avx2_src(compute/exec/key_map_avx2.cc)
- append_avx2_src(compute/exec/swiss_join_avx2.cc)
- append_avx2_src(compute/exec/util_avx2.cc)
- append_avx2_src(compute/row/compare_internal_avx2.cc)
- append_avx2_src(compute/row/encode_internal_avx2.cc)
-
- list(APPEND ARROW_TESTING_SRCS compute/exec/test_util.cc)
endif()
if(ARROW_FILESYSTEM)
@@ -800,12 +803,7 @@ add_arrow_test(table_test
add_arrow_test(tensor_test)
add_arrow_test(sparse_tensor_test)
-set(STL_TEST_SRCS stl_iterator_test.cc)
-if(ARROW_COMPUTE)
- # This unit test uses compute code
- list(APPEND STL_TEST_SRCS stl_test.cc)
-endif()
-add_arrow_test(stl_test SOURCES ${STL_TEST_SRCS})
+add_arrow_test(stl_test SOURCES stl_iterator_test.cc stl_test.cc)
add_arrow_benchmark(builder_benchmark)
add_arrow_benchmark(compare_benchmark)
@@ -821,6 +819,7 @@ add_subdirectory(testing)
add_subdirectory(array)
add_subdirectory(c)
+add_subdirectory(compute)
add_subdirectory(io)
add_subdirectory(tensor)
add_subdirectory(util)
@@ -830,10 +829,6 @@ if(ARROW_CSV)
add_subdirectory(csv)
endif()
-if(ARROW_COMPUTE)
- add_subdirectory(compute)
-endif()
-
if(ARROW_SUBSTRAIT)
add_subdirectory(engine)
endif()
diff --git a/cpp/src/arrow/array/CMakeLists.txt
b/cpp/src/arrow/array/CMakeLists.txt
index c0fc17687d..d8dc83bb71 100644
--- a/cpp/src/arrow/array/CMakeLists.txt
+++ b/cpp/src/arrow/array/CMakeLists.txt
@@ -16,11 +16,7 @@
# under the License.
add_arrow_test(concatenate_test)
-
-if(ARROW_COMPUTE)
- # This unit test uses compute code
- add_arrow_test(diff_test)
-endif()
+add_arrow_test(diff_test)
# Headers: top level
arrow_install_all_headers("arrow/array")
diff --git a/cpp/src/arrow/compute/CMakeLists.txt
b/cpp/src/arrow/compute/CMakeLists.txt
index 91fa796f6d..cdf019b798 100644
--- a/cpp/src/arrow/compute/CMakeLists.txt
+++ b/cpp/src/arrow/compute/CMakeLists.txt
@@ -26,8 +26,22 @@ arrow_add_pkg_config("arrow-compute")
# Unit tests
#
+# The following kernels are always present:
+# - array_filter
+# - array_take
+# - cast
+# - dictionary_encode
+# - drop_null
+# - filter
+# - indices_nonzero
+# - take
+# - unique
+# - value_counts
+#
+# Tests that use additional kernels should specify REQUIRE_ALL_KERNELS to avoid
+# being included in minimal builds. See: GH-34388
function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
- set(options)
+ set(options REQUIRE_ALL_KERNELS)
set(one_value_args PREFIX)
set(multi_value_args LABELS)
cmake_parse_arguments(ARG
@@ -36,6 +50,10 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
"${multi_value_args}"
${ARGN})
+ if(ARG_REQUIRE_ALL_KERNELS AND (NOT ARROW_COMPUTE))
+ return()
+ endif()
+
if(ARG_PREFIX)
set(PREFIX ${ARG_PREFIX})
else()
diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt
b/cpp/src/arrow/compute/exec/CMakeLists.txt
index e20a402189..ff979be84a 100644
--- a/cpp/src/arrow/compute/exec/CMakeLists.txt
+++ b/cpp/src/arrow/compute/exec/CMakeLists.txt
@@ -18,6 +18,7 @@
arrow_install_all_headers("arrow/compute/exec")
add_arrow_compute_test(expression_test
+ REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
@@ -25,6 +26,7 @@ add_arrow_compute_test(expression_test
subtree_test.cc)
add_arrow_compute_test(plan_test
+ REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
@@ -32,12 +34,14 @@ add_arrow_compute_test(plan_test
test_nodes_test.cc
test_nodes.cc)
add_arrow_compute_test(fetch_node_test
+ REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
fetch_node_test.cc
test_nodes.cc)
add_arrow_compute_test(hash_join_node_test
+ REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
@@ -45,6 +49,7 @@ add_arrow_compute_test(hash_join_node_test
bloom_filter_test.cc
key_hash_test.cc)
add_arrow_compute_test(asof_join_node_test
+ REQUIRE_ALL_KERNELS
PREFIX
"arrow-compute"
SOURCES
@@ -52,7 +57,7 @@ add_arrow_compute_test(asof_join_node_test
test_nodes.cc)
add_arrow_compute_test(tpch_node_test PREFIX "arrow-compute")
add_arrow_compute_test(union_node_test PREFIX "arrow-compute")
-add_arrow_compute_test(groupby_test PREFIX "arrow-compute")
+add_arrow_compute_test(groupby_test REQUIRE_ALL_KERNELS PREFIX "arrow-compute")
add_arrow_compute_test(util_test
PREFIX
"arrow-compute"
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt
b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index a4d0fc8582..7db97041d7 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -18,19 +18,30 @@
# ----------------------------------------------------------------------
# Scalar kernels
+add_arrow_compute_test(scalar_cast_test SOURCES scalar_cast_test.cc
test_util.cc)
+
add_arrow_compute_test(scalar_type_test
+ REQUIRE_ALL_KERNELS
SOURCES
scalar_boolean_test.cc
- scalar_cast_test.cc
scalar_nested_test.cc
scalar_string_test.cc
test_util.cc)
-add_arrow_compute_test(scalar_if_else_test SOURCES scalar_if_else_test.cc
test_util.cc)
+add_arrow_compute_test(scalar_if_else_test
+ REQUIRE_ALL_KERNELS
+ SOURCES
+ scalar_if_else_test.cc
+ test_util.cc)
-add_arrow_compute_test(scalar_temporal_test SOURCES scalar_temporal_test.cc
test_util.cc)
+add_arrow_compute_test(scalar_temporal_test
+ REQUIRE_ALL_KERNELS
+ SOURCES
+ scalar_temporal_test.cc
+ test_util.cc)
add_arrow_compute_test(scalar_math_test
+ REQUIRE_ALL_KERNELS
SOURCES
scalar_arithmetic_test.cc
scalar_compare_test.cc
@@ -38,6 +49,7 @@ add_arrow_compute_test(scalar_math_test
test_util.cc)
add_arrow_compute_test(scalar_utility_test
+ REQUIRE_ALL_KERNELS
SOURCES
scalar_random_test.cc
scalar_set_lookup_test.cc
@@ -59,6 +71,7 @@ add_arrow_benchmark(scalar_temporal_benchmark PREFIX
"arrow-compute")
# Vector kernels
add_arrow_compute_test(vector_test
+ REQUIRE_ALL_KERNELS
SOURCES
vector_cumulative_ops_test.cc
vector_hash_test.cc
@@ -82,6 +95,7 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX
"arrow-compute")
# Aggregates
add_arrow_compute_test(aggregate_test
+ REQUIRE_ALL_KERNELS
SOURCES
aggregate_test.cc
hash_aggregate_test.cc
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 0021aa1108..249da4758e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -761,16 +761,6 @@ struct ArithmeticFloatingPointFunction : public
ArithmeticFunction {
}
};
-// A scalar kernel that ignores (assumed all-null) inputs and returns null.
-Status NullToNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
- return Status::OK();
-}
-
-void AddNullExec(ScalarFunction* func) {
- std::vector<InputType> input_types(func->arity().num_args,
InputType(Type::NA));
- DCHECK_OK(func->AddKernel(std::move(input_types), OutputType(null()),
NullToNullExec));
-}
-
template <typename Op, typename FunctionImpl = ArithmeticFunction>
std::shared_ptr<ScalarFunction> MakeArithmeticFunction(std::string name,
FunctionDoc doc) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_round.cc
b/cpp/src/arrow/compute/kernels/scalar_round.cc
index 41961ad50e..fc2cb5b8a6 100644
--- a/cpp/src/arrow/compute/kernels/scalar_round.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_round.cc
@@ -771,114 +771,6 @@ struct Trunc {
}
};
-// Generate a kernel given a bitwise arithmetic functor. Assumes the
-// functor treats all integer types of equal width identically
-template <template <typename... Args> class KernelGenerator, typename Op>
-ArrayKernelExec TypeAgnosticBitWiseExecFromOp(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::INT8:
- case Type::UINT8:
- return KernelGenerator<UInt8Type, UInt8Type, Op>::Exec;
- case Type::INT16:
- case Type::UINT16:
- return KernelGenerator<UInt16Type, UInt16Type, Op>::Exec;
- case Type::INT32:
- case Type::UINT32:
- return KernelGenerator<UInt32Type, UInt32Type, Op>::Exec;
- case Type::INT64:
- case Type::UINT64:
- return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
- default:
- DCHECK(false);
- return nullptr;
- }
-}
-
-template <template <typename... Args> class KernelGenerator, typename Op>
-ArrayKernelExec ShiftExecFromOp(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::INT8:
- return KernelGenerator<Int8Type, Int8Type, Op>::Exec;
- case Type::UINT8:
- return KernelGenerator<UInt8Type, UInt8Type, Op>::Exec;
- case Type::INT16:
- return KernelGenerator<Int16Type, Int16Type, Op>::Exec;
- case Type::UINT16:
- return KernelGenerator<UInt16Type, UInt16Type, Op>::Exec;
- case Type::INT32:
- return KernelGenerator<Int32Type, Int32Type, Op>::Exec;
- case Type::UINT32:
- return KernelGenerator<UInt32Type, UInt32Type, Op>::Exec;
- case Type::INT64:
- return KernelGenerator<Int64Type, Int64Type, Op>::Exec;
- case Type::UINT64:
- return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
- default:
- DCHECK(false);
- return nullptr;
- }
-}
-
-template <template <typename... Args> class KernelGenerator, typename Op>
-ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::FLOAT:
- return KernelGenerator<FloatType, FloatType, Op>::Exec;
- case Type::DOUBLE:
- return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
- default:
- DCHECK(false);
- return nullptr;
- }
-}
-
-// resolve decimal binary operation output type per *casted* args
-template <typename OutputGetter>
-Result<TypeHolder> ResolveDecimalBinaryOperationOutput(
- const std::vector<TypeHolder>& types, OutputGetter&& getter) {
- // casted types should be same size decimals
- const auto& left_type = checked_cast<const DecimalType&>(*types[0]);
- const auto& right_type = checked_cast<const DecimalType&>(*types[1]);
- DCHECK_EQ(left_type.id(), right_type.id());
-
- int32_t precision, scale;
- std::tie(precision, scale) = getter(left_type.precision(), left_type.scale(),
- right_type.precision(),
right_type.scale());
- ARROW_ASSIGN_OR_RAISE(auto type, DecimalType::Make(left_type.id(),
precision, scale));
- return std::move(type);
-}
-
-// Generate a kernel given an arithmetic functor
-template <template <typename...> class KernelGenerator, typename OutType,
typename Op>
-ArrayKernelExec GenerateArithmeticWithFixedIntOutType(detail::GetTypeId
get_id) {
- switch (get_id.id) {
- case Type::INT8:
- return KernelGenerator<OutType, Int8Type, Op>::Exec;
- case Type::UINT8:
- return KernelGenerator<OutType, UInt8Type, Op>::Exec;
- case Type::INT16:
- return KernelGenerator<OutType, Int16Type, Op>::Exec;
- case Type::UINT16:
- return KernelGenerator<OutType, UInt16Type, Op>::Exec;
- case Type::INT32:
- return KernelGenerator<OutType, Int32Type, Op>::Exec;
- case Type::UINT32:
- return KernelGenerator<OutType, UInt32Type, Op>::Exec;
- case Type::INT64:
- case Type::TIMESTAMP:
- return KernelGenerator<OutType, Int64Type, Op>::Exec;
- case Type::UINT64:
- return KernelGenerator<OutType, UInt64Type, Op>::Exec;
- case Type::FLOAT:
- return KernelGenerator<FloatType, FloatType, Op>::Exec;
- case Type::DOUBLE:
- return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
- default:
- DCHECK(false);
- return nullptr;
- }
-}
-
struct RoundFunction : ScalarFunction {
using ScalarFunction::ScalarFunction;
@@ -896,7 +788,7 @@ struct RoundFunction : ScalarFunction {
};
/// A RoundFunction that promotes only decimal arguments to double.
-struct ArithmeticDecimalToFloatingPointFunction : public RoundFunction {
+struct RoundDecimalToFloatingPointFunction : public RoundFunction {
using RoundFunction::RoundFunction;
Result<const Kernel*> DispatchBest(std::vector<TypeHolder>* types) const
override {
@@ -970,64 +862,6 @@ struct RoundFloatingPointFunction : public RoundFunction {
}
};
-// A scalar kernel that ignores (assumed all-null) inputs and returns null.
-Status NullToNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
- return Status::OK();
-}
-
-void AddNullExec(ScalarFunction* func) {
- std::vector<InputType> input_types(func->arity().num_args,
InputType(Type::NA));
- DCHECK_OK(func->AddKernel(std::move(input_types), OutputType(null()),
NullToNullExec));
-}
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeUnaryRoundFunction(std::string name,
- FunctionDoc doc) {
- auto func = std::make_shared<RoundFunction>(name, Arity::Unary(),
std::move(doc));
- for (const auto& ty : NumericTypes()) {
- auto exec = ArithmeticExecFromOp<ScalarUnary, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, ty, exec));
- }
- AddNullExec(func.get());
- return func;
-}
-
-// Like MakeUnaryRoundFunction, but for unary arithmetic ops with a fixed
-// output type for integral inputs.
-template <typename Op, typename IntOutType>
-std::shared_ptr<ScalarFunction> MakeUnaryRoundFunctionWithFixedIntOutType(
- std::string name, FunctionDoc doc) {
- auto int_out_ty = TypeTraits<IntOutType>::type_singleton();
- auto func = std::make_shared<RoundFunction>(name, Arity::Unary(),
std::move(doc));
- for (const auto& ty : NumericTypes()) {
- auto out_ty = arrow::is_floating(ty->id()) ? ty : int_out_ty;
- auto exec = GenerateArithmeticWithFixedIntOutType<ScalarUnary, IntOutType,
Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, out_ty, exec));
- }
- {
- auto exec = ScalarUnary<Int64Type, Decimal128Type, Op>::Exec;
- DCHECK_OK(func->AddKernel({InputType(Type::DECIMAL128)}, int64(), exec));
- exec = ScalarUnary<Int64Type, Decimal256Type, Op>::Exec;
- DCHECK_OK(func->AddKernel({InputType(Type::DECIMAL256)}, int64(), exec));
- }
- AddNullExec(func.get());
- return func;
-}
-
-// Like MakeUnaryRoundFunction, but for arithmetic ops that need to run
-// only on non-null output.
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeUnaryRoundFunctionNotNull(std::string name,
- FunctionDoc doc)
{
- auto func = std::make_shared<RoundFunction>(name, Arity::Unary(),
std::move(doc));
- for (const auto& ty : NumericTypes()) {
- auto exec = ArithmeticExecFromOp<ScalarUnaryNotNull, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, ty, exec));
- }
- AddNullExec(func.get());
- return func;
-}
-
#define ROUND_CASE(MODE)
\
case RoundMode::MODE: {
\
using Op = OpImpl<Type, RoundMode::MODE>;
\
@@ -1097,8 +931,8 @@ struct RoundBinaryKernel {
};
#undef ROUND_BINARY_CASE
-// Like MakeUnaryRoundFunction, but for unary rounding functions that control
-// kernel dispatch based on RoundMode, only on non-null output.
+// For unary rounding functions that control kernel dispatch based on
RoundMode, only on
+// non-null output.
template <template <typename, RoundMode, typename...> class Op, typename
OptionsType>
std::shared_ptr<ScalarFunction> MakeUnaryRoundFunction(std::string name,
FunctionDoc doc) {
@@ -1169,46 +1003,6 @@ std::shared_ptr<ScalarFunction>
MakeBinaryRoundFunction(const std::string& name,
return func;
}
-// Like MakeUnaryRoundFunction, but for signed arithmetic ops that need to run
-// only on non-null output.
-template <typename Op>
-std::shared_ptr<ScalarFunction>
MakeUnarySignedRoundFunctionNotNull(std::string name,
-
FunctionDoc doc) {
- auto func = std::make_shared<RoundFunction>(name, Arity::Unary(),
std::move(doc));
- for (const auto& ty : NumericTypes()) {
- if (!arrow::is_unsigned_integer(ty->id())) {
- auto exec = ArithmeticExecFromOp<ScalarUnaryNotNull, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, ty, exec));
- }
- }
- AddNullExec(func.get());
- return func;
-}
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeBitWiseFunctionNotNull(std::string name,
- FunctionDoc doc) {
- auto func = std::make_shared<RoundFunction>(name, Arity::Binary(),
std::move(doc));
- for (const auto& ty : IntTypes()) {
- auto exec = TypeAgnosticBitWiseExecFromOp<ScalarBinaryNotNullEqualTypes,
Op>(ty);
- DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
- }
- AddNullExec(func.get());
- return func;
-}
-
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeShiftFunctionNotNull(std::string name,
- FunctionDoc doc) {
- auto func = std::make_shared<RoundFunction>(name, Arity::Binary(),
std::move(doc));
- for (const auto& ty : IntTypes()) {
- auto exec = ShiftExecFromOp<ScalarBinaryNotNullEqualTypes, Op>(ty);
- DCHECK_OK(func->AddKernel({ty, ty}, ty, exec));
- }
- AddNullExec(func.get());
- return func;
-}
-
template <typename Op, typename FunctionImpl = RoundFloatingPointFunction>
std::shared_ptr<ScalarFunction>
MakeUnaryRoundFunctionFloatingPoint(std::string name,
FunctionDoc doc) {
@@ -1221,19 +1015,6 @@ std::shared_ptr<ScalarFunction>
MakeUnaryRoundFunctionFloatingPoint(std::string
return func;
}
-template <typename Op>
-std::shared_ptr<ScalarFunction> MakeUnaryRoundFunctionFloatingPointNotNull(
- std::string name, FunctionDoc doc) {
- auto func =
- std::make_shared<RoundFloatingPointFunction>(name, Arity::Unary(),
std::move(doc));
- for (const auto& ty : FloatingPointTypes()) {
- auto exec = GenerateArithmeticFloatingPoint<ScalarUnaryNotNull, Op>(ty);
- DCHECK_OK(func->AddKernel({ty}, ty, exec));
- }
- AddNullExec(func.get());
- return func;
-}
-
const FunctionDoc floor_doc{
"Round down to the nearest integer",
("Compute the largest integer value not greater in magnitude than `x`."),
diff --git a/cpp/src/arrow/compute/kernels/util_internal.cc
b/cpp/src/arrow/compute/kernels/util_internal.cc
index 4293597129..50d31362f1 100644
--- a/cpp/src/arrow/compute/kernels/util_internal.cc
+++ b/cpp/src/arrow/compute/kernels/util_internal.cc
@@ -20,6 +20,7 @@
#include <cstdint>
#include "arrow/array/data.h"
+#include "arrow/compute/function.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
@@ -30,6 +31,14 @@ using internal::checked_cast;
namespace compute {
namespace internal {
+namespace {
+
+Status NullToNullExec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
+ return Status::OK();
+}
+
+} // namespace
+
ExecValue GetExecValue(const Datum& value) {
ExecValue result;
if (value.is_array()) {
@@ -49,6 +58,11 @@ int64_t GetTrueCount(const ArraySpan& mask) {
}
}
+void AddNullExec(ScalarFunction* func) {
+ std::vector<InputType> input_types(func->arity().num_args,
InputType(Type::NA));
+ DCHECK_OK(func->AddKernel(std::move(input_types), OutputType(null()),
NullToNullExec));
+}
+
} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/util_internal.h
b/cpp/src/arrow/compute/kernels/util_internal.h
index 5e283b5618..1fe139c117 100644
--- a/cpp/src/arrow/compute/kernels/util_internal.h
+++ b/cpp/src/arrow/compute/kernels/util_internal.h
@@ -35,6 +35,9 @@ using internal::CountAndSetBits;
using internal::CountSetBits;
namespace compute {
+
+class ScalarFunction;
+
namespace internal {
template <typename T>
@@ -136,6 +139,22 @@ ExecValue GetExecValue(const Datum& value);
int64_t GetTrueCount(const ArraySpan& mask);
+template <template <typename... Args> class KernelGenerator, typename Op>
+ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::FLOAT:
+ return KernelGenerator<FloatType, FloatType, Op>::Exec;
+ case Type::DOUBLE:
+ return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
+ default:
+ DCHECK(false);
+ return nullptr;
+ }
+}
+
+// A scalar kernel that ignores (assumed all-null) inputs and returns null.
+void AddNullExec(ScalarFunction* func);
+
} // namespace internal
} // namespace compute
} // namespace arrow
diff --git a/cpp/src/arrow/compute/registry.cc
b/cpp/src/arrow/compute/registry.cc
index 9f95290c12..f91ecfdc2f 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -27,6 +27,7 @@
#include "arrow/compute/function_internal.h"
#include "arrow/compute/registry_internal.h"
#include "arrow/status.h"
+#include "arrow/util/config.h" // For ARROW_COMPUTE
#include "arrow/util/logging.h"
namespace arrow {
@@ -272,10 +273,21 @@ namespace internal {
static std::unique_ptr<FunctionRegistry> CreateBuiltInRegistry() {
auto registry = FunctionRegistry::Make();
+ // Register core kernels
+ RegisterScalarCast(registry.get());
+ RegisterVectorHash(registry.get());
+ RegisterVectorSelection(registry.get());
+
+ RegisterScalarOptions(registry.get());
+ RegisterVectorOptions(registry.get());
+ RegisterAggregateOptions(registry.get());
+
+#ifdef ARROW_COMPUTE
+ // Register additional kernels
+
// Scalar functions
RegisterScalarArithmetic(registry.get());
RegisterScalarBoolean(registry.get());
- RegisterScalarCast(registry.get());
RegisterScalarComparison(registry.get());
RegisterScalarIfElse(registry.get());
RegisterScalarNested(registry.get());
@@ -288,21 +300,15 @@ static std::unique_ptr<FunctionRegistry>
CreateBuiltInRegistry() {
RegisterScalarTemporalUnary(registry.get());
RegisterScalarValidity(registry.get());
- RegisterScalarOptions(registry.get());
-
// Vector functions
RegisterVectorArraySort(registry.get());
RegisterVectorCumulativeSum(registry.get());
- RegisterVectorHash(registry.get());
RegisterVectorNested(registry.get());
RegisterVectorRank(registry.get());
RegisterVectorReplace(registry.get());
RegisterVectorSelectK(registry.get());
- RegisterVectorSelection(registry.get());
RegisterVectorSort(registry.get());
- RegisterVectorOptions(registry.get());
-
// Aggregate functions
RegisterHashAggregateBasic(registry.get());
RegisterScalarAggregateBasic(registry.get());
@@ -310,8 +316,7 @@ static std::unique_ptr<FunctionRegistry>
CreateBuiltInRegistry() {
RegisterScalarAggregateQuantile(registry.get());
RegisterScalarAggregateTDigest(registry.get());
RegisterScalarAggregateVariance(registry.get());
-
- RegisterAggregateOptions(registry.get());
+#endif
return registry;
}
diff --git a/cpp/src/arrow/csv/CMakeLists.txt b/cpp/src/arrow/csv/CMakeLists.txt
index 00c00a87f5..a112ca423e 100644
--- a/cpp/src/arrow/csv/CMakeLists.txt
+++ b/cpp/src/arrow/csv/CMakeLists.txt
@@ -21,12 +21,8 @@ set(CSV_TEST_SRCS
column_decoder_test.cc
converter_test.cc
parser_test.cc
- reader_test.cc)
-
-# Writer depends on compute's cast functionality
-if(ARROW_COMPUTE)
- list(APPEND CSV_TEST_SRCS writer_test.cc)
-endif()
+ reader_test.cc
+ writer_test.cc)
add_arrow_test(csv-test SOURCES ${CSV_TEST_SRCS})
diff --git a/cpp/src/arrow/csv/api.h b/cpp/src/arrow/csv/api.h
index 9f83efab29..4af1835cd7 100644
--- a/cpp/src/arrow/csv/api.h
+++ b/cpp/src/arrow/csv/api.h
@@ -19,9 +19,4 @@
#include "arrow/csv/options.h"
#include "arrow/csv/reader.h"
-
-// The writer depends on compute module for casting.
-#include "arrow/util/config.h" // for ARROW_COMPUTE definition
-#ifdef ARROW_COMPUTE
#include "arrow/csv/writer.h"
-#endif
diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc
index 9abff22950..20de827ced 100644
--- a/cpp/src/arrow/public_api_test.cc
+++ b/cpp/src/arrow/public_api_test.cc
@@ -22,13 +22,10 @@
// Include various "api.h" entrypoints and check they don't leak internal
symbols
-#include "arrow/api.h" // IWYU pragma: keep
-#include "arrow/io/api.h" // IWYU pragma: keep
-#include "arrow/ipc/api.h" // IWYU pragma: keep
-
-#ifdef ARROW_COMPUTE
+#include "arrow/api.h" // IWYU pragma: keep
#include "arrow/compute/api.h" // IWYU pragma: keep
-#endif
+#include "arrow/io/api.h" // IWYU pragma: keep
+#include "arrow/ipc/api.h" // IWYU pragma: keep
#ifdef ARROW_CSV
#include "arrow/csv/api.h" // IWYU pragma: keep
diff --git a/cpp/src/arrow/testing/generator.cc
b/cpp/src/arrow/testing/generator.cc
index ad0984e408..fc90a5cd88 100644
--- a/cpp/src/arrow/testing/generator.cc
+++ b/cpp/src/arrow/testing/generator.cc
@@ -289,7 +289,6 @@ class DataGeneratorImpl : public DataGenerator,
return batches;
}
-#ifdef ARROW_COMPUTE
Result<::arrow::compute::ExecBatch> ExecBatch(int64_t num_rows) override {
std::vector<Datum> values;
values.reserve(generators_.size());
@@ -318,7 +317,6 @@ class DataGeneratorImpl : public DataGenerator,
"exec_batch_source",
::arrow::compute::ExecBatchSourceNodeOptions(schema_,
std::move(batches)));
}
-#endif
Result<std::shared_ptr<::arrow::Table>> Table(int64_t rows_per_chunk,
int num_chunks = 1) override {
@@ -365,7 +363,7 @@ class GTestDataGeneratorImpl : public GTestDataGenerator {
target_->RecordBatches(rows_per_batch, num_batches));
return batches;
}
-#ifdef ARROW_COMPUTE
+
::arrow::compute::ExecBatch ExecBatch(int64_t num_rows) override {
EXPECT_OK_AND_ASSIGN(auto batch, target_->ExecBatch(num_rows));
return batch;
@@ -381,7 +379,7 @@ class GTestDataGeneratorImpl : public GTestDataGenerator {
target_->SourceNode(rows_per_batch, num_batches));
return source_node;
}
-#endif
+
std::shared_ptr<::arrow::Table> Table(int64_t rows_per_chunk, int
num_chunks) override {
EXPECT_OK_AND_ASSIGN(auto table, target_->Table(rows_per_chunk,
num_chunks));
return table;
diff --git a/cpp/src/arrow/testing/generator.h
b/cpp/src/arrow/testing/generator.h
index 9f02df4505..ecfc4ee640 100644
--- a/cpp/src/arrow/testing/generator.h
+++ b/cpp/src/arrow/testing/generator.h
@@ -252,13 +252,13 @@ class ARROW_TESTING_EXPORT GTestDataGenerator {
virtual std::shared_ptr<::arrow::RecordBatch> RecordBatch(int64_t num_rows)
= 0;
virtual std::vector<std::shared_ptr<::arrow::RecordBatch>> RecordBatches(
int64_t rows_per_batch, int num_batches) = 0;
-#ifdef ARROW_COMPUTE
+
virtual ::arrow::compute::ExecBatch ExecBatch(int64_t num_rows) = 0;
virtual std::vector<::arrow::compute::ExecBatch> ExecBatches(int64_t
rows_per_batch,
int
num_batches) = 0;
virtual ::arrow::compute::Declaration SourceNode(int64_t rows_per_batch,
int num_batches) = 0;
-#endif
+
virtual std::shared_ptr<::arrow::Table> Table(int64_t rows_per_chunk,
int num_chunks = 1) = 0;
virtual std::shared_ptr<::arrow::Schema> Schema() = 0;
@@ -270,13 +270,13 @@ class ARROW_TESTING_EXPORT DataGenerator {
virtual Result<std::shared_ptr<::arrow::RecordBatch>> RecordBatch(int64_t
num_rows) = 0;
virtual Result<std::vector<std::shared_ptr<::arrow::RecordBatch>>>
RecordBatches(
int64_t rows_per_batch, int num_batches) = 0;
-#ifdef ARROW_COMPUTE
+
virtual Result<::arrow::compute::ExecBatch> ExecBatch(int64_t num_rows) = 0;
virtual Result<std::vector<::arrow::compute::ExecBatch>> ExecBatches(
int64_t rows_per_batch, int num_batches) = 0;
virtual Result<::arrow::compute::Declaration> SourceNode(int64_t
rows_per_batch,
int num_batches) =
0;
-#endif
+
virtual Result<std::shared_ptr<::arrow::Table>> Table(int64_t rows_per_chunk,
int num_chunks = 1) =
0;
virtual std::shared_ptr<::arrow::Schema> Schema() = 0;