This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1214083f7e ARROW-17135: [C++] Reduce code size in
compute/kernels/scalar_compare.cc (#13654)
1214083f7e is described below
commit 1214083f7ece4e1797b7f3cdecfec1c2cfa8bf89
Author: Wes McKinney <[email protected]>
AuthorDate: Wed Jul 20 13:12:23 2022 -0700
ARROW-17135: [C++] Reduce code size in compute/kernels/scalar_compare.cc
(#13654)
This "leaner" implementation reduces the generated code size of this C++
file from 2307768 bytes to 1192608 bytes when compiled with gcc 10.3.0. The
benchmarks are also faster (on my AVX2 laptop):
before
```
-----------------------------------------------------------------------------------------------
Benchmark Time CPU
Iterations UserCounters...
-----------------------------------------------------------------------------------------------
GreaterArrayArrayInt64/32768/10000 32.1 us 32.1 us
21533 items_per_second=1020.16M/s null_percent=0.01 size=32.768k
GreaterArrayArrayInt64/32768/100 32.1 us 32.1 us
21603 items_per_second=1019.27M/s null_percent=1 size=32.768k
GreaterArrayArrayInt64/32768/10 32.1 us 32.1 us
21479 items_per_second=1020.82M/s null_percent=10 size=32.768k
GreaterArrayArrayInt64/32768/2 32.0 us 32.0 us
21468 items_per_second=1023.12M/s null_percent=50 size=32.768k
GreaterArrayArrayInt64/32768/1 32.3 us 32.3 us
21720 items_per_second=1013.44M/s null_percent=100 size=32.768k
GreaterArrayArrayInt64/32768/0 31.6 us 31.6 us
21828 items_per_second=1036.94M/s null_percent=0 size=32.768k
GreaterArrayScalarInt64/32768/10000 20.8 us 20.8 us
33461 items_per_second=1.57238G/s null_percent=0.01 size=32.768k
GreaterArrayScalarInt64/32768/100 20.9 us 20.9 us
33625 items_per_second=1.56611G/s null_percent=1 size=32.768k
GreaterArrayScalarInt64/32768/10 20.8 us 20.8 us
33553 items_per_second=1.57338G/s null_percent=10 size=32.768k
GreaterArrayScalarInt64/32768/2 20.9 us 20.9 us
33348 items_per_second=1.5687G/s null_percent=50 size=32.768k
GreaterArrayScalarInt64/32768/1 20.9 us 20.9 us
33419 items_per_second=1.56879G/s null_percent=100 size=32.768k
GreaterArrayScalarInt64/32768/0 20.5 us 20.5 us
34116 items_per_second=1.59837G/s null_percent=0 size=32.768k
```
after
```
-----------------------------------------------------------------------------------------------
Benchmark Time CPU
Iterations UserCounters...
-----------------------------------------------------------------------------------------------
GreaterArrayArrayInt64/32768/10000 18.1 us 18.1 us
38751 items_per_second=1.81199G/s null_percent=0.01 size=32.768k
GreaterArrayArrayInt64/32768/100 17.5 us 17.5 us
39374 items_per_second=1.86821G/s null_percent=1 size=32.768k
GreaterArrayArrayInt64/32768/10 19.0 us 19.0 us
33941 items_per_second=1.72066G/s null_percent=10 size=32.768k
GreaterArrayArrayInt64/32768/2 18.0 us 18.0 us
39589 items_per_second=1.81817G/s null_percent=50 size=32.768k
GreaterArrayArrayInt64/32768/1 18.1 us 18.1 us
39061 items_per_second=1.80719G/s null_percent=100 size=32.768k
GreaterArrayArrayInt64/32768/0 17.5 us 17.5 us
39813 items_per_second=1.87031G/s null_percent=0 size=32.768k
GreaterArrayScalarInt64/32768/10000 16.3 us 16.3 us
42281 items_per_second=2.01525G/s null_percent=0.01 size=32.768k
GreaterArrayScalarInt64/32768/100 16.5 us 16.5 us
42266 items_per_second=1.98195G/s null_percent=1 size=32.768k
GreaterArrayScalarInt64/32768/10 16.5 us 16.5 us
41872 items_per_second=1.98615G/s null_percent=10 size=32.768k
GreaterArrayScalarInt64/32768/2 16.3 us 16.3 us
42130 items_per_second=2.00447G/s null_percent=50 size=32.768k
GreaterArrayScalarInt64/32768/1 16.2 us 16.2 us
42391 items_per_second=2.02296G/s null_percent=100 size=32.768k
GreaterArrayScalarInt64/32768/0 15.9 us 15.9 us
43498 items_per_second=2.0614G/s null_percent=0 size=32.768k
```
Authored-by: Wes McKinney <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
---
cpp/src/arrow/compute/kernels/codegen_internal.cc | 4 -
cpp/src/arrow/compute/kernels/codegen_internal.h | 33 +--
cpp/src/arrow/compute/kernels/scalar_arithmetic.cc | 8 +-
cpp/src/arrow/compute/kernels/scalar_compare.cc | 241 +++++++++++++++++----
cpp/src/arrow/util/bit_util.h | 10 +
cpp/tools/binary_symbol_explore.py | 1 +
6 files changed, 226 insertions(+), 71 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc
b/cpp/src/arrow/compute/kernels/codegen_internal.cc
index 66724727fd..7be51188e0 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.cc
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc
@@ -29,10 +29,6 @@ namespace arrow {
namespace compute {
namespace internal {
-Status ExecFail(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
- return Status::NotImplemented("This kernel is malformed");
-}
-
const std::vector<std::shared_ptr<DataType>>& ExampleParametricTypes() {
static DataTypeVector example_parametric_types = {
decimal128(12, 2),
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h
b/cpp/src/arrow/compute/kernels/codegen_internal.h
index f008314e8b..a6ede14176 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -964,8 +964,6 @@ struct FailFunctor<VectorKernel::ChunkedExec> {
}
};
-Status ExecFail(KernelContext* ctx, const ExecSpan& batch, ExecResult* out);
-
// GD for numeric types (integer and floating point)
template <template <typename...> class Generator, typename Type0,
typename KernelType = ArrayKernelExec, typename... Args>
@@ -1009,7 +1007,7 @@ ArrayKernelExec GenerateFloatingPoint(detail::GetTypeId
get_id) {
return Generator<Type0, DoubleType, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1037,7 +1035,7 @@ ArrayKernelExec GenerateInteger(detail::GetTypeId get_id)
{
return Generator<Type0, UInt64Type, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1068,7 +1066,7 @@ ArrayKernelExec GeneratePhysicalInteger(detail::GetTypeId
get_id) {
return Generator<Type0, UInt64Type, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1104,8 +1102,9 @@ KernelType ArithmeticExecFromOp(detail::GetTypeId get_id)
{
}
}
-template <template <typename... Args> class Generator, typename... Args>
-ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
+template <typename ReturnType, template <typename... Args> class Generator,
+ typename... Args>
+ReturnType GeneratePhysicalNumericGeneric(detail::GetTypeId get_id) {
switch (get_id.id) {
case Type::INT8:
return Generator<Int8Type, Args...>::Exec;
@@ -1135,9 +1134,13 @@ ArrayKernelExec
GeneratePhysicalNumeric(detail::GetTypeId get_id) {
return Generator<DoubleType, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
+template <template <typename... Args> class Generator, typename... Args>
+ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
+ return GeneratePhysicalNumericGeneric<ArrayKernelExec, Generator,
Args...>(get_id);
+}
// Generate a kernel given a templated functor for decimal types
template <template <typename... Args> class Generator, typename... Args>
@@ -1149,7 +1152,7 @@ ArrayKernelExec
GenerateDecimalToDecimal(detail::GetTypeId get_id) {
return Generator<Decimal256Type, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1169,7 +1172,7 @@ ArrayKernelExec GenerateSignedInteger(detail::GetTypeId
get_id) {
return Generator<Type0, Int64Type, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1249,7 +1252,7 @@ ArrayKernelExec
GenerateVarBinaryToVarBinary(detail::GetTypeId get_id) {
return Generator<LargeStringType, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1270,7 +1273,7 @@ ArrayKernelExec GenerateVarBinaryBase(detail::GetTypeId
get_id) {
return Generator<Type0, LargeBinaryType, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1288,7 +1291,7 @@ ArrayKernelExec GenerateVarBinary(detail::GetTypeId
get_id) {
return Generator<Type0, LargeStringType, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1312,7 +1315,7 @@ ArrayKernelExec GenerateTemporal(detail::GetTypeId
get_id) {
return Generator<Type0, TimestampType, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1328,7 +1331,7 @@ ArrayKernelExec GenerateDecimal(detail::GetTypeId get_id)
{
return Generator<Type0, Decimal256Type, Args...>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index e513e07d49..984c3b5653 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -1021,7 +1021,7 @@ ArrayKernelExec
TypeAgnosticBitWiseExecFromOp(detail::GetTypeId get_id) {
return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1046,7 +1046,7 @@ ArrayKernelExec ShiftExecFromOp(detail::GetTypeId get_id)
{
return KernelGenerator<UInt64Type, UInt64Type, Op>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1059,7 +1059,7 @@ ArrayKernelExec
GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
@@ -1188,7 +1188,7 @@ ArrayKernelExec
GenerateArithmeticWithFixedIntOutType(detail::GetTypeId get_id)
return KernelGenerator<DoubleType, DoubleType, Op>::Exec;
default:
DCHECK(false);
- return ExecFail;
+ return nullptr;
}
}
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc
b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 07778ca113..f071986dd2 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -158,11 +158,145 @@ struct Maximum {
// Implement Less, LessEqual by flipping arguments to Greater, GreaterEqual
-template <typename OutType, typename ArgType, typename Op>
-struct CompareTimestamps
- : public applicator::ScalarBinaryEqualTypes<OutType, ArgType, Op> {
- using Base = applicator::ScalarBinaryEqualTypes<OutType, ArgType, Op>;
+template <typename Type, typename Op>
+struct ComparePrimitiveArrayArray {
+ using T = typename Type::c_type;
+ static void Exec(const void* left_values_void, const void* right_values_void,
+ int64_t length, void* out_bitmap_void) {
+ const T* left_values = reinterpret_cast<const T*>(left_values_void);
+ const T* right_values = reinterpret_cast<const T*>(right_values_void);
+ uint8_t* out_bitmap = reinterpret_cast<uint8_t*>(out_bitmap_void);
+ static constexpr int kBatchSize = 32;
+ int64_t num_batches = length / kBatchSize;
+ uint32_t temp_output[kBatchSize];
+ for (int64_t j = 0; j < num_batches; ++j) {
+ for (int i = 0; i < kBatchSize; ++i) {
+ temp_output[i] = Op::template Call<bool, T, T>(nullptr, *left_values++,
+ *right_values++,
nullptr);
+ }
+ bit_util::PackBits<kBatchSize>(temp_output, out_bitmap);
+ out_bitmap += kBatchSize / 8;
+ }
+ int64_t bit_index = 0;
+ for (int64_t j = kBatchSize * num_batches; j < length; ++j) {
+ bit_util::SetBitTo(out_bitmap, bit_index++,
+ Op::template Call<bool, T, T>(nullptr, *left_values++,
+ *right_values++,
nullptr));
+ }
+ }
+};
+
+template <typename Type, typename Op>
+struct ComparePrimitiveArrayScalar {
+ using T = typename Type::c_type;
+ static void Exec(const void* left_values_void, const void* right_value_void,
+ int64_t length, void* out_bitmap_void) {
+ const T* left_values = reinterpret_cast<const T*>(left_values_void);
+ const T right_value = *reinterpret_cast<const T*>(right_value_void);
+ uint8_t* out_bitmap = reinterpret_cast<uint8_t*>(out_bitmap_void);
+ static constexpr int kBatchSize = 32;
+ int64_t num_batches = length / kBatchSize;
+ uint32_t temp_output[kBatchSize];
+ for (int64_t j = 0; j < num_batches; ++j) {
+ for (int i = 0; i < kBatchSize; ++i) {
+ temp_output[i] =
+ Op::template Call<bool, T, T>(nullptr, *left_values++,
right_value, nullptr);
+ }
+ bit_util::PackBits<kBatchSize>(temp_output, out_bitmap);
+ out_bitmap += kBatchSize / 8;
+ }
+ int64_t bit_index = 0;
+ for (int64_t j = kBatchSize * num_batches; j < length; ++j) {
+ bit_util::SetBitTo(
+ out_bitmap, bit_index++,
+ Op::template Call<bool, T, T>(nullptr, *left_values++, right_value,
nullptr));
+ }
+ }
+};
+
+template <typename Type, typename Op>
+struct ComparePrimitiveScalarArray {
+ using T = typename Type::c_type;
+ static void Exec(const void* left_value_void, const void* right_values_void,
+ int64_t length, void* out_bitmap_void) {
+ const T left_value = *reinterpret_cast<const T*>(left_value_void);
+ const T* right_values = reinterpret_cast<const T*>(right_values_void);
+ uint8_t* out_bitmap = reinterpret_cast<uint8_t*>(out_bitmap_void);
+ static constexpr int kBatchSize = 32;
+ int64_t num_batches = length / kBatchSize;
+ uint32_t temp_output[kBatchSize];
+ for (int64_t j = 0; j < num_batches; ++j) {
+ for (int i = 0; i < kBatchSize; ++i) {
+ temp_output[i] =
+ Op::template Call<bool, T, T>(nullptr, left_value,
*right_values++, nullptr);
+ }
+ bit_util::PackBits<kBatchSize>(temp_output, out_bitmap);
+ out_bitmap += kBatchSize / 8;
+ }
+ int64_t bit_index = 0;
+ for (int64_t j = kBatchSize * num_batches; j < length; ++j) {
+ bit_util::SetBitTo(
+ out_bitmap, bit_index++,
+ Op::template Call<bool, T, T>(nullptr, left_value, *right_values++,
nullptr));
+ }
+ }
+};
+
+using BinaryKernel = void (*)(const void*, const void*, int64_t, void*);
+struct CompareData : public KernelState {
+ BinaryKernel func_aa;
+ BinaryKernel func_sa;
+ BinaryKernel func_as;
+ CompareData(BinaryKernel func_aa, BinaryKernel func_sa, BinaryKernel func_as)
+ : func_aa(func_aa), func_sa(func_sa), func_as(func_as) {}
+};
+
+template <typename Type>
+struct CompareKernel {
+ using T = typename Type::c_type;
+
+ static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
+ const auto kernel = static_cast<const ScalarKernel*>(ctx->kernel());
+ DCHECK(kernel);
+ const auto kernel_data = checked_cast<const
CompareData*>(kernel->data.get());
+
+ ArraySpan* out_arr = out->array_span();
+
+ // TODO: implement path for offset not multiple of 8
+ const bool out_is_byte_aligned = out_arr->offset % 8 == 0;
+
+ std::shared_ptr<Buffer> out_buffer_tmp;
+ uint8_t* out_buffer;
+ if (out_is_byte_aligned) {
+ out_buffer = out_arr->buffers[1].data + out_arr->offset / 8;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(out_buffer_tmp,
+
ctx->Allocate(bit_util::BytesForBits(batch.length)));
+ out_buffer = out_buffer_tmp->mutable_data();
+ }
+ if (batch[0].is_array() && batch[1].is_array()) {
+ kernel_data->func_aa(batch[0].array.GetValues<T>(1),
batch[1].array.GetValues<T>(1),
+ batch.length, out_buffer);
+ } else if (batch[1].is_scalar()) {
+ T value = UnboxScalar<Type>::Unbox(*batch[1].scalar);
+ kernel_data->func_as(batch[0].array.GetValues<T>(1), &value,
batch.length,
+ out_buffer);
+ } else {
+ T value = UnboxScalar<Type>::Unbox(*batch[0].scalar);
+ kernel_data->func_sa(&value, batch[1].array.GetValues<T>(1),
batch.length,
+ out_buffer);
+ }
+ if (!out_is_byte_aligned) {
+ ::arrow::internal::CopyBitmap(out_buffer, /*offset=*/0, batch.length,
+ out_arr->buffers[1].data, out_arr->offset);
+ }
+ return Status::OK();
+ }
+};
+
+template <typename Op>
+struct CompareTimestamps {
static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult*
out) {
const auto& lhs = checked_cast<const TimestampType&>(*batch[0].type());
const auto& rhs = checked_cast<const TimestampType&>(*batch[1].type());
@@ -171,22 +305,34 @@ struct CompareTimestamps
"Cannot compare timestamp with timezone to timestamp without
timezone, got: ",
lhs, " and ", rhs);
}
- return Base::Exec(ctx, batch, out);
+ return CompareKernel<Int64Type>::Exec(ctx, batch, out);
}
};
template <typename Op>
-void AddIntegerCompare(const std::shared_ptr<DataType>& ty, ScalarFunction*
func) {
- auto exec =
- GeneratePhysicalInteger<applicator::ScalarBinaryEqualTypes, BooleanType,
Op>(*ty);
- DCHECK_OK(func->AddKernel({ty, ty}, boolean(), std::move(exec)));
+ScalarKernel GetCompareKernel(InputType ty, Type::type compare_type,
+ ArrayKernelExec exec) {
+ ScalarKernel kernel;
+ kernel.signature = KernelSignature::Make({ty, ty}, boolean());
+ BinaryKernel func_aa =
+ GeneratePhysicalNumericGeneric<BinaryKernel, ComparePrimitiveArrayArray,
Op>(
+ compare_type);
+ BinaryKernel func_sa =
+ GeneratePhysicalNumericGeneric<BinaryKernel,
ComparePrimitiveScalarArray, Op>(
+ compare_type);
+ BinaryKernel func_as =
+ GeneratePhysicalNumericGeneric<BinaryKernel,
ComparePrimitiveArrayScalar, Op>(
+ compare_type);
+ kernel.data = std::make_shared<CompareData>(func_aa, func_sa, func_as);
+ kernel.exec = exec;
+ return kernel;
}
-template <typename InType, typename Op>
-void AddGenericCompare(const std::shared_ptr<DataType>& ty, ScalarFunction*
func) {
- DCHECK_OK(
- func->AddKernel({ty, ty}, boolean(),
- applicator::ScalarBinaryEqualTypes<BooleanType, InType,
Op>::Exec));
+template <typename Op>
+void AddPrimitiveCompare(const std::shared_ptr<DataType>& ty, ScalarFunction*
func) {
+ ArrayKernelExec exec = GeneratePhysicalNumeric<CompareKernel>(ty);
+ ScalarKernel kernel = GetCompareKernel<Op>(ty, ty->id(), exec);
+ DCHECK_OK(func->AddKernel(kernel));
}
struct CompareFunction : ScalarFunction {
@@ -247,45 +393,37 @@ std::shared_ptr<ScalarFunction>
MakeCompareFunction(std::string name, FunctionDo
{boolean(), boolean()}, boolean(),
applicator::ScalarBinary<BooleanType, BooleanType, BooleanType,
Op>::Exec));
- for (const std::shared_ptr<DataType>& ty : IntTypes()) {
- AddIntegerCompare<Op>(ty, func.get());
+ for (const std::shared_ptr<DataType>& ty : NumericTypes()) {
+ AddPrimitiveCompare<Op>(ty, func.get());
}
- AddIntegerCompare<Op>(date32(), func.get());
- AddIntegerCompare<Op>(date64(), func.get());
-
- AddGenericCompare<FloatType, Op>(float32(), func.get());
- AddGenericCompare<DoubleType, Op>(float64(), func.get());
+ AddPrimitiveCompare<Op>(date32(), func.get());
+ AddPrimitiveCompare<Op>(date64(), func.get());
// Add timestamp kernels
for (auto unit : TimeUnit::values()) {
InputType in_type(match::TimestampTypeUnit(unit));
- DCHECK_OK(func->AddKernel({in_type, in_type}, boolean(),
- CompareTimestamps<BooleanType, TimestampType,
Op>::Exec));
+ ScalarKernel kernel =
+ GetCompareKernel<Op>(in_type, Type::INT64,
CompareTimestamps<Op>::Exec);
+ DCHECK_OK(func->AddKernel(kernel));
}
// Duration
for (auto unit : TimeUnit::values()) {
InputType in_type(match::DurationTypeUnit(unit));
- auto exec =
- GeneratePhysicalInteger<applicator::ScalarBinaryEqualTypes,
BooleanType, Op>(
- int64());
- DCHECK_OK(func->AddKernel({in_type, in_type}, boolean(), std::move(exec)));
+ ArrayKernelExec exec = GeneratePhysicalNumeric<CompareKernel>(int64());
+ DCHECK_OK(func->AddKernel(GetCompareKernel<Op>(in_type, Type::INT64,
exec)));
}
// Time32 and Time64
for (auto unit : {TimeUnit::SECOND, TimeUnit::MILLI}) {
InputType in_type(match::Time32TypeUnit(unit));
- auto exec =
- GeneratePhysicalInteger<applicator::ScalarBinaryEqualTypes,
BooleanType, Op>(
- int32());
- DCHECK_OK(func->AddKernel({in_type, in_type}, boolean(), std::move(exec)));
+ ArrayKernelExec exec = GeneratePhysicalNumeric<CompareKernel>(int32());
+ DCHECK_OK(func->AddKernel(GetCompareKernel<Op>(in_type, Type::INT32,
exec)));
}
for (auto unit : {TimeUnit::MICRO, TimeUnit::NANO}) {
InputType in_type(match::Time64TypeUnit(unit));
- auto exec =
- GeneratePhysicalInteger<applicator::ScalarBinaryEqualTypes,
BooleanType, Op>(
- int64());
- DCHECK_OK(func->AddKernel({in_type, in_type}, boolean(), std::move(exec)));
+ ArrayKernelExec exec = GeneratePhysicalNumeric<CompareKernel>(int64());
+ DCHECK_OK(func->AddKernel(GetCompareKernel<Op>(in_type, Type::INT64,
exec)));
}
for (const std::shared_ptr<DataType>& ty : BaseBinaryTypes()) {
@@ -310,30 +448,37 @@ std::shared_ptr<ScalarFunction>
MakeCompareFunction(std::string name, FunctionDo
return func;
}
-struct FlippedData : public KernelState {
+struct FlippedData : public CompareData {
ArrayKernelExec unflipped_exec;
- explicit FlippedData(ArrayKernelExec unflipped_exec) :
unflipped_exec(unflipped_exec) {}
+ explicit FlippedData(ArrayKernelExec unflipped_exec, BinaryKernel func_aa =
nullptr,
+ BinaryKernel func_sa = nullptr, BinaryKernel func_as =
nullptr)
+ : CompareData{func_aa, func_sa, func_as}, unflipped_exec(unflipped_exec)
{}
};
-Status FlippedBinaryExec(KernelContext* ctx, const ExecSpan& span, ExecResult*
out) {
+Status FlippedCompare(KernelContext* ctx, const ExecSpan& span, ExecResult*
out) {
const auto kernel = static_cast<const ScalarKernel*>(ctx->kernel());
- DCHECK(kernel);
- const auto kernel_data = static_cast<const FlippedData*>(kernel->data.get());
-
+ const auto kernel_data = checked_cast<const
FlippedData*>(kernel->data.get());
ExecSpan flipped_span = span;
std::swap(flipped_span.values[0], flipped_span.values[1]);
return kernel_data->unflipped_exec(ctx, flipped_span, out);
}
-std::shared_ptr<ScalarFunction> MakeFlippedFunction(std::string name,
- const ScalarFunction& func,
- FunctionDoc doc) {
+std::shared_ptr<ScalarFunction> MakeFlippedCompare(std::string name,
+ const ScalarFunction& func,
+ FunctionDoc doc) {
auto flipped_func =
std::make_shared<CompareFunction>(name, Arity::Binary(), std::move(doc));
for (const ScalarKernel* kernel : func.kernels()) {
ScalarKernel flipped_kernel = *kernel;
- flipped_kernel.data = std::make_shared<FlippedData>(kernel->exec);
- flipped_kernel.exec = FlippedBinaryExec;
+ if (kernel->data) {
+ auto compare_data = checked_cast<const CompareData*>(kernel->data.get());
+ flipped_kernel.data =
+ std::make_shared<FlippedData>(kernel->exec, compare_data->func_aa,
+ compare_data->func_sa,
compare_data->func_as);
+ } else {
+ flipped_kernel.data = std::make_shared<FlippedData>(kernel->exec);
+ }
+ flipped_kernel.exec = FlippedCompare;
DCHECK_OK(flipped_func->AddKernel(std::move(flipped_kernel)));
}
return flipped_func;
@@ -750,8 +895,8 @@ void RegisterScalarComparison(FunctionRegistry* registry) {
auto greater_equal =
MakeCompareFunction<GreaterEqual>("greater_equal", greater_equal_doc);
- auto less = MakeFlippedFunction("less", *greater, less_doc);
- auto less_equal = MakeFlippedFunction("less_equal", *greater_equal,
less_equal_doc);
+ auto less = MakeFlippedCompare("less", *greater, less_doc);
+ auto less_equal = MakeFlippedCompare("less_equal", *greater_equal,
less_equal_doc);
DCHECK_OK(registry->AddFunction(std::move(less)));
DCHECK_OK(registry->AddFunction(std::move(less_equal)));
DCHECK_OK(registry->AddFunction(std::move(greater)));
diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h
index 8583e10b22..04ab07af1d 100644
--- a/cpp/src/arrow/util/bit_util.h
+++ b/cpp/src/arrow/util/bit_util.h
@@ -353,5 +353,15 @@ constexpr Word SpliceWord(int n, Word low, Word high) {
return (high & ~PrecedingWordBitmask<Word>(n)) | (low &
PrecedingWordBitmask<Word>(n));
}
+/// \brief Pack integers into a bitmap in batches of 8
+template <int batch_size>
+void PackBits(const uint32_t* values, uint8_t* out) {
+ for (int i = 0; i < batch_size / 8; ++i) {
+ *out++ = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 |
+ values[4] << 4 | values[5] << 5 | values[6] << 6 | values[7] <<
7);
+ values += 8;
+ }
+}
+
} // namespace bit_util
} // namespace arrow
diff --git a/cpp/tools/binary_symbol_explore.py
b/cpp/tools/binary_symbol_explore.py
index dfe81cea84..d7fa54da1b 100644
--- a/cpp/tools/binary_symbol_explore.py
+++ b/cpp/tools/binary_symbol_explore.py
@@ -114,6 +114,7 @@ if __name__ == '__main__':
'contender', 'diff'])
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 150
+ print(diff[diff['diff'] < - 700])
print(diff[diff['diff'] > 700])
else:
# TODO