[
https://issues.apache.org/jira/browse/ARROW-15571?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17487559#comment-17487559
]
Yaron Gvili commented on ARROW-15571:
-------------------------------------
Running the benchmark code below on my CPU "Intel(R) Core(TM) i7-10875H CPU @
2.30GHz" showed that the minimum/maximum operations in my PR are faster than
the existing min_element_wise/max_element_wise functions when operating on a
single array/column. For an array of size 1M the speed-up factor was ~1.5, and
it was even higher for shorter arrays.
{code:java}
#include <iostream>
#include <chrono>
#include <arrow/api.h>
#include <arrow/compare.h>
#include <arrow/datum.h>
#include <arrow/compute/api.h>
// Returns a monotonic timestamp in nanoseconds, intended for measuring
// elapsed time by subtracting two readings.
//
// std::chrono::steady_clock is used rather than high_resolution_clock because
// the latter is permitted to alias the adjustable system clock; a clock
// adjustment between two readings could then make the later reading smaller
// and underflow an unsigned subtraction. The explicit duration_cast pins the
// tick unit, which is otherwise implementation-defined.
uint64_t get_time() {
  const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
  return static_cast<uint64_t>(
      std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count());
}
// Benchmarks clamping a 1,000,000-element double array (values i/n) to the
// range [0.1, 0.8] in two ways: with "max_element_wise"/"min_element_wise" and
// with "maximum"/"minimum". It then verifies both results are equal and prints
// the two elapsed times (in get_time() ticks) and their ratio to stdout.
//
// argc/argv are accepted but not used.
//
// Returns OK on success. Returns the failing Arrow status if reserving space
// or a compute call fails, and Invalid if the array cannot be built or the
// two result arrays differ.
//
// NOTE(review): each variant is timed exactly once with no warm-up run, so the
// first-timed variant may absorb one-time start-up costs — worth confirming
// with repeated runs before relying on the ratio.
arrow::Status RunMain(int argc, char **argv) {
  constexpr int n = 1000000;

  arrow::DoubleBuilder builder;
  // UnsafeAppend performs no capacity check, so the Reserve result must not be
  // ignored: bail out instead of appending into space that was never reserved.
  arrow::Status reserve_status = builder.Reserve(n);
  if (!reserve_status.ok()) {
    return reserve_status;
  }
  for (int i = 0; i < n; i++) {
    builder.UnsafeAppend(1.0 * i / n);
  }

  auto maybe_array = builder.Finish();
  if (!maybe_array.ok()) {
    // Keep the original message prefix but include the underlying cause.
    return arrow::Status::Invalid("array building failed: ",
                                  maybe_array.status().ToString());
  }
  std::shared_ptr<arrow::Array> array = *maybe_array;

  std::shared_ptr<arrow::Scalar> low_threshold =
      std::make_shared<arrow::DoubleScalar>(0.1);
  std::shared_ptr<arrow::Scalar> high_threshold =
      std::make_shared<arrow::DoubleScalar>(0.8);

  // Variant 0: the element-wise functions.
  const uint64_t start0 = get_time();
  ARROW_ASSIGN_OR_RAISE(arrow::Datum max0,
      arrow::compute::CallFunction("max_element_wise", {array, low_threshold}));
  ARROW_ASSIGN_OR_RAISE(arrow::Datum min0,
      arrow::compute::CallFunction("min_element_wise", {max0, high_threshold}));
  const uint64_t t0 = get_time() - start0;

  // Variant 1: the "maximum"/"minimum" functions.
  const uint64_t start1 = get_time();
  ARROW_ASSIGN_OR_RAISE(arrow::Datum max1,
      arrow::compute::CallFunction("maximum", {array, low_threshold}));
  ARROW_ASSIGN_OR_RAISE(arrow::Datum min1,
      arrow::compute::CallFunction("minimum", {max1, high_threshold}));
  const uint64_t t1 = get_time() - start1;

  // The timings are only comparable if both variants computed the same thing.
  if (!arrow::ArrayEquals(*min0.make_array(), *min1.make_array())) {
    return arrow::Status::Invalid("result arrays are not equal");
  }

  std::cout << "timing: " << t0 << " vs " << t1 << " (speed-up factor "
            << (1.0 * t0 / t1) << ")" << std::endl;
  return arrow::Status::OK();
}
int main(int argc, char **argv) {
arrow::Status st = RunMain(argc, argv);
if (!st.ok()) {
std::cerr << st << std::endl;
return 1;
}
return 0;
} {code}
> [C++] Add min/max/sqrt scalar kernels to execution engine
> ---------------------------------------------------------
>
> Key: ARROW-15571
> URL: https://issues.apache.org/jira/browse/ARROW-15571
> Project: Apache Arrow
> Issue Type: Improvement
> Components: C++
> Reporter: Yaron Gvili
> Priority: Major
> Labels: kernel, pull-request-available
> Time Spent: 40m
> Remaining Estimate: 0h
>
> The list of execution engine's scalar kernels currently available in
> `cpp/src/arrow/compute/kernels/scalar_arithmetic.cc` does not cover the
> common minimum, maximum, and square-root functions.
--
This message was sent by Atlassian Jira
(v8.20.1#820001)