[ 
https://issues.apache.org/jira/browse/ARROW-15571?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17487559#comment-17487559
 ] 

Yaron Gvili commented on ARROW-15571:
-------------------------------------

Running the benchmark code below on my CPU ("Intel(R) Core(TM) i7-10875H CPU @ 
2.30GHz") showed that the "minimum"/"maximum" functions in my PR are faster 
than the existing "min_element_wise"/"max_element_wise" functions when 
operating on a single array/column. For an array of size 1M the speed-up factor 
was ~1.5, and it was even higher for shorter arrays.

 
{code:cpp}
#include <iostream>
#include <chrono>

#include <arrow/api.h>
#include <arrow/compare.h>
#include <arrow/datum.h>
#include <arrow/compute/api.h>

// Returns a monotonic timestamp in nanoseconds. The absolute value is
// meaningless; only the difference between two readings should be used.
//
// steady_clock is used because high_resolution_clock is permitted to be an
// alias of the non-monotonic system_clock, which can jump while a measurement
// is in progress. The explicit duration_cast pins the unit to nanoseconds
// rather than relying on the clock's implementation-defined tick period.
uint64_t get_time() {
  const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
  return static_cast<uint64_t>(
      std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count());
}

// Benchmarks clamping a double array to [0.1, 0.8] with two pairs of compute
// functions: "max_element_wise"/"min_element_wise" versus "maximum"/"minimum".
//
// Builds an array of n = 1,000,000 doubles (i/n for i in [0, n)), times each
// pair with get_time(), verifies that both produce equal arrays, and prints
// the two timings together with their ratio to stdout.
//
// argc/argv are accepted for symmetry with main() but are not used.
//
// Returns Status::OK() on success. Returns the underlying error Status if
// array building or any compute call fails, and Status::Invalid if the two
// result arrays differ.
arrow::Status RunMain(int argc, char **argv) {
  constexpr int n = 1000000;
  arrow::DoubleBuilder builder;
  // UnsafeAppend does not grow the buffer, so a failed Reserve must stop us
  // here instead of being silently ignored.
  ARROW_RETURN_NOT_OK(builder.Reserve(n));
  for (int i = 0; i < n; i++) {
    builder.UnsafeAppend(1.0 * i / n);
  }
  // Propagate the real failure reason rather than a generic message.
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> array, builder.Finish());

  std::shared_ptr<arrow::Scalar> low_threshold =
      std::make_shared<arrow::DoubleScalar>(0.1);
  std::shared_ptr<arrow::Scalar> high_threshold =
      std::make_shared<arrow::DoubleScalar>(0.8);

  uint64_t t;

  // Baseline: clamp from below, then from above, with the *_element_wise pair.
  t = get_time();
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum max0,
      arrow::compute::CallFunction("max_element_wise", {array, low_threshold}));
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum min0,
      arrow::compute::CallFunction("min_element_wise", {max0, high_threshold}));
  uint64_t t0 = get_time() - t;

  // Candidate: the same clamp using the "maximum"/"minimum" functions.
  t = get_time();
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum max1,
      arrow::compute::CallFunction("maximum", {array, low_threshold}));
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum min1,
      arrow::compute::CallFunction("minimum", {max1, high_threshold}));
  uint64_t t1 = get_time() - t;

  // The timings are only comparable if both paths compute the same result.
  if (!arrow::ArrayEquals(*min0.make_array(), *min1.make_array())) {
    return arrow::Status::Invalid("result arrays are not equal");
  }
  std::cout << "timing: " << t0 << " vs " << t1 << " (speed-up factor "
            << (1.0 * t0 / t1) << ")" << std::endl;

  return arrow::Status::OK();
}
  
int main(int argc, char **argv) {
  arrow::Status st = RunMain(argc, argv);
  if (!st.ok()) {
    std::cerr << st << std::endl;
    return 1;
  }
  return 0;
} {code}
 

 

> [C++] Add min/max/sqrt scalar kernels to execution engine
> ---------------------------------------------------------
>
>                 Key: ARROW-15571
>                 URL: https://issues.apache.org/jira/browse/ARROW-15571
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: C++
>            Reporter: Yaron Gvili
>            Priority: Major
>              Labels: kernel, pull-request-available
>          Time Spent: 40m
>  Remaining Estimate: 0h
>
> The list of the execution engine's scalar kernels currently available in 
> `cpp/src/arrow/compute/kernels/scalar_arithmetic.cc` does not cover the 
> common minimum, maximum, and square-root functions.



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

Reply via email to