This is an automated email from the ASF dual-hosted git repository.

bkietz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b7044a1  ARROW-7336: [C++][Compute] fix minmax kernel options
b7044a1 is described below

commit b7044a15b0008f85c0ba619ea60a1515dd657e1c
Author: Yuan Zhou <[email protected]>
AuthorDate: Thu Apr 9 12:11:03 2020 -0400

    ARROW-7336: [C++][Compute] fix minmax kernel options
    
    minmax kernel has MinMaxOptions but not used.
    This patch allows user to make use of this option
    
    Signed-off-by: Yuan Zhou <[email protected]>
    
    Closes #5981 from zhouyuan/wip_minmax_option
    
    Lead-authored-by: Yuan Zhou <[email protected]>
    Co-authored-by: Benjamin Kietzman <[email protected]>
    Signed-off-by: Benjamin Kietzman <[email protected]>
---
 cpp/src/arrow/compute/kernels/aggregate_test.cc | 90 +++++++++++++++++++++----
 cpp/src/arrow/compute/kernels/minmax.cc         | 22 +++++-
 cpp/src/arrow/compute/kernels/minmax.h          | 13 +++-
 3 files changed, 108 insertions(+), 17 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc 
b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index e139675..bb48861 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -40,6 +40,7 @@
 
 namespace arrow {
 
+using internal::checked_cast;
 using internal::checked_pointer_cast;
 
 namespace compute {
@@ -364,25 +365,54 @@ class TestNumericMinMaxKernel : public ComputeFixture, 
public TestBase {
   using ScalarType = typename Traits::ScalarType;
 
  public:
-  template <typename Min, typename Max>
-  void AssertMinMaxIs(std::string array_json, Min expected_min, Max 
expected_max,
+  void AssertMinMaxIs(const Datum& array, c_type expected_min, c_type 
expected_max,
                       const MinMaxOptions& options) {
-    auto array = ArrayFromJSON(Traits::type_singleton(), array_json);
-    Datum out, out_min, out_max;
-    ASSERT_OK(MinMax(&this->ctx_, options, *array, &out));
+    Datum out;
+    ASSERT_OK(MinMax(&this->ctx_, options, array, &out));
 
     ASSERT_TRUE(out.is_collection());
-    auto col = out.collection();
 
-    out_min = col[0];
+    Datum out_min = out.collection()[0];
     ASSERT_TRUE(out_min.is_scalar());
-    auto min = checked_pointer_cast<ScalarType>(out_min.scalar());
-    ASSERT_EQ(min->value, static_cast<c_type>(expected_min));
+    ASSERT_EQ(checked_cast<const ScalarType&>(*out_min.scalar()).value, 
expected_min);
 
-    out_max = col[1];
+    Datum out_max = out.collection()[1];
     ASSERT_TRUE(out_max.is_scalar());
-    auto max = checked_pointer_cast<ScalarType>(out_max.scalar());
-    ASSERT_EQ(max->value, static_cast<c_type>(expected_max));
+    ASSERT_EQ(checked_cast<const ScalarType&>(*out_max.scalar()).value, 
expected_max);
+  }
+
+  void AssertMinMaxIs(const std::string& json, c_type expected_min, c_type 
expected_max,
+                      const MinMaxOptions& options) {
+    auto array = ArrayFromJSON(Traits::type_singleton(), json);
+    AssertMinMaxIs(array, expected_min, expected_max, options);
+  }
+
+  void AssertMinMaxIs(const std::vector<std::string>& json, c_type 
expected_min,
+                      c_type expected_max, const MinMaxOptions& options) {
+    auto array = ChunkedArrayFromJSON(Traits::type_singleton(), json);
+    AssertMinMaxIs(array, expected_min, expected_max, options);
+  }
+
+  void AssertMinMaxIsNull(const Datum& array, const MinMaxOptions& options) {
+    Datum out;
+    ASSERT_OK(MinMax(&this->ctx_, options, array, &out));
+
+    ASSERT_TRUE(out.is_collection());
+    for (const auto& out : out.collection()) {
+      ASSERT_TRUE(out.is_scalar());
+      ASSERT_FALSE(out.scalar()->is_valid);
+    }
+  }
+
+  void AssertMinMaxIsNull(const std::string& json, const MinMaxOptions& 
options) {
+    auto array = ArrayFromJSON(Traits::type_singleton(), json);
+    AssertMinMaxIsNull(array, options);
+  }
+
+  void AssertMinMaxIsNull(const std::vector<std::string>& json,
+                          const MinMaxOptions& options) {
+    auto array = ChunkedArrayFromJSON(Traits::type_singleton(), json);
+    AssertMinMaxIsNull(array, options);
   }
 };
 
@@ -392,18 +422,54 @@ class TestFloatingMinMaxKernel : public 
TestNumericMinMaxKernel<ArrowType> {};
 TYPED_TEST_SUITE(TestNumericMinMaxKernel, IntegralArrowTypes);
 TYPED_TEST(TestNumericMinMaxKernel, Basics) {
   MinMaxOptions options;
+  std::vector<std::string> chunked_input1 = {"[5, 1, 2, 3, 4]", "[9, 1, null, 
3, 4]"};
+  std::vector<std::string> chunked_input2 = {"[5, null, 2, 3, 4]", "[9, 1, 2, 
3, 4]"};
+  std::vector<std::string> chunked_input3 = {"[5, 1, 2, 3, null]", "[9, 1, 
null, 3, 4]"};
+
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, null, 2, 3, 4]", 2, 5, options);
+  this->AssertMinMaxIs(chunked_input1, 1, 9, options);
+  this->AssertMinMaxIs(chunked_input2, 1, 9, options);
+  this->AssertMinMaxIs(chunked_input3, 1, 9, options);
+
+  options = MinMaxOptions(MinMaxOptions::OUTPUT_NULL);
+  this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
+  // output null
+  this->AssertMinMaxIsNull("[5, null, 2, 3, 4]", options);
+  // output null
+  this->AssertMinMaxIsNull(chunked_input1, options);
+  this->AssertMinMaxIsNull(chunked_input2, options);
+  this->AssertMinMaxIsNull(chunked_input3, options);
 }
 
 TYPED_TEST_SUITE(TestFloatingMinMaxKernel, RealArrowTypes);
 TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
   MinMaxOptions options;
+  std::vector<std::string> chunked_input1 = {"[5, 1, 2, 3, 4]", "[9, 1, null, 
3, 4]"};
+  std::vector<std::string> chunked_input2 = {"[5, null, 2, 3, 4]", "[9, 1, 2, 
3, 4]"};
+  std::vector<std::string> chunked_input3 = {"[5, 1, 2, 3, null]", "[9, 1, 
null, 3, 4]"};
+
+  this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
   this->AssertMinMaxIs("[5, null, 2, 3, 4]", 2, 5, options);
   this->AssertMinMaxIs("[5, Inf, 2, 3, 4]", 2.0, INFINITY, options);
   this->AssertMinMaxIs("[5, NaN, 2, 3, 4]", 2, 5, options);
   this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options);
+  this->AssertMinMaxIs(chunked_input1, 1, 9, options);
+  this->AssertMinMaxIs(chunked_input2, 1, 9, options);
+  this->AssertMinMaxIs(chunked_input3, 1, 9, options);
+
+  options = MinMaxOptions(MinMaxOptions::OUTPUT_NULL);
+  this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
+  this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options);
+  // output null
+  this->AssertMinMaxIsNull("[5, null, 2, 3, 4]", options);
+  // output null
+  this->AssertMinMaxIsNull("[5, -Inf, null, 3, 4]", options);
+  // output null
+  this->AssertMinMaxIsNull(chunked_input1, options);
+  this->AssertMinMaxIsNull(chunked_input2, options);
+  this->AssertMinMaxIsNull(chunked_input3, options);
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/minmax.cc 
b/cpp/src/arrow/compute/kernels/minmax.cc
index 0aff757..fb6f810 100644
--- a/cpp/src/arrow/compute/kernels/minmax.cc
+++ b/cpp/src/arrow/compute/kernels/minmax.cc
@@ -39,6 +39,7 @@ struct MinMaxState<ArrowType, enable_if_integer<ArrowType>> {
   using c_type = typename ArrowType::c_type;
 
   ThisType& operator+=(const ThisType& rhs) {
+    this->has_nulls |= rhs.has_nulls;
     this->min = std::min(this->min, rhs.min);
     this->max = std::max(this->max, rhs.max);
     return *this;
@@ -51,6 +52,7 @@ struct MinMaxState<ArrowType, enable_if_integer<ArrowType>> {
 
   c_type min = std::numeric_limits<c_type>::max();
   c_type max = std::numeric_limits<c_type>::min();
+  bool has_nulls = false;
 };
 
 template <typename ArrowType>
@@ -59,6 +61,7 @@ struct MinMaxState<ArrowType, 
enable_if_floating_point<ArrowType>> {
   using c_type = typename ArrowType::c_type;
 
   ThisType& operator+=(const ThisType& rhs) {
+    this->has_nulls |= rhs.has_nulls;
     this->min = std::fmin(this->min, rhs.min);
     this->max = std::fmax(this->max, rhs.max);
     return *this;
@@ -71,6 +74,7 @@ struct MinMaxState<ArrowType, 
enable_if_floating_point<ArrowType>> {
 
   c_type min = std::numeric_limits<c_type>::infinity();
   c_type max = -std::numeric_limits<c_type>::infinity();
+  bool has_nulls = false;
 };
 
 template <typename ArrowType>
@@ -84,10 +88,16 @@ class MinMaxAggregateFunction final
   Status Consume(const Array& array, StateType* state) const override {
     StateType local;
 
+    local.has_nulls = array.null_count() > 0;
+    if (local.has_nulls && options_.null_handling == 
MinMaxOptions::OUTPUT_NULL) {
+      *state = local;
+      return Status::OK();
+    }
+
     const auto values =
         checked_cast<const typename TypeTraits<ArrowType>::ArrayType&>(array)
             .raw_values();
-    if (array.null_count() != 0) {
+    if (array.null_count() > 0) {
       internal::BitmapReader reader(array.null_bitmap_data(), array.offset(),
                                     array.length());
       for (int64_t i = 0; i < array.length(); i++) {
@@ -101,6 +111,7 @@ class MinMaxAggregateFunction final
         local.MergeOne(values[i]);
       }
     }
+
     *state = local;
     return Status::OK();
   }
@@ -111,7 +122,14 @@ class MinMaxAggregateFunction final
   }
 
   Status Finalize(const StateType& src, Datum* output) const override {
-    *output = Datum({Datum(src.min), Datum(src.max)});
+    using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+    if (src.has_nulls && options_.null_handling == MinMaxOptions::OUTPUT_NULL) 
{
+      *output = Datum(
+          {Datum(std::make_shared<ScalarType>()), 
Datum(std::make_shared<ScalarType>())});
+    } else {
+      *output = Datum({Datum(src.min), Datum(src.max)});
+    }
+
     return Status::OK();
   }
 
diff --git a/cpp/src/arrow/compute/kernels/minmax.h 
b/cpp/src/arrow/compute/kernels/minmax.h
index 4ac4e31..237796c 100644
--- a/cpp/src/arrow/compute/kernels/minmax.h
+++ b/cpp/src/arrow/compute/kernels/minmax.h
@@ -36,11 +36,18 @@ class AggregateFunction;
 /// \class MinMaxOptions
 ///
 /// The user can control the MinMax kernel behavior with this class. By 
default,
-/// it will return null if there is a null value present.
+/// it will skip null if there is a null value present.
 struct ARROW_EXPORT MinMaxOptions {
-  //   MinMaxOptions() : skip_nulls(false) {}
+  enum mode {
+    /// skip null values
+    SKIP = 0,
+    /// any nulls will result in null output
+    OUTPUT_NULL
+  };
 
-  //   bool skip_nulls;
+  explicit MinMaxOptions(enum mode null_handling = SKIP) : 
null_handling(null_handling) {}
+
+  enum mode null_handling = SKIP;
 };
 
 /// \brief Return a Min/Max Kernel

Reply via email to