wesm commented on a change in pull request #8294:
URL: https://github.com/apache/arrow/pull/8294#discussion_r496875601



##########
File path: cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
##########
@@ -547,6 +547,36 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, 
SimdLevel> {
   }
 };
 
+template <SimdLevel::type SimdLevel>
+struct BooleanAnyImpl : public MinMaxImpl<BooleanType, SimdLevel> {
+  using StateType = MinMaxState<BooleanType, SimdLevel>;
+  using ArrayType = typename TypeTraits<BooleanType>::ArrayType;
+  using MinMaxImpl<BooleanType, SimdLevel>::MinMaxImpl;
+
+  void Consume(KernelContext*, const ExecBatch& batch) override {
+    // short-circuit if seen a True already
+    if (this->state.max == true) {
+      return;
+    }
+
+    ArrayType arr(batch[0].array());
+    const auto true_count = arr.true_count();
+    if (true_count > 0) {
+      this->state.max = true;
+    }
+  }
+
+  void Finalize(KernelContext*, Datum* out) override {
+    using ScalarType = typename TypeTraits<BooleanType>::ScalarType;

Review comment:
       Just use BooleanScalar here?

##########
File path: cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
##########
@@ -547,6 +547,36 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, 
SimdLevel> {
   }
 };
 
+template <SimdLevel::type SimdLevel>
+struct BooleanAnyImpl : public MinMaxImpl<BooleanType, SimdLevel> {
+  using StateType = MinMaxState<BooleanType, SimdLevel>;
+  using ArrayType = typename TypeTraits<BooleanType>::ArrayType;
+  using MinMaxImpl<BooleanType, SimdLevel>::MinMaxImpl;
+
+  void Consume(KernelContext*, const ExecBatch& batch) override {
+    // short-circuit if seen a True already
+    if (this->state.max == true) {
+      return;
+    }
+
+    ArrayType arr(batch[0].array());
+    const auto true_count = arr.true_count();
+    if (true_count > 0) {
+      this->state.max = true;
+    }
+  }
+
+  void Finalize(KernelContext*, Datum* out) override {
+    using ScalarType = typename TypeTraits<BooleanType>::ScalarType;
+
+    if (this->state.max == true) {
+      out->value = std::make_shared<ScalarType>(true);
+    } else {
+      out->value = std::make_shared<ScalarType>(false);
+    }

Review comment:
       Collapse all this to
   
   `out->value = std::make_shared<BooleanScalar>(this->state.max)`

##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -726,6 +726,109 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, 
RandomArrayMinMax) {
   }
 }
 
+//
+// Any
+//
+
+class TestPrimitiveAnyKernel : public ::testing::Test {
+  using Traits = TypeTraits<BooleanType>;
+  using ArrayType = typename Traits::ArrayType;
+  using c_type = typename BooleanType::c_type;
+  using ScalarType = typename Traits::ScalarType;
+
+ public:
+  void AssertAnyIs(const Datum& array, c_type expected) {
+    ASSERT_OK_AND_ASSIGN(Datum out, Any(array));
+    const ScalarType& out_any = out.scalar_as<ScalarType>();
+    const auto expected_any = static_cast<const ScalarType>(expected);
+    ASSERT_EQ(out_any, expected_any);
+  }
+
+  void AssertAnyIs(const std::string& json, c_type expected) {
+    auto array = ArrayFromJSON(type_singleton(), json);
+    AssertAnyIs(array, expected);
+  }
+
+  void AssertAnyIs(const std::vector<std::string>& json, c_type expected) {
+    auto array = ChunkedArrayFromJSON(type_singleton(), json);
+    AssertAnyIs(array, expected);
+  }
+
+  std::shared_ptr<DataType> type_singleton() { return 
Traits::type_singleton(); }
+};
+
+class TestAnyKernel : public TestPrimitiveAnyKernel {};
+
+TEST_F(TestAnyKernel, Basics) {
+  std::vector<std::string> chunked_input0 = {"[]", "[true]"};
+  std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, 
null]"};
+  std::vector<std::string> chunked_input2 = {"[false, false, false]", 
"[false]"};
+  std::vector<std::string> chunked_input3 = {"[false, null]", "[null, false]"};
+
+  this->AssertAnyIs("[false]", false);
+  this->AssertAnyIs("[true, false]", true);
+  this->AssertAnyIs("[null, null, null]", false);
+  this->AssertAnyIs("[false, false, false]", false);
+  this->AssertAnyIs("[false, false, false, null]", false);
+  this->AssertAnyIs("[true, null, true, true]", true);
+  this->AssertAnyIs("[false, null, false, true]", true);
+  this->AssertAnyIs("[true, null, false, true]", true);
+  this->AssertAnyIs(chunked_input0, true);
+  this->AssertAnyIs(chunked_input1, true);
+  this->AssertAnyIs(chunked_input2, false);
+  this->AssertAnyIs(chunked_input3, false);
+}
+
+struct AnyResult {
+  using c_type = typename BooleanType::c_type;
+
+  c_type any = false;
+};
+
+static AnyResult NaiveAny(const Array& array) {

Review comment:
       maybe just `bool` here?

##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -726,6 +726,109 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, 
RandomArrayMinMax) {
   }
 }
 
+//
+// Any
+//
+
+class TestPrimitiveAnyKernel : public ::testing::Test {
+  using Traits = TypeTraits<BooleanType>;
+  using ArrayType = typename Traits::ArrayType;
+  using c_type = typename BooleanType::c_type;
+  using ScalarType = typename Traits::ScalarType;
+
+ public:
+  void AssertAnyIs(const Datum& array, c_type expected) {
+    ASSERT_OK_AND_ASSIGN(Datum out, Any(array));
+    const ScalarType& out_any = out.scalar_as<ScalarType>();
+    const auto expected_any = static_cast<const ScalarType>(expected);
+    ASSERT_EQ(out_any, expected_any);
+  }
+
+  void AssertAnyIs(const std::string& json, c_type expected) {
+    auto array = ArrayFromJSON(type_singleton(), json);
+    AssertAnyIs(array, expected);
+  }
+
+  void AssertAnyIs(const std::vector<std::string>& json, c_type expected) {
+    auto array = ChunkedArrayFromJSON(type_singleton(), json);
+    AssertAnyIs(array, expected);
+  }
+
+  std::shared_ptr<DataType> type_singleton() { return 
Traits::type_singleton(); }
+};
+
+class TestAnyKernel : public TestPrimitiveAnyKernel {};
+
+TEST_F(TestAnyKernel, Basics) {
+  std::vector<std::string> chunked_input0 = {"[]", "[true]"};
+  std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, 
null]"};
+  std::vector<std::string> chunked_input2 = {"[false, false, false]", 
"[false]"};
+  std::vector<std::string> chunked_input3 = {"[false, null]", "[null, false]"};
+
+  this->AssertAnyIs("[false]", false);
+  this->AssertAnyIs("[true, false]", true);
+  this->AssertAnyIs("[null, null, null]", false);
+  this->AssertAnyIs("[false, false, false]", false);
+  this->AssertAnyIs("[false, false, false, null]", false);
+  this->AssertAnyIs("[true, null, true, true]", true);
+  this->AssertAnyIs("[false, null, false, true]", true);
+  this->AssertAnyIs("[true, null, false, true]", true);
+  this->AssertAnyIs(chunked_input0, true);
+  this->AssertAnyIs(chunked_input1, true);
+  this->AssertAnyIs(chunked_input2, false);
+  this->AssertAnyIs(chunked_input3, false);
+}
+
+struct AnyResult {
+  using c_type = typename BooleanType::c_type;
+
+  c_type any = false;
+};
+
+static AnyResult NaiveAny(const Array& array) {
+  using ArrayType = typename TypeTraits<BooleanType>::ArrayType;
+
+  AnyResult result;
+
+  const auto& array_numeric = reinterpret_cast<const ArrayType&>(array);
+  const auto true_count = array_numeric.true_count();
+
+  if (true_count > 0) {
+    result.any = true;
+  }
+  return result;
+}
+
+void ValidateAny(const Array& array) {
+  using Traits = TypeTraits<BooleanType>;
+  using ScalarType = typename Traits::ScalarType;
+
+  ASSERT_OK_AND_ASSIGN(Datum out, Any(array));
+  const ScalarType& out_any = out.scalar_as<ScalarType>();
+
+  auto expected = NaiveAny(array);
+  const auto& expected_any = static_cast<const ScalarType>(expected.any);
+
+  ASSERT_EQ(out_any, expected_any);
+}
+
+class TestRandomBooleanAnyKernel : public ::testing::Test {};
+
+TEST_F(TestRandomBooleanAnyKernel, RandomArrayAny) {
+  auto rand = random::RandomArrayGenerator(0x8afc055);
+  // Test size up to 1<<11 (2048).
+  for (size_t i = 3; i < 12; i += 2) {
+    for (auto null_probability : {0.0, 0.01, 0.1, 0.5, 0.99, 1.0}) {
+      int64_t base_length = (1UL << i) + 2;
+      auto array = rand.Boolean(base_length, null_probability, 
null_probability);
+      for (auto length_adjust : {-2, -1, 0, 1, 2}) {
+        int64_t length = (1UL << i) + length_adjust;
+        ValidateAny(*array->Slice(0, length));
+      }
+    }
+  }
+}

Review comment:
       It's unclear how useful randomly generated cases are for Any by its 
nature

##########
File path: cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
##########
@@ -547,6 +547,36 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, 
SimdLevel> {
   }
 };
 
+template <SimdLevel::type SimdLevel>
+struct BooleanAnyImpl : public MinMaxImpl<BooleanType, SimdLevel> {
+  using StateType = MinMaxState<BooleanType, SimdLevel>;

Review comment:
       Not used

##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -726,6 +726,109 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, 
RandomArrayMinMax) {
   }
 }
 
+//
+// Any
+//
+
+class TestPrimitiveAnyKernel : public ::testing::Test {
+  using Traits = TypeTraits<BooleanType>;
+  using ArrayType = typename Traits::ArrayType;
+  using c_type = typename BooleanType::c_type;
+  using ScalarType = typename Traits::ScalarType;

Review comment:
       These typedefs probably not needed, just use the Boolean* types within

##########
File path: cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
##########
@@ -547,6 +547,36 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, 
SimdLevel> {
   }
 };
 
+template <SimdLevel::type SimdLevel>
+struct BooleanAnyImpl : public MinMaxImpl<BooleanType, SimdLevel> {
+  using StateType = MinMaxState<BooleanType, SimdLevel>;
+  using ArrayType = typename TypeTraits<BooleanType>::ArrayType;
+  using MinMaxImpl<BooleanType, SimdLevel>::MinMaxImpl;
+
+  void Consume(KernelContext*, const ExecBatch& batch) override {
+    // short-circuit if seen a True already
+    if (this->state.max == true) {
+      return;
+    }
+
+    ArrayType arr(batch[0].array());

Review comment:
       Just use `BooleanArray` here

##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -726,6 +726,109 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, 
RandomArrayMinMax) {
   }
 }
 
+//
+// Any
+//
+
+class TestPrimitiveAnyKernel : public ::testing::Test {
+  using Traits = TypeTraits<BooleanType>;
+  using ArrayType = typename Traits::ArrayType;
+  using c_type = typename BooleanType::c_type;
+  using ScalarType = typename Traits::ScalarType;
+
+ public:
+  void AssertAnyIs(const Datum& array, c_type expected) {
+    ASSERT_OK_AND_ASSIGN(Datum out, Any(array));
+    const ScalarType& out_any = out.scalar_as<ScalarType>();
+    const auto expected_any = static_cast<const ScalarType>(expected);
+    ASSERT_EQ(out_any, expected_any);
+  }
+
+  void AssertAnyIs(const std::string& json, c_type expected) {
+    auto array = ArrayFromJSON(type_singleton(), json);
+    AssertAnyIs(array, expected);
+  }
+
+  void AssertAnyIs(const std::vector<std::string>& json, c_type expected) {
+    auto array = ChunkedArrayFromJSON(type_singleton(), json);
+    AssertAnyIs(array, expected);
+  }
+
+  std::shared_ptr<DataType> type_singleton() { return 
Traits::type_singleton(); }
+};
+
+class TestAnyKernel : public TestPrimitiveAnyKernel {};
+
+TEST_F(TestAnyKernel, Basics) {
+  std::vector<std::string> chunked_input0 = {"[]", "[true]"};
+  std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, 
null]"};
+  std::vector<std::string> chunked_input2 = {"[false, false, false]", 
"[false]"};
+  std::vector<std::string> chunked_input3 = {"[false, null]", "[null, false]"};
+
+  this->AssertAnyIs("[false]", false);
+  this->AssertAnyIs("[true, false]", true);
+  this->AssertAnyIs("[null, null, null]", false);
+  this->AssertAnyIs("[false, false, false]", false);
+  this->AssertAnyIs("[false, false, false, null]", false);
+  this->AssertAnyIs("[true, null, true, true]", true);
+  this->AssertAnyIs("[false, null, false, true]", true);
+  this->AssertAnyIs("[true, null, false, true]", true);
+  this->AssertAnyIs(chunked_input0, true);
+  this->AssertAnyIs(chunked_input1, true);
+  this->AssertAnyIs(chunked_input2, false);
+  this->AssertAnyIs(chunked_input3, false);
+}
+
+struct AnyResult {
+  using c_type = typename BooleanType::c_type;
+
+  c_type any = false;
+};

Review comment:
       Is this needed?

##########
File path: cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
##########
@@ -584,6 +614,33 @@ struct MinMaxInitState {
   }
 };
 
+template <SimdLevel::type SimdLevel>
+struct AnyInitState {
+  std::unique_ptr<KernelState> state;
+  KernelContext* ctx;
+  const DataType& in_type;
+  const std::shared_ptr<DataType>& out_type;
+  const MinMaxOptions& options;
+
+  AnyInitState(KernelContext* ctx, const DataType& in_type,
+               const std::shared_ptr<DataType>& out_type, const MinMaxOptions& 
options)
+      : ctx(ctx), in_type(in_type), out_type(out_type), options(options) {}
+
+  Status Visit(const DataType&) {
+    return Status::NotImplemented("No any kernel implemented");
+  }
+
+  Status Visit(const BooleanType&) {
+    state.reset(new BooleanAnyImpl<SimdLevel>(out_type, options));
+    return Status::OK();
+  }
+
+  std::unique_ptr<KernelState> Create() {
+    ctx->SetStatus(VisitTypeInline(in_type, this));
+    return std::move(state);
+  }
+};

Review comment:
       Is this really needed? Since there is only one type handled seems like 
you could omit all this and do something simpler in `AnyInit`

##########
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##########
@@ -726,6 +726,109 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, 
RandomArrayMinMax) {
   }
 }
 
+//
+// Any
+//
+
+class TestPrimitiveAnyKernel : public ::testing::Test {
+  using Traits = TypeTraits<BooleanType>;
+  using ArrayType = typename Traits::ArrayType;
+  using c_type = typename BooleanType::c_type;
+  using ScalarType = typename Traits::ScalarType;
+
+ public:
+  void AssertAnyIs(const Datum& array, c_type expected) {
+    ASSERT_OK_AND_ASSIGN(Datum out, Any(array));
+    const ScalarType& out_any = out.scalar_as<ScalarType>();
+    const auto expected_any = static_cast<const ScalarType>(expected);
+    ASSERT_EQ(out_any, expected_any);
+  }
+
+  void AssertAnyIs(const std::string& json, c_type expected) {
+    auto array = ArrayFromJSON(type_singleton(), json);
+    AssertAnyIs(array, expected);
+  }
+
+  void AssertAnyIs(const std::vector<std::string>& json, c_type expected) {
+    auto array = ChunkedArrayFromJSON(type_singleton(), json);
+    AssertAnyIs(array, expected);
+  }
+
+  std::shared_ptr<DataType> type_singleton() { return 
Traits::type_singleton(); }
+};
+
+class TestAnyKernel : public TestPrimitiveAnyKernel {};
+
+TEST_F(TestAnyKernel, Basics) {
+  std::vector<std::string> chunked_input0 = {"[]", "[true]"};
+  std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, 
null]"};
+  std::vector<std::string> chunked_input2 = {"[false, false, false]", 
"[false]"};
+  std::vector<std::string> chunked_input3 = {"[false, null]", "[null, false]"};
+
+  this->AssertAnyIs("[false]", false);
+  this->AssertAnyIs("[true, false]", true);
+  this->AssertAnyIs("[null, null, null]", false);
+  this->AssertAnyIs("[false, false, false]", false);
+  this->AssertAnyIs("[false, false, false, null]", false);
+  this->AssertAnyIs("[true, null, true, true]", true);
+  this->AssertAnyIs("[false, null, false, true]", true);
+  this->AssertAnyIs("[true, null, false, true]", true);
+  this->AssertAnyIs(chunked_input0, true);
+  this->AssertAnyIs(chunked_input1, true);
+  this->AssertAnyIs(chunked_input2, false);
+  this->AssertAnyIs(chunked_input3, false);
+}
+
+struct AnyResult {
+  using c_type = typename BooleanType::c_type;
+
+  c_type any = false;
+};
+
+static AnyResult NaiveAny(const Array& array) {
+  using ArrayType = typename TypeTraits<BooleanType>::ArrayType;
+
+  AnyResult result;
+
+  const auto& array_numeric = reinterpret_cast<const ArrayType&>(array);

Review comment:
       use `checked_cast<const BooleanArray&>` here




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to