[GitHub] [arrow] pitrou commented on a change in pull request #8637: ARROW-10021: [C++][Compute] Return top-n modes in mode kernel

2020-11-12 Thread GitBox


pitrou commented on a change in pull request #8637:
URL: https://github.com/apache/arrow/pull/8637#discussion_r521968240



##
File path: docs/source/cpp/compute.rst
##
@@ -140,7 +140,7 @@ Aggregations
 
+--+++---++
 | min_max  | Unary  | Numeric| Scalar Struct  
(1)| :struct:`MinMaxOptions`|
 
+--+++---++
-| mode | Unary  | Numeric| Scalar Struct  
(2)||
+| mode | Unary  | Numeric| Scalar Struct  
(2)| :struct:`ModeOptions`  |

Review comment:
   And `mode` should be changed to "Struct".





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] pitrou commented on a change in pull request #8637: ARROW-10021: [C++][Compute] Return top-n modes in mode kernel

2020-11-12 Thread GitBox


pitrou commented on a change in pull request #8637:
URL: https://github.com/apache/arrow/pull/8637#discussion_r521967685



##
File path: docs/source/cpp/compute.rst
##
@@ -140,7 +140,7 @@ Aggregations
 
+--+++---++
 | min_max  | Unary  | Numeric| Scalar Struct  
(1)| :struct:`MinMaxOptions`|
 
+--+++---++
-| mode | Unary  | Numeric| Scalar Struct  
(2)||
+| mode | Unary  | Numeric| Scalar Struct  
(2)| :struct:`ModeOptions`  |

Review comment:
   `min_max` returns a scalar, AFAIK.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] pitrou commented on a change in pull request #8637: ARROW-10021: [C++][Compute] Return top-n modes in mode kernel

2020-11-11 Thread GitBox


pitrou commented on a change in pull request #8637:
URL: https://github.com/apache/arrow/pull/8637#discussion_r521255222



##
File path: docs/source/cpp/compute.rst
##
@@ -140,7 +140,7 @@ Aggregations
 
+--+++---++
 | min_max  | Unary  | Numeric| Scalar Struct  
(1)| :struct:`MinMaxOptions`|
 
+--+++---++
-| mode | Unary  | Numeric| Scalar Struct  
(2)||
+| mode | Unary  | Numeric| Scalar Struct  
(2)| :struct:`ModeOptions`  |

Review comment:
   Replace "Scalar Struct" with "Struct".

##
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##
@@ -735,71 +735,63 @@ class TestPrimitiveModeKernel : public ::testing::Test {
  public:
   using ArrowType = T;
   using Traits = TypeTraits;
-  using c_type = typename ArrowType::c_type;
-  using ModeType = typename Traits::ScalarType;
-  using CountType = typename TypeTraits::ScalarType;
-
-  void AssertModeIs(const Datum& array, c_type expected_mode, int64_t 
expected_count) {
-ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
-const StructScalar& value = out.scalar_as();
+  using CType = typename ArrowType::c_type;
+
+  void AssertModesAre(const Datum& array, const int n,
+  const std::vector& expected_modes,
+  const std::vector& expected_counts) {
+ASSERT_OK_AND_ASSIGN(Datum out, Mode(array, ModeOptions{n}));
+const StructArray out_array(out.array());
+ASSERT_EQ(out_array.length(), expected_modes.size());
+ASSERT_EQ(out_array.num_fields(), 2);
+
+const CType* out_modes = out_array.field(0)->data()->GetValues(1);
+const int64_t* out_counts = 
out_array.field(1)->data()->GetValues(1);
+for (int i = 0; i < out_array.length(); ++i) {
+  // equal or nan equal
+  ASSERT_TRUE(
+  (expected_modes[i] == out_modes[i]) ||
+  (expected_modes[i] != expected_modes[i] && out_modes[i] != 
out_modes[i]));
+  ASSERT_EQ(expected_counts[i], out_counts[i]);
+}
+  }
 
-const auto& out_mode = checked_cast(*value.value[0]);
-ASSERT_EQ(expected_mode, out_mode.value);
+  void AssertModesAre(const std::string& json, const int n,
+  const std::vector& expected_modes,
+  const std::vector& expected_counts) {
+auto array = ArrayFromJSON(type_singleton(), json);
+AssertModesAre(array, n, expected_modes, expected_counts);
+  }
 
-const auto& out_count = checked_cast(*value.value[1]);
-ASSERT_EQ(expected_count, out_count.value);
+  void AssertModeIs(const Datum& array, CType expected_mode, int64_t 
expected_count) {
+AssertModesAre(array, 1, {expected_mode}, {expected_count});
   }
 
-  void AssertModeIs(const std::string& json, c_type expected_mode,
+  void AssertModeIs(const std::string& json, CType expected_mode,
 int64_t expected_count) {
 auto array = ArrayFromJSON(type_singleton(), json);
 AssertModeIs(array, expected_mode, expected_count);
   }
 
-  void AssertModeIs(const std::vector& json, c_type expected_mode,
+  void AssertModeIs(const std::vector& json, CType expected_mode,
 int64_t expected_count) {
 auto chunked = ChunkedArrayFromJSON(type_singleton(), json);
 AssertModeIs(chunked, expected_mode, expected_count);
   }
 
-  void AssertModeIsNull(const Datum& array) {
-ASSERT_OK_AND_ASSIGN(Datum out, Mode(array));
-const StructScalar& value = out.scalar_as();
-
-for (const auto& val : value.value) {
-  ASSERT_FALSE(val->is_valid);
-}
+  void AssertModeIsNull(const Datum& array, int n) {

Review comment:
   This should be renamed, since it's not null anymore. `AssertModesEmpty`?

##
File path: cpp/src/arrow/compute/api_aggregate.h
##
@@ -76,6 +76,18 @@ struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
   enum Mode null_handling;
 };
 
+/// \brief Control Mode kernel behavior
+///
+/// Returns top-n common values and counts.
+/// By default, returns the most common value and count.
+struct ARROW_EXPORT ModeOptions : public FunctionOptions {
+  explicit ModeOptions(int n = 1) : n(n) {}

Review comment:
   Can we use `int64_t` everywhere for this?

##
File path: cpp/src/arrow/compute/kernels/aggregate_test.cc
##
@@ -735,71 +735,63 @@ class TestPrimitiveModeKernel : public ::testing::Test {
  public:
   using ArrowType = T;
   using Traits = TypeTraits;
-  using c_type = typename ArrowType::c_type;
-  using ModeType = typename Traits::ScalarType;
-  using CountType = typename TypeTraits::ScalarType;
-
-  void