This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4d625b37cc GH-45732: [C++][Compute] Accept more pivot key types 
(#45945)
4d625b37cc is described below

commit 4d625b37ccb6d1cff1f1c47138318acc6751d4b0
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Apr 2 14:10:04 2025 +0200

    GH-45732: [C++][Compute] Accept more pivot key types (#45945)
    
    ### Rationale for this change
    
    Allow the `pivot_wider` and `hash_pivot_wider` functions to accept an
    integer or fixed-size binary pivot key column, in addition to the
    binary-like types already supported.
    
    Since the `key_names` option is a vector of strings, the key names are
    cast to the pivot key column's type for matching.
    
    ### Are these changes tested?
    
    Yes, by new unit tests.
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #45732
    
    Lead-authored-by: Antoine Pitrou <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/acero/hash_aggregate_test.cc         |  60 +++++-
 cpp/src/arrow/compute/api_aggregate.h              |   7 +-
 cpp/src/arrow/compute/exec.h                       |   2 +-
 cpp/src/arrow/compute/kernels/aggregate_pivot.cc   |  83 +++++----
 cpp/src/arrow/compute/kernels/aggregate_test.cc    |  42 ++++-
 .../arrow/compute/kernels/hash_aggregate_pivot.cc  |  70 ++++---
 cpp/src/arrow/compute/kernels/pivot_internal.cc    | 173 +++++++++--------
 cpp/src/arrow/compute/kernels/pivot_internal.h     |  15 +-
 docs/source/cpp/compute.rst                        | 204 ++++++++++-----------
 9 files changed, 408 insertions(+), 248 deletions(-)

diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc 
b/cpp/src/arrow/acero/hash_aggregate_test.cc
index 1c456c2fd7..dce0e44eb1 100644
--- a/cpp/src/arrow/acero/hash_aggregate_test.cc
+++ b/cpp/src/arrow/acero/hash_aggregate_test.cc
@@ -4440,7 +4440,7 @@ TEST_P(GroupBy, PivotBasics) {
   }
 }
 
-TEST_P(GroupBy, PivotAllKeyTypes) {
+TEST_P(GroupBy, PivotBinaryKeyTypes) {
   auto value_type = float32();
   std::vector<std::string> table_json = {R"([
       [1, "width", 10.5],
@@ -4462,6 +4462,49 @@ TEST_P(GroupBy, PivotAllKeyTypes) {
     ARROW_SCOPED_TRACE("key_type = ", *key_type);
     TestPivot(key_type, value_type, options, table_json, expected_json);
   }
+
+  auto key_type = fixed_size_binary(3);
+  table_json = {R"([
+      [1, "wid", 10.5],
+      [2, "wid", 11.5]
+      ])",
+                R"([
+      [2, "hei", 12.5],
+      [3, "wid",  13.5],
+      [1, "hei", 14.5]
+      ])"};
+  expected_json = R"([
+      [1, {"hei": 14.5, "wid": 10.5} ],
+      [2, {"hei": 12.5, "wid": 11.5} ],
+      [3, {"hei": null, "wid": 13.5} ]
+      ])";
+  options.key_names = {"hei", "wid"};
+  ARROW_SCOPED_TRACE("key_type = ", *key_type);
+  TestPivot(key_type, value_type, options, table_json, expected_json);
+}
+
+TEST_P(GroupBy, PivotIntegerKeyTypes) {
+  auto value_type = float32();
+  std::vector<std::string> table_json = {R"([
+      [1, 78, 10.5],
+      [2, 78, 11.5]
+      ])",
+                                         R"([
+      [2, 56, 12.5],
+      [3, 78, 13.5],
+      [1, 56, 14.5]
+      ])"};
+  std::string expected_json = R"([
+      [1, {"56": 14.5, "78": 10.5} ],
+      [2, {"56": 12.5, "78": 11.5} ],
+      [3, {"56": null, "78": 13.5} ]
+      ])";
+  PivotWiderOptions options(/*key_names=*/{"56", "78"});
+
+  for (const auto& key_type : IntTypes()) {
+    ARROW_SCOPED_TRACE("key_type = ", *key_type);
+    TestPivot(key_type, value_type, options, table_json, expected_json);
+  }
 }
 
 TEST_P(GroupBy, PivotNumericValues) {
@@ -4749,6 +4792,21 @@ TEST_P(GroupBy, PivotDuplicateKeys) {
       RunPivot(key_type, value_type, options, table_json));
 }
 
+TEST_P(GroupBy, PivotInvalidKeys) {
+  // Integer key type, but key names cannot be converted to int
+  auto key_type = int32();
+  auto value_type = float32();
+  std::vector<std::string> table_json = {R"([])"};
+  PivotWiderOptions options(/*key_names=*/{"123", "width"});
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, HasSubstr("Failed to parse string: 'width' as a scalar of type 
int32"),
+      RunPivot(key_type, value_type, options, table_json));
+  options.key_names = {"12.3", "45"};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, HasSubstr("Failed to parse string: '12.3' as a scalar of type 
int32"),
+      RunPivot(key_type, value_type, options, table_json));
+}
+
 TEST_P(GroupBy, PivotDuplicateValues) {
   auto key_type = utf8();
   auto value_type = float32();
diff --git a/cpp/src/arrow/compute/api_aggregate.h 
b/cpp/src/arrow/compute/api_aggregate.h
index 1d9076f6ba..8930d04de5 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -202,9 +202,10 @@ class ARROW_EXPORT TDigestOptions : public FunctionOptions 
{
 /// - The corresponding `Aggregate::target` must have two FieldRef elements;
 ///   the first one points to the pivot key column, the second points to the
 ///   pivoted data column.
-/// - The pivot key column must be string-like; its values will be matched
-///   against `key_names` in order to dispatch the pivoted data into the
-///   output.
+/// - The pivot key column can be string, binary or integer; its values will be
+///   matched against `key_names` in order to dispatch the pivoted data into
+///   the output. If the pivot key column is not string-like, the `key_names`
+///   will be cast to the pivot key type.
 ///
 /// "pivot_wider" example
 /// ---------------------
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index 3fbefe4a1a..dae7e1ea68 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -276,7 +276,7 @@ struct ExecValue {
   ArraySpan array = {};
   const Scalar* scalar = NULLPTR;
 
-  ExecValue(Scalar* scalar)  // NOLINT implicit conversion
+  ExecValue(const Scalar* scalar)  // NOLINT implicit conversion
       : scalar(scalar) {}
 
   ExecValue(ArraySpan array)  // NOLINT implicit conversion
diff --git a/cpp/src/arrow/compute/kernels/aggregate_pivot.cc 
b/cpp/src/arrow/compute/kernels/aggregate_pivot.cc
index 3ff6327ec9..f3571621e4 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_pivot.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_pivot.cc
@@ -22,6 +22,7 @@
 #include "arrow/scalar.h"
 #include "arrow/util/bit_run_reader.h"
 #include "arrow/util/logging.h"
+#include "arrow/visit_data_inline.h"
 
 namespace arrow::compute::internal {
 namespace {
@@ -30,7 +31,8 @@ using arrow::internal::VisitSetBitRunsVoid;
 using arrow::util::span;
 
 struct PivotImpl : public ScalarAggregator {
-  Status Init(const PivotWiderOptions& options, const std::vector<TypeHolder>& 
in_types) {
+  Status Init(const PivotWiderOptions& options, const std::vector<TypeHolder>& 
in_types,
+              ExecContext* ctx) {
     options_ = &options;
     key_type_ = in_types[0].GetSharedPtr();
     auto value_type = in_types[1].GetSharedPtr();
@@ -42,47 +44,57 @@ struct PivotImpl : public ScalarAggregator {
       values_.push_back(MakeNullScalar(value_type));
     }
     out_type_ = struct_(std::move(fields));
-    ARROW_ASSIGN_OR_RAISE(key_mapper_, PivotWiderKeyMapper::Make(*key_type_, 
options_));
+    ARROW_ASSIGN_OR_RAISE(key_mapper_,
+                          PivotWiderKeyMapper::Make(*key_type_, options_, 
ctx));
     return Status::OK();
   }
 
   Status Consume(KernelContext*, const ExecSpan& batch) override {
     DCHECK_EQ(batch.num_values(), 2);
     if (batch[0].is_array()) {
-      ARROW_ASSIGN_OR_RAISE(span<const PivotWiderKeyIndex> keys,
-                            key_mapper_->MapKeys(batch[0].array));
+      ARROW_ASSIGN_OR_RAISE(auto keys_array, 
key_mapper_->MapKeys(batch[0].array));
+      DCHECK_EQ(keys_array->type->id(), Type::UINT32);
+      ArraySpan keys_span(*keys_array);
       if (batch[1].is_array()) {
         // Array keys, array values
         auto values = batch[1].array.ToArray();
-        for (int64_t i = 0; i < batch.length; ++i) {
-          PivotWiderKeyIndex key = keys[i];
-          if (key != kNullPivotKey && !values->IsNull(i)) {
-            if (ARROW_PREDICT_FALSE(values_[key]->is_valid)) {
-              return DuplicateValue();
-            }
-            ARROW_ASSIGN_OR_RAISE(values_[key], values->GetScalar(i));
-            DCHECK(values_[key]->is_valid);
-          }
-        }
+        int64_t i = 0;
+        RETURN_NOT_OK(VisitArraySpanInline<UInt32Type>(
+            keys_span,
+            [&](uint32_t key) {
+              if (!values->IsNull(i)) {
+                if (ARROW_PREDICT_FALSE(values_[key]->is_valid)) {
+                  return DuplicateValue();
+                }
+                ARROW_ASSIGN_OR_RAISE(values_[key], values->GetScalar(i));
+              }
+              ++i;
+              return Status::OK();
+            },
+            [&]() {
+              ++i;
+              return Status::OK();
+            }));
       } else {
         // Array keys, scalar value
         const Scalar* value = batch[1].scalar;
         if (value->is_valid) {
-          for (int64_t i = 0; i < batch.length; ++i) {
-            PivotWiderKeyIndex key = keys[i];
-            if (key != kNullPivotKey) {
-              if (ARROW_PREDICT_FALSE(values_[key]->is_valid)) {
-                return DuplicateValue();
-              }
-              values_[key] = value->GetSharedPtr();
-            }
-          }
+          RETURN_NOT_OK(VisitArraySpanInline<UInt32Type>(
+              keys_span,
+              [&](uint32_t key) {
+                if (ARROW_PREDICT_FALSE(values_[key]->is_valid)) {
+                  return DuplicateValue();
+                }
+                values_[key] = value->GetSharedPtr();
+                return Status::OK();
+              },
+              [] { return Status::OK(); }));
         }
       }
     } else {
-      ARROW_ASSIGN_OR_RAISE(PivotWiderKeyIndex key,
-                            key_mapper_->MapKey(*batch[0].scalar));
-      if (key != kNullPivotKey) {
+      ARROW_ASSIGN_OR_RAISE(auto maybe_key, 
key_mapper_->MapKey(*batch[0].scalar));
+      if (maybe_key.has_value()) {
+        PivotWiderKeyIndex key = maybe_key.value();
         if (batch[1].is_array()) {
           // Scalar key, array values
           auto values = batch[1].array.ToArray();
@@ -145,10 +157,8 @@ struct PivotImpl : public ScalarAggregator {
 Result<std::unique_ptr<KernelState>> PivotInit(KernelContext* ctx,
                                                const KernelInitArgs& args) {
   const auto& options = checked_cast<const PivotWiderOptions&>(*args.options);
-  DCHECK_EQ(args.inputs.size(), 2);
-  DCHECK(is_base_binary_like(args.inputs[0].id()));
   auto state = std::make_unique<PivotImpl>();
-  RETURN_NOT_OK(state->Init(options, args.inputs));
+  RETURN_NOT_OK(state->Init(options, args.inputs, ctx->exec_context()));
   // GH-45718: This can be simplified once we drop the R openSUSE155 crossbow
   // job
   // R build with openSUSE155 requires an explicit shared_ptr construction
@@ -167,6 +177,8 @@ const FunctionDoc pivot_doc{
      "is emitted. If a pivot key doesn't appear, null is emitted.\n"
      "If more than one non-null value is encountered for a given pivot key,\n"
      "Invalid is raised.\n"
+     "The pivot key column can be string, binary or integer. The `key_names`\n"
+     "will be cast to the pivot key column type for matching.\n"
      "Behavior of unexpected pivot keys is controlled by 
`unexpected_key_behavior`\n"
      "in PivotWiderOptions."),
     {"pivot_keys", "pivot_values"},
@@ -179,12 +191,19 @@ void RegisterScalarAggregatePivot(FunctionRegistry* 
registry) {
 
   auto func = std::make_shared<ScalarAggregateFunction>(
       "pivot_wider", Arity::Binary(), pivot_doc, &default_pivot_options);
-
-  for (auto key_type : BaseBinaryTypes()) {
-    auto sig = KernelSignature::Make({key_type->id(), InputType::Any()},
+  auto add_kernel = [&](InputType key_type) {
+    auto sig = KernelSignature::Make({key_type, InputType::Any()},
                                      OutputType(ResolveOutputType));
     AddAggKernel(std::move(sig), PivotInit, func.get());
+  };
+
+  for (const auto& key_type : BaseBinaryTypes()) {
+    add_kernel(key_type->id());
+  }
+  for (const auto& key_type : IntTypes()) {
+    add_kernel(key_type->id());
   }
+  add_kernel(Type::FIXED_SIZE_BINARY);
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc 
b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index ec012a42cd..d821fc7e2c 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -4504,10 +4504,9 @@ TEST_F(TestPivotKernel, Basics) {
               PivotWiderOptions(/*key_names=*/{"height", "width"}));
 }
 
-TEST_F(TestPivotKernel, AllKeyTypes) {
+TEST_F(TestPivotKernel, BinaryKeyTypes) {
+  auto value_type = float32();
   for (auto key_type : BaseBinaryTypes()) {
-    auto value_type = float32();
-
     auto keys = ArrayFromJSON(key_type, R"(["width", "height"])");
     auto values = ArrayFromJSON(value_type, "[10.5, 11.5]");
     auto expected =
@@ -4516,6 +4515,25 @@ TEST_F(TestPivotKernel, AllKeyTypes) {
     AssertPivot(keys, values, *expected,
                 PivotWiderOptions(/*key_names=*/{"height", "width"}));
   }
+  auto key_type = fixed_size_binary(3);
+  auto keys = ArrayFromJSON(key_type, R"(["wid", "hei"])");
+  auto values = ArrayFromJSON(value_type, "[10.5, 11.5]");
+  auto expected = ScalarFromJSON(
+      struct_({field("hei", value_type), field("wid", value_type)}), "[11.5, 
10.5]");
+  AssertPivot(keys, values, *expected, PivotWiderOptions(/*key_names=*/{"hei", 
"wid"}));
+}
+
+TEST_F(TestPivotKernel, IntegerKeyTypes) {
+  // It is possible to use an integer key column, while passing its string 
equivalent
+  // in PivotWiderOptions::key_names.
+  auto value_type = float32();
+  for (auto key_type : IntTypes()) {
+    auto keys = ArrayFromJSON(key_type, "[34, 12]");
+    auto values = ArrayFromJSON(value_type, "[10.5, 11.5]");
+    auto expected = ScalarFromJSON(
+        struct_({field("12", value_type), field("34", value_type)}), "[11.5, 
10.5]");
+    AssertPivot(keys, values, *expected, 
PivotWiderOptions(/*key_names=*/{"12", "34"}));
+  }
 }
 
 TEST_F(TestPivotKernel, Numbers) {
@@ -4724,6 +4742,24 @@ TEST_F(TestPivotKernel, DuplicateKeyNames) {
       CallFunction("pivot_wider", {keys, values}, &options));
 }
 
+TEST_F(TestPivotKernel, InvalidKeyName) {
+  auto key_type = int32();
+  auto value_type = float32();
+
+  auto keys = ArrayFromJSON(key_type, "[]");
+  auto values = ArrayFromJSON(value_type, "[]");
+  auto options = PivotWiderOptions(/*key_names=*/{"123", "width"});
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr("Failed to parse string: 'width' as a scalar of 
type int32"),
+      CallFunction("pivot_wider", {keys, values}, &options));
+  options.key_names = {"12.3", "45"};
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid,
+      ::testing::HasSubstr("Failed to parse string: '12.3' as a scalar of type 
int32"),
+      CallFunction("pivot_wider", {keys, values}, &options));
+}
+
 TEST_F(TestPivotKernel, DuplicateValues) {
   auto key_type = utf8();
   auto value_type = float32();
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_pivot.cc 
b/cpp/src/arrow/compute/kernels/hash_aggregate_pivot.cc
index c3dc070e4f..3833d4ddb7 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate_pivot.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate_pivot.cc
@@ -29,6 +29,7 @@
 #include "arrow/compute/kernels/hash_aggregate_internal.h"
 #include "arrow/compute/kernels/pivot_internal.h"
 #include "arrow/compute/row/grouper.h"
+#include "arrow/util/bit_block_counter.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/span.h"
 #include "arrow/visit_type_inline.h"
@@ -54,7 +55,7 @@ struct GroupedPivotAccumulator {
     return Status::OK();
   }
 
-  Status Consume(span<const uint32_t> groups, span<const PivotWiderKeyIndex> 
keys,
+  Status Consume(span<const uint32_t> groups, const 
std::shared_ptr<ArrayData>& keys,
                  const ArraySpan& values) {
     // To dispatch the values into the right (group, key) coordinates,
     // we first compute a vector of take indices for each output column.
@@ -78,7 +79,8 @@ struct GroupedPivotAccumulator {
     // respective take_indices for the column's keys.
     //
 
-    DCHECK_EQ(groups.size(), keys.size());
+    DCHECK_EQ(keys->type->id(), Type::UINT32);
+    DCHECK_EQ(groups.size(), static_cast<size_t>(keys->length));
     DCHECK_EQ(groups.size(), static_cast<size_t>(values.length));
 
     std::shared_ptr<DataType> take_index_type;
@@ -118,20 +120,28 @@ struct GroupedPivotAccumulator {
       DCHECK_LE(offset, scratch_buffer_.capacity());
 
       // Populate the take_indices for each output column
-      for (int64_t i = 0; i < values.length; ++i) {
-        const PivotWiderKeyIndex key = keys[i];
-        if (key != kNullPivotKey && !values.IsNull(i)) {
-          DCHECK_LT(static_cast<int>(key), num_keys_);
-          const uint32_t group = groups[i];
-          if (bit_util::GetBit(take_bitmap_data[key], group)) {
-            return DuplicateValue();
-          }
-          // For row #group in column #key, we are going to take the value at 
index #i
-          bit_util::SetBit(take_bitmap_data[key], group);
-          take_indices_data[key][group] = static_cast<TakeIndex>(i);
-        }
-      }
-      return Status::OK();
+      const uint8_t* keys_null_bitmap =
+          (keys->GetNullCount() != 0) ? keys->GetValues<uint8_t>(0, 0) : 
nullptr;
+      const uint32_t* key_values = keys->GetValues<uint32_t>(1);
+      const uint8_t* values_null_bitmap =
+          (values.GetNullCount() != 0) ? values.GetValues<uint8_t>(0, 0) : 
nullptr;
+      return ::arrow::internal::VisitTwoBitBlocks(
+          keys_null_bitmap, keys->offset, values_null_bitmap, values.offset,
+          values.length,
+          [&](int64_t i) {
+            // Non-null key, non-null value
+            const uint32_t group = groups[i];
+            const uint32_t key = key_values[i];
+            DCHECK_LT(static_cast<int>(key), num_keys_);
+            if (ARROW_PREDICT_FALSE(bit_util::GetBit(take_bitmap_data[key], 
group))) {
+              return DuplicateValue();
+            }
+            // For row #group in column #key, we are going to take the value 
at index #i
+            bit_util::SetBit(take_bitmap_data[key], group);
+            take_indices_data[key][group] = static_cast<TakeIndex>(i);
+            return Status::OK();
+          },
+          [] { return Status::OK(); });
     };
 
     // Call compute_take_indices with the optimal integer width
@@ -166,12 +176,13 @@ struct GroupedPivotAccumulator {
     return MergeColumns(std::move(new_columns));
   }
 
-  Status Consume(span<const uint32_t> groups, const PivotWiderKeyIndex key,
+  Status Consume(span<const uint32_t> groups, 
std::optional<PivotWiderKeyIndex> maybe_key,
                  const ArraySpan& values) {
-    if (key == kNullPivotKey) {
+    if (!maybe_key.has_value()) {
       // Nothing to update
       return Status::OK();
     }
+    const auto key = maybe_key.value();
     DCHECK_LT(static_cast<int>(key), num_keys_);
     DCHECK_EQ(groups.size(), static_cast<size_t>(values.length));
 
@@ -381,7 +392,8 @@ struct GroupedPivotImpl : public GroupedAggregator {
     }
     out_type_ = struct_(std::move(fields));
     out_struct_type_ = checked_cast<const StructType*>(out_type_.get());
-    ARROW_ASSIGN_OR_RAISE(key_mapper_, PivotWiderKeyMapper::Make(*key_type_, 
options_));
+    ARROW_ASSIGN_OR_RAISE(key_mapper_,
+                          PivotWiderKeyMapper::Make(*key_type_, options_, 
ctx));
     RETURN_NOT_OK(accumulator_.Init(ctx, value_type, options_));
     return Status::OK();
   }
@@ -404,11 +416,11 @@ struct GroupedPivotImpl : public GroupedAggregator {
       return Status::NotImplemented("Consuming scalar pivot value");
     }
     if (batch[0].is_array()) {
-      ARROW_ASSIGN_OR_RAISE(span<const PivotWiderKeyIndex> keys,
+      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ArrayData> keys,
                             key_mapper_->MapKeys(batch[0].array));
       return accumulator_.Consume(groups, keys, batch[1].array);
     } else {
-      ARROW_ASSIGN_OR_RAISE(PivotWiderKeyIndex key,
+      ARROW_ASSIGN_OR_RAISE(std::optional<PivotWiderKeyIndex> key,
                             key_mapper_->MapKey(*batch[0].scalar));
       return accumulator_.Consume(groups, key, batch[1].array);
     }
@@ -444,6 +456,8 @@ const FunctionDoc hash_pivot_doc{
      "is emitted. If a pivot key doesn't appear in a given group, null is 
emitted.\n"
      "If more than one non-null value is encountered in the same group for a\n"
      "given pivot key, Invalid is raised.\n"
+     "The pivot key column can be string, binary or integer. The `key_names`\n"
+     "will be cast to the pivot key column type for matching.\n"
      "Behavior of unexpected pivot keys is controlled by 
`unexpected_key_behavior`\n"
      "in PivotWiderOptions."),
     {"pivot_keys", "pivot_values", "group_id_array"},
@@ -457,14 +471,20 @@ void RegisterHashAggregatePivot(FunctionRegistry* 
registry) {
   {
     auto func = std::make_shared<HashAggregateFunction>(
         "hash_pivot_wider", Arity::Ternary(), hash_pivot_doc, 
&default_pivot_options);
-    for (auto key_type : BaseBinaryTypes()) {
+    auto add_kernel = [&](InputType type) {
       // Anything that scatter() (i.e. take()) accepts can be passed as values
-      auto sig = KernelSignature::Make(
-          {key_type->id(), InputType::Any(), InputType(Type::UINT32)},
-          OutputType(ResolveGroupOutputType));
+      auto sig = KernelSignature::Make({type, InputType::Any(), 
InputType(Type::UINT32)},
+                                       OutputType(ResolveGroupOutputType));
       DCHECK_OK(func->AddKernel(
           MakeKernel(std::move(sig), HashAggregateInit<GroupedPivotImpl>)));
+    };
+    for (const auto& key_type : BaseBinaryTypes()) {
+      add_kernel(key_type->id());
+    }
+    for (const auto& key_type : IntTypes()) {
+      add_kernel(key_type->id());
     }
+    add_kernel(Type::FIXED_SIZE_BINARY);
     DCHECK_OK(registry->AddFunction(std::move(func)));
   }
 }
diff --git a/cpp/src/arrow/compute/kernels/pivot_internal.cc 
b/cpp/src/arrow/compute/kernels/pivot_internal.cc
index 7a65ddc212..72d96213c9 100644
--- a/cpp/src/arrow/compute/kernels/pivot_internal.cc
+++ b/cpp/src/arrow/compute/kernels/pivot_internal.cc
@@ -18,110 +18,139 @@
 #include "arrow/compute/kernels/pivot_internal.h"
 
 #include <cstdint>
+#include <string_view>
+#include <unordered_set>
 
+#include "arrow/array/array_primitive.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/compute/cast.h"
 #include "arrow/compute/exec.h"
 #include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/row/grouper.h"
 #include "arrow/scalar.h"
 #include "arrow/type_traits.h"
+#include "arrow/util/bit_run_reader.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/unreachable.h"
 #include "arrow/visit_type_inline.h"
 
 namespace arrow::compute::internal {
 
 using ::arrow::util::span;
 
-struct BasePivotKeyMapper : public PivotWiderKeyMapper {
-  Status Init(const PivotWiderOptions* options) override {
-    if (options->key_names.size() > static_cast<size_t>(kMaxPivotKey) + 1) {
+struct ConcretePivotWiderKeyMapper : public PivotWiderKeyMapper {
+  Status Init(const DataType& key_type, const PivotWiderOptions* options,
+              ExecContext* ctx) {
+    if (options->key_names.size() > static_cast<size_t>(kMaxPivotKey)) {
       return Status::NotImplemented("Pivoting to more than ",
-                                    static_cast<size_t>(kMaxPivotKey) + 1,
-                                    " columns: got ", 
options->key_names.size());
+                                    static_cast<size_t>(kMaxPivotKey), " 
columns: got ",
+                                    options->key_names.size());
     }
-    key_name_map_.reserve(options->key_names.size());
-    PivotWiderKeyIndex index = 0;
-    for (const auto& key_name : options->key_names) {
-      bool inserted =
-          key_name_map_.try_emplace(std::string_view(key_name), 
index++).second;
-      if (!inserted) {
-        return Status::KeyError("Duplicate key name '", key_name,
-                                "' in PivotWiderOptions");
+    unexpected_key_behavior_ = options->unexpected_key_behavior;
+    ARROW_ASSIGN_OR_RAISE(grouper_, Grouper::Make({&key_type}, ctx));
+    // Build a binary array of the pivot key values, and cast it to the 
desired key type
+    BinaryBuilder builder(ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(options->key_names.size()));
+    int64_t total_length = 0;
+    for (const auto& key : options->key_names) {
+      total_length += static_cast<int64_t>(key.length());
+    }
+    RETURN_NOT_OK(builder.ReserveData(total_length));
+    for (const auto& key : options->key_names) {
+      builder.UnsafeAppend(key);
+    }
+    ARROW_ASSIGN_OR_RAISE(auto binary_key_array, builder.Finish());
+    ARROW_ASSIGN_OR_RAISE(auto key_array,
+                          Cast(*binary_key_array, &key_type, 
CastOptions::Safe(), ctx));
+    // Populate the grouper with the keys from the array
+    ExecSpan batch({ExecValue(*key_array->data())}, key_array->length());
+    RETURN_NOT_OK(grouper_->Populate(batch));
+    if (grouper_->num_groups() != options->key_names.size()) {
+      // There's a duplicate key, find it to emit a nicer error message
+      std::unordered_set<std::string_view> seen;
+      for (const auto& key : options->key_names) {
+        auto [_, inserted] = seen.emplace(key);
+        if (!inserted) {
+          return Status::KeyError("Duplicate key name '", key, "' in 
PivotWiderOptions");
+        }
       }
+      Unreachable("Grouper doesn't agree with std::unordered_set");
     }
-    unexpected_key_behavior_ = options->unexpected_key_behavior;
     return Status::OK();
   }
 
- protected:
-  Result<PivotWiderKeyIndex> KeyNotFound(std::string_view key_name) {
-    if (unexpected_key_behavior_ == PivotWiderOptions::kIgnore) {
-      return kNullPivotKey;
+  Result<std::shared_ptr<ArrayData>> MapKeys(const ArraySpan& array) override {
+    if (array.GetNullCount() != 0) {
+      return NullKeyName();
     }
-    DCHECK_EQ(unexpected_key_behavior_, PivotWiderOptions::kRaise);
-    return Status::KeyError("Unexpected pivot key: ", key_name);
+    return MapKeysInternal(array, array.length);
   }
 
-  Result<PivotWiderKeyIndex> LookupKey(std::string_view key_name) {
-    const auto it = this->key_name_map_.find(key_name);
-    if (ARROW_PREDICT_FALSE(it == this->key_name_map_.end())) {
-      return KeyNotFound(key_name);
+  Result<std::optional<PivotWiderKeyIndex>> MapKey(const Scalar& scalar) 
override {
+    if (!scalar.is_valid) {
+      return NullKeyName();
+    }
+    ARROW_ASSIGN_OR_RAISE(auto group_id_array, MapKeysInternal(&scalar, 
/*length=*/1));
+    DCHECK_EQ(group_id_array->length, 1);
+    if (group_id_array->GetNullCount() == 0) {
+      return group_id_array->GetValues<uint32_t>(1)[0];
     } else {
-      return it->second;
+      // For UnexpectedKeyBehavior::kIgnore
+      return std::nullopt;
+    }
+  }
+
+ protected:
+  Result<std::shared_ptr<ArrayData>> MapKeysInternal(const ExecValue& values,
+                                                     int64_t length) {
+    ARROW_ASSIGN_OR_RAISE(auto result, grouper_->Lookup(ExecSpan({values}, 
length)));
+    DCHECK(result.is_array());
+    DCHECK_EQ(result.type()->id(), Type::UINT32);
+    auto group_id_array = result.array();
+    const bool has_nulls = (group_id_array->GetNullCount() != 0);
+    if (ARROW_PREDICT_FALSE(has_nulls) &&
+        unexpected_key_behavior_ == PivotWiderOptions::kRaise) {
+      // Extract unexpected key name, to emit a nicer error message
+      int64_t null_pos = 0;
+      DCHECK_NE(group_id_array->buffers[0], nullptr);
+      ::arrow::internal::BitRunReader 
bit_run_reader(group_id_array->buffers[0]->data(),
+                                                     group_id_array->offset,
+                                                     group_id_array->length);
+      // Search the first unset validity bit, indicating the first unexpected 
key
+      for (;;) {
+        auto run = bit_run_reader.NextRun();
+        if (run.length == 0 || !run.set) {
+          break;
+        }
+        null_pos += run.length;
+      }
+      DCHECK_LT(null_pos, group_id_array->length);
+      DCHECK_LT(null_pos, values.length());
+      std::shared_ptr<Scalar> key_scalar;
+      if (values.is_scalar()) {
+        DCHECK_EQ(null_pos, 0);
+        key_scalar = values.scalar->GetSharedPtr();
+      } else {
+        ARROW_ASSIGN_OR_RAISE(key_scalar, 
values.array.ToArray()->GetScalar(null_pos));
+      }
+      return Status::KeyError("Unexpected pivot key: ", 
key_scalar->ToString());
     }
+    return group_id_array;
   }
 
   Status NullKeyName() { return Status::KeyError("pivot key name cannot be 
null"); }
 
-  // The strings backing the string_views should be kept alive by 
PivotWiderOptions.
-  std::unordered_map<std::string_view, PivotWiderKeyIndex> key_name_map_;
+  std::unique_ptr<Grouper> grouper_;
   PivotWiderOptions::UnexpectedKeyBehavior unexpected_key_behavior_;
-  TypedBufferBuilder<PivotWiderKeyIndex> key_indices_buffer_;
-};
-
-template <typename KeyType>
-struct TypedPivotKeyMapper : public BasePivotKeyMapper {
-  Result<span<const PivotWiderKeyIndex>> MapKeys(const ArraySpan& array) 
override {
-    // XXX Should use a faster hashing facility than unordered_map, for example
-    // Grouper or SwissTable.
-    RETURN_NOT_OK(this->key_indices_buffer_.Reserve(array.length));
-    PivotWiderKeyIndex* key_indices = this->key_indices_buffer_.mutable_data();
-    int64_t i = 0;
-    RETURN_NOT_OK(VisitArrayValuesInline<KeyType>(
-        array,
-        [&](std::string_view key_name) {
-          ARROW_ASSIGN_OR_RAISE(key_indices[i], LookupKey(key_name));
-          ++i;
-          return Status::OK();
-        },
-        [&]() { return NullKeyName(); }));
-    return span(key_indices, array.length);
-  }
-
-  Result<PivotWiderKeyIndex> MapKey(const Scalar& scalar) override {
-    if (!scalar.is_valid) {
-      return NullKeyName();
-    }
-    const auto& binary_scalar = checked_cast<const BaseBinaryScalar&>(scalar);
-    return LookupKey(binary_scalar.view());
-  }
+  std::shared_ptr<Buffer> last_group_ids_;
 };
 
 Result<std::unique_ptr<PivotWiderKeyMapper>> PivotWiderKeyMapper::Make(
-    const DataType& key_type, const PivotWiderOptions* options) {
-  std::unique_ptr<PivotWiderKeyMapper> instance;
-
-  auto visit_key_type =
-      [&](auto&& key_type) -> Result<std::unique_ptr<PivotWiderKeyMapper>> {
-    using T = std::decay_t<decltype(key_type)>;
-    // Only binary-like keys are supported for now
-    if constexpr (is_base_binary_type<T>::value) {
-      instance = std::make_unique<TypedPivotKeyMapper<T>>();
-      RETURN_NOT_OK(instance->Init(options));
-      return std::move(instance);
-    }
-    return Status::NotImplemented("Pivot key type: ", key_type);
-  };
-
-  return VisitType(key_type, visit_key_type);
+    const DataType& key_type, const PivotWiderOptions* options, ExecContext* 
ctx) {
+  auto instance = std::make_unique<ConcretePivotWiderKeyMapper>();
+  RETURN_NOT_OK(instance->Init(key_type, options, ctx));
+  return instance;
 }
 
 }  // namespace arrow::compute::internal
diff --git a/cpp/src/arrow/compute/kernels/pivot_internal.h 
b/cpp/src/arrow/compute/kernels/pivot_internal.h
index faa808b7a2..9504e9f0a8 100644
--- a/cpp/src/arrow/compute/kernels/pivot_internal.h
+++ b/cpp/src/arrow/compute/kernels/pivot_internal.h
@@ -20,32 +20,29 @@
 #include <cstdint>
 #include <limits>
 #include <memory>
+#include <optional>
 
 #include "arrow/compute/api_aggregate.h"
 #include "arrow/compute/type_fwd.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
-#include "arrow/util/span.h"
 
 namespace arrow::compute::internal {
 
-using PivotWiderKeyIndex = uint8_t;
+using PivotWiderKeyIndex = uint32_t;
 
-constexpr PivotWiderKeyIndex kNullPivotKey =
+constexpr PivotWiderKeyIndex kMaxPivotKey =
     std::numeric_limits<PivotWiderKeyIndex>::max();
-constexpr PivotWiderKeyIndex kMaxPivotKey = kNullPivotKey - 1;
 
 struct PivotWiderKeyMapper {
   virtual ~PivotWiderKeyMapper() = default;
 
-  virtual Status Init(const PivotWiderOptions* options) = 0;
-  virtual Result<::arrow::util::span<const PivotWiderKeyIndex>> MapKeys(
-      const ArraySpan&) = 0;
-  virtual Result<PivotWiderKeyIndex> MapKey(const Scalar&) = 0;
+  virtual Result<std::shared_ptr<ArrayData>> MapKeys(const ArraySpan&) = 0;
+  virtual Result<std::optional<PivotWiderKeyIndex>> MapKey(const Scalar&) = 0;
 
   static Result<std::unique_ptr<PivotWiderKeyMapper>> Make(
-      const DataType& key_type, const PivotWiderOptions* options);
+      const DataType& key_type, const PivotWiderOptions* options, ExecContext* ctx);
 };
 
 }  // namespace arrow::compute::internal
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 6bbcac0074..051ed7da4e 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -199,57 +199,57 @@ Aggregations
 Scalar aggregations operate on a (chunked) array or scalar value and reduce
 the input to a single output value.
 
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| Function name      | Arity   | Input types      | Output type            | 
Options class                    | Notes |
-+====================+=========+==================+========================+==================================+=======+
-| all                | Unary   | Boolean          | Scalar Boolean         | 
:struct:`ScalarAggregateOptions` | \(1)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| any                | Unary   | Boolean          | Scalar Boolean         | 
:struct:`ScalarAggregateOptions` | \(1)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| approximate_median | Unary   | Numeric          | Scalar Float64         | 
:struct:`ScalarAggregateOptions` |       |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| count              | Unary   | Any              | Scalar Int64           | 
:struct:`CountOptions`           | \(2)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| count_all          | Nullary |                  | Scalar Int64           |   
                               |       |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| count_distinct     | Unary   | Non-nested types | Scalar Int64           | 
:struct:`CountOptions`           | \(2)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| first              | Unary   | Numeric, Binary  | Scalar Input type      | 
:struct:`ScalarAggregateOptions` | \(3) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| first_last         | Unary   | Numeric, Binary  | Scalar Struct          | 
:struct:`ScalarAggregateOptions` | \(3) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| index              | Unary   | Any              | Scalar Int64           | 
:struct:`IndexOptions`           | \(4)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| kurtosis           | Unary   | Numeric          | Scalar Float64         | 
:struct:`SkewOptions`            | \(11) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| last               | Unary   | Numeric, Binary  | Scalar Input type      | 
:struct:`ScalarAggregateOptions` | \(3) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| max                | Unary   | Non-nested types | Scalar Input type      | 
:struct:`ScalarAggregateOptions` |       |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| mean               | Unary   | Numeric          | Scalar Decimal/Float64 | 
:struct:`ScalarAggregateOptions` | \(5)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| min                | Unary   | Non-nested types | Scalar Input type      | 
:struct:`ScalarAggregateOptions` |       |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| min_max            | Unary   | Non-nested types | Scalar Struct          | 
:struct:`ScalarAggregateOptions` | \(6)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| mode               | Unary   | Numeric          | Struct                 | 
:struct:`ModeOptions`            | \(7)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| pivot_wider        | Binary  | Binary, Any      | Scalar Struct          | 
:struct:`PivotWiderOptions`      | \(8)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| product            | Unary   | Numeric          | Scalar Numeric         | 
:struct:`ScalarAggregateOptions` | \(9)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| quantile           | Unary   | Numeric          | Scalar Numeric         | 
:struct:`QuantileOptions`        | \(10) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| skew               | Unary   | Numeric          | Scalar Float64         | 
:struct:`SkewOptions`            | \(11) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| stddev             | Unary   | Numeric          | Scalar Float64         | 
:struct:`VarianceOptions`        | \(11) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| sum                | Unary   | Numeric          | Scalar Numeric         | 
:struct:`ScalarAggregateOptions` | \(9)  |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| tdigest            | Unary   | Numeric          | Float64                | 
:struct:`TDigestOptions`         | \(12) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
-| variance           | Unary   | Numeric          | Scalar Float64         | 
:struct:`VarianceOptions`        | \(11) |
-+--------------------+---------+------------------+------------------------+----------------------------------+-------+
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| Function name      | Arity   | Input types                                   
| Output type            | Options class                    | Notes |
++====================+=========+===============================================+========================+==================================+=======+
+| all                | Unary   | Boolean                                       
| Scalar Boolean         | :struct:`ScalarAggregateOptions` | \(1)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| any                | Unary   | Boolean                                       
| Scalar Boolean         | :struct:`ScalarAggregateOptions` | \(1)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| approximate_median | Unary   | Numeric                                       
| Scalar Float64         | :struct:`ScalarAggregateOptions` |       |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| count              | Unary   | Any                                           
| Scalar Int64           | :struct:`CountOptions`           | \(2)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| count_all          | Nullary |                                               
| Scalar Int64           |                                  |       |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| count_distinct     | Unary   | Non-nested types                              
| Scalar Int64           | :struct:`CountOptions`           | \(2)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| first              | Unary   | Numeric, Binary                               
| Scalar Input type      | :struct:`ScalarAggregateOptions` | \(3) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| first_last         | Unary   | Numeric, Binary                               
| Scalar Struct          | :struct:`ScalarAggregateOptions` | \(3) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| index              | Unary   | Any                                           
| Scalar Int64           | :struct:`IndexOptions`           | \(4)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| kurtosis           | Unary   | Numeric                                       
| Scalar Float64         | :struct:`SkewOptions`            | \(11) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| last               | Unary   | Numeric, Binary                               
| Scalar Input type      | :struct:`ScalarAggregateOptions` | \(3) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| max                | Unary   | Non-nested types                              
| Scalar Input type      | :struct:`ScalarAggregateOptions` |       |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| mean               | Unary   | Numeric                                       
| Scalar Decimal/Float64 | :struct:`ScalarAggregateOptions` | \(5)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| min                | Unary   | Non-nested types                              
| Scalar Input type      | :struct:`ScalarAggregateOptions` |       |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| min_max            | Unary   | Non-nested types                              
| Scalar Struct          | :struct:`ScalarAggregateOptions` | \(6)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| mode               | Unary   | Numeric                                       
| Struct                 | :struct:`ModeOptions`            | \(7)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| pivot_wider        | Binary  | Binary, String, Integer (Arg 0); Any (Arg 1)  
| Scalar Struct          | :struct:`PivotWiderOptions`      | \(8)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| product            | Unary   | Numeric                                       
| Scalar Numeric         | :struct:`ScalarAggregateOptions` | \(9)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| quantile           | Unary   | Numeric                                       
| Scalar Numeric         | :struct:`QuantileOptions`        | \(10) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| skew               | Unary   | Numeric                                       
| Scalar Float64         | :struct:`SkewOptions`            | \(11) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| stddev             | Unary   | Numeric                                       
| Scalar Float64         | :struct:`VarianceOptions`        | \(11) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| sum                | Unary   | Numeric                                       
| Scalar Numeric         | :struct:`ScalarAggregateOptions` | \(9)  |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| tdigest            | Unary   | Numeric                                       
| Float64                | :struct:`TDigestOptions`         | \(12) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
+| variance           | Unary   | Numeric                                       
| Scalar Float64         | :struct:`VarianceOptions`        | \(11) |
++--------------------+---------+-----------------------------------------------+------------------------+----------------------------------+-------+
 
 * \(1) If null values are taken into account, by setting the
   ScalarAggregateOptions parameter skip_nulls = false, then `Kleene logic`_
@@ -343,57 +343,57 @@ The supported aggregation functions are as follows. All function names are
 prefixed with ``hash_``, which differentiates them from their scalar
 equivalents above and reflects how they are implemented internally.
 
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| Function name           | Arity   | Input types                        | 
Output type            | Options class                    | Notes     |
-+=========================+=========+====================================+========================+==================================+===========+
-| hash_all                | Unary   | Boolean                            | 
Boolean                | :struct:`ScalarAggregateOptions` | \(1)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_any                | Unary   | Boolean                            | 
Boolean                | :struct:`ScalarAggregateOptions` | \(1)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_approximate_median | Unary   | Numeric                            | 
Float64                | :struct:`ScalarAggregateOptions` |           |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_count              | Unary   | Any                                | 
Int64                  | :struct:`CountOptions`           | \(2)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_count_all          | Nullary |                                    | 
Int64                  |                                  |           |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_count_distinct     | Unary   | Any                                | 
Int64                  | :struct:`CountOptions`           | \(2)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_distinct           | Unary   | Any                                | 
List of input type     | :struct:`CountOptions`           | \(2) \(3) |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_first              | Unary   | Numeric, Binary                    | 
Input type             | :struct:`ScalarAggregateOptions` | \(11)     |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_first_last         | Unary   | Numeric, Binary                    | 
Struct                 | :struct:`ScalarAggregateOptions` | \(11)     |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_kurtosis           | Unary   | Numeric                            | 
Float64                | :struct:`SkewOptions`            | \(9)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_last               | Unary   | Numeric, Binary                    | 
Input type             | :struct:`ScalarAggregateOptions` | \(11)     |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_list               | Unary   | Any                                | 
List of input type     |                                  | \(3)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_max                | Unary   | Non-nested, non-binary/string-like | 
Input type             | :struct:`ScalarAggregateOptions` |           |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_mean               | Unary   | Numeric                            | 
Decimal/Float64        | :struct:`ScalarAggregateOptions` | \(4)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_min                | Unary   | Non-nested, non-binary/string-like | 
Input type             | :struct:`ScalarAggregateOptions` |           |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_min_max            | Unary   | Non-nested types                   | 
Struct                 | :struct:`ScalarAggregateOptions` | \(5)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_one                | Unary   | Any                                | 
Input type             |                                  | \(6)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_pivot_wider        | Binary  | Binary, Any                        | 
Struct                 | :struct:`PivotWiderOptions`      | \(7)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_product            | Unary   | Numeric                            | 
Numeric                | :struct:`ScalarAggregateOptions` | \(8)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_skew               | Unary   | Numeric                            | 
Float64                | :struct:`SkewOptions`            | \(9)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_stddev             | Unary   | Numeric                            | 
Float64                | :struct:`VarianceOptions`        | \(9)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_sum                | Unary   | Numeric                            | 
Numeric                | :struct:`ScalarAggregateOptions` | \(8)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_tdigest            | Unary   | Numeric                            | 
FixedSizeList[Float64] | :struct:`TDigestOptions`         | \(10)     |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
-| hash_variance           | Unary   | Numeric                            | 
Float64                | :struct:`VarianceOptions`        | \(9)      |
-+-------------------------+---------+------------------------------------+------------------------+----------------------------------+-----------+
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| Function name           | Arity   | Input types                              
    | Output type            | Options class                    | Notes     |
++=========================+=========+==============================================+========================+==================================+===========+
+| hash_all                | Unary   | Boolean                                  
    | Boolean                | :struct:`ScalarAggregateOptions` | \(1)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_any                | Unary   | Boolean                                  
    | Boolean                | :struct:`ScalarAggregateOptions` | \(1)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_approximate_median | Unary   | Numeric                                  
    | Float64                | :struct:`ScalarAggregateOptions` |           |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_count              | Unary   | Any                                      
    | Int64                  | :struct:`CountOptions`           | \(2)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_count_all          | Nullary |                                          
    | Int64                  |                                  |           |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_count_distinct     | Unary   | Any                                      
    | Int64                  | :struct:`CountOptions`           | \(2)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_distinct           | Unary   | Any                                      
    | List of input type     | :struct:`CountOptions`           | \(2) \(3) |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_first              | Unary   | Numeric, Binary                          
    | Input type             | :struct:`ScalarAggregateOptions` | \(11)     |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_first_last         | Unary   | Numeric, Binary                          
    | Struct                 | :struct:`ScalarAggregateOptions` | \(11)     |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_kurtosis           | Unary   | Numeric                                  
    | Float64                | :struct:`SkewOptions`            | \(9)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_last               | Unary   | Numeric, Binary                          
    | Input type             | :struct:`ScalarAggregateOptions` | \(11)     |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_list               | Unary   | Any                                      
    | List of input type     |                                  | \(3)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_max                | Unary   | Non-nested, non-binary/string-like       
    | Input type             | :struct:`ScalarAggregateOptions` |           |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_mean               | Unary   | Numeric                                  
    | Decimal/Float64        | :struct:`ScalarAggregateOptions` | \(4)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_min                | Unary   | Non-nested, non-binary/string-like       
    | Input type             | :struct:`ScalarAggregateOptions` |           |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_min_max            | Unary   | Non-nested types                         
    | Struct                 | :struct:`ScalarAggregateOptions` | \(5)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_one                | Unary   | Any                                      
    | Input type             |                                  | \(6)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_pivot_wider        | Binary  | Binary, String, Integer (Arg 0); Any (Arg 1) | Struct                 | :struct:`PivotWiderOptions`      | \(7)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_product            | Unary   | Numeric                                  
    | Numeric                | :struct:`ScalarAggregateOptions` | \(8)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_skew               | Unary   | Numeric                                  
    | Float64                | :struct:`SkewOptions`            | \(9)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_stddev             | Unary   | Numeric                                  
    | Float64                | :struct:`VarianceOptions`        | \(9)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_sum                | Unary   | Numeric                                  
    | Numeric                | :struct:`ScalarAggregateOptions` | \(8)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_tdigest            | Unary   | Numeric                                  
    | FixedSizeList[Float64] | :struct:`TDigestOptions`         | \(10)     |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
+| hash_variance           | Unary   | Numeric                                  
    | Float64                | :struct:`VarianceOptions`        | \(9)      |
++-------------------------+---------+----------------------------------------------+------------------------+----------------------------------+-----------+
 
 * \(1) If null values are taken into account, by setting the
   :member:`ScalarAggregateOptions::skip_nulls` to false, then `Kleene logic`_

Reply via email to