bkietz commented on a change in pull request #10511:
URL: https://github.com/apache/arrow/pull/10511#discussion_r660923340



##########
File path: python/pyarrow/_compute.pyx
##########
@@ -541,17 +601,19 @@ cdef class FunctionOptions(_Weakrefable):
 
 cdef class _CastOptions(FunctionOptions):
     cdef:
-        unique_ptr[CCastOptions] options
+        CCastOptions* options

Review comment:
       I think this member isn't necessary anymore

##########
File path: cpp/src/arrow/compute/function_internal.h
##########
@@ -0,0 +1,577 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/reflection_internal.h"
+#include "arrow/util/string.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+struct Scalar;
+struct StructScalar;
+using ::arrow::internal::checked_cast;
+namespace compute {
+namespace internal {
+
+class GenericOptionsType : public FunctionOptionsType {
+ public:
+  Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const 
override;
+  Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const override;
+  virtual Status ToStructScalar(const FunctionOptions& options,
+                                std::vector<std::string>* field_names,
+                                std::vector<std::shared_ptr<Scalar>>* values) 
const = 0;
+  virtual Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+      const StructScalar& scalar) const = 0;
+};
+
+ARROW_EXPORT
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+    const FunctionOptions&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+    const StructScalar&);
+
+template <typename Enum>
+struct EnumTraits;
+
+template <typename Enum>
+struct BasicEnumTraits {
+  using CType = typename std::underlying_type<Enum>::type;
+  using Type = typename CTypeTraits<CType>::ArrowType;
+};
+template <>
+struct EnumTraits<SortOrder> : BasicEnumTraits<SortOrder> {
+  static std::string name() { return "SortOrder"; }
+  static std::array<SortOrder, 2> values() {
+    return {SortOrder::Ascending, SortOrder::Descending};
+  }
+};
+
+template <typename Enum, typename CType = typename 
std::underlying_type<Enum>::type>
+Result<Enum> ValidateEnumValue(CType raw) {
+  for (auto valid : EnumTraits<Enum>::values()) {
+    if (raw == static_cast<CType>(valid)) {
+      return static_cast<Enum>(raw);
+    }
+  }
+  return Status::Invalid("Invalid value for ", EnumTraits<Enum>::name(), ": ", 
raw);
+}
+
+template <typename T>
+static inline std::string GenericToString(const T& value) {
+  std::stringstream ss;
+  ss << value;
+  return ss.str();
+}

Review comment:
       Nit: could you space these out some?
   ```suggestion
   }
   
   ```

##########
File path: cpp/src/arrow/compute/function_test.cc
##########
@@ -21,16 +21,110 @@
 
 #include <gtest/gtest.h>
 
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/api_scalar.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/cast.h"
 #include "arrow/compute/function.h"
 #include "arrow/compute/kernel.h"
 #include "arrow/datum.h"
 #include "arrow/status.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
+#include "arrow/util/key_value_metadata.h"
 
 namespace arrow {
 namespace compute {
 
+TEST(FunctionOptions, Equality) {
+  std::vector<std::shared_ptr<FunctionOptions>> options;
+  options.emplace_back(new ScalarAggregateOptions());
+  options.emplace_back(new ScalarAggregateOptions(/*skip_nulls=*/false, 
/*min_count=*/1));
+  options.emplace_back(new ModeOptions());
+  options.emplace_back(new ModeOptions(/*n=*/2));
+  options.emplace_back(new VarianceOptions());
+  options.emplace_back(new VarianceOptions(/*ddof=*/2));
+  options.emplace_back(new QuantileOptions());
+  options.emplace_back(
+      new QuantileOptions(/*q=*/0.75, 
QuantileOptions::Interpolation::MIDPOINT));
+  options.emplace_back(new TDigestOptions());
+  options.emplace_back(
+      new TDigestOptions(/*q=*/0.75, /*delta=*/50, /*buffer_size=*/1024));
+  options.emplace_back(new IndexOptions(ScalarFromJSON(int64(), "16")));
+  options.emplace_back(new IndexOptions(ScalarFromJSON(boolean(), "true")));
+  options.emplace_back(new IndexOptions(ScalarFromJSON(boolean(), "null")));
+  options.emplace_back(new ElementWiseAggregateOptions());
+  options.emplace_back(new ElementWiseAggregateOptions(/*skip_nulls=*/false));
+  options.emplace_back(new JoinOptions());
+  options.emplace_back(new JoinOptions(JoinOptions::REPLACE, "replacement"));
+  options.emplace_back(new MatchSubstringOptions("pattern"));
+  options.emplace_back(new MatchSubstringOptions("pattern", 
/*ignore_case=*/true));
+  options.emplace_back(new SplitOptions());
+  options.emplace_back(new SplitOptions(/*max_splits=*/2, /*reverse=*/true));
+  options.emplace_back(new SplitPatternOptions("pattern"));
+  options.emplace_back(
+      new SplitPatternOptions("pattern", /*max_splits=*/2, /*reverse=*/true));
+  options.emplace_back(new ReplaceSubstringOptions("pattern", "replacement"));
+  options.emplace_back(
+      new ReplaceSubstringOptions("pattern", "replacement", 
/*max_replacements=*/2));
+  options.emplace_back(new ReplaceSliceOptions(0, 1, "foo"));
+  options.emplace_back(new ReplaceSliceOptions(1, -1, "bar"));
+  options.emplace_back(new ExtractRegexOptions("pattern"));
+  options.emplace_back(new ExtractRegexOptions("pattern2"));
+  options.emplace_back(new SetLookupOptions(ArrayFromJSON(int64(), "[1, 2, 3, 
4]")));
+  options.emplace_back(new SetLookupOptions(ArrayFromJSON(boolean(), "[true, 
false]")));
+  options.emplace_back(new StrptimeOptions("%Y", TimeUnit::type::MILLI));
+  options.emplace_back(new StrptimeOptions("%Y", TimeUnit::type::NANO));
+  options.emplace_back(new PadOptions(5, " "));
+  options.emplace_back(new PadOptions(10, "A"));
+  options.emplace_back(new TrimOptions(" "));
+  options.emplace_back(new TrimOptions("abc"));
+  options.emplace_back(new SliceOptions(/*start=*/1));
+  options.emplace_back(new SliceOptions(/*start=*/1, /*stop=*/-5, 
/*step=*/-2));
+  // N.B. we never actually use field_nullability or field_metadata in Arrow
+  options.emplace_back(new ProjectOptions({"col1"}, {true}, {}));
+  options.emplace_back(new ProjectOptions({"col1"}, {false}, {}));
+  options.emplace_back(
+      new ProjectOptions({"col1"}, {false}, {key_value_metadata({{"key", 
"val"}})}));
+  options.emplace_back(new CastOptions(CastOptions::Safe(boolean())));
+  options.emplace_back(new CastOptions(CastOptions::Unsafe(int64())));
+  options.emplace_back(new FilterOptions());
+  options.emplace_back(
+      new FilterOptions(FilterOptions::NullSelectionBehavior::EMIT_NULL));
+  options.emplace_back(new TakeOptions());
+  options.emplace_back(new TakeOptions(/*boundscheck=*/false));
+  options.emplace_back(new DictionaryEncodeOptions());
+  options.emplace_back(
+      new 
DictionaryEncodeOptions(DictionaryEncodeOptions::NullEncodingBehavior::ENCODE));
+  options.emplace_back(new ArraySortOptions());
+  options.emplace_back(new ArraySortOptions(SortOrder::Descending));
+  options.emplace_back(new SortOptions());
+  options.emplace_back(new SortOptions({SortKey("key", 
SortOrder::Ascending)}));
+  options.emplace_back(new SortOptions({SortKey("key", 
SortOrder::Descending)}));

Review comment:
       Just to ensure we have at least one `std::vector` with more than 1 
element:
   ```suggestion
     options.emplace_back(new SortOptions({SortKey("key", 
SortOrder::Descending),
                                           SortKey("val", 
SortOrder::Descending)}));
   ```

##########
File path: cpp/src/arrow/util/reflection_internal.h
##########
@@ -99,6 +99,11 @@ struct PropertyTuple {
     ForEachTupleMember(props_, fn);
   }
 
+  template <template <typename...> class T, typename... Args>
+  struct Apply {
+    typedef T<Args..., Properties...> type;
+  };
+

Review comment:
       ```suggestion
   ```

##########
File path: cpp/src/arrow/compute/function_internal.h
##########
@@ -0,0 +1,577 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/reflection_internal.h"
+#include "arrow/util/string.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+struct Scalar;
+struct StructScalar;
+using ::arrow::internal::checked_cast;
+namespace compute {
+namespace internal {
+
+class GenericOptionsType : public FunctionOptionsType {
+ public:
+  Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const 
override;
+  Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const override;
+  virtual Status ToStructScalar(const FunctionOptions& options,
+                                std::vector<std::string>* field_names,
+                                std::vector<std::shared_ptr<Scalar>>* values) 
const = 0;
+  virtual Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+      const StructScalar& scalar) const = 0;
+};
+
+ARROW_EXPORT
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+    const FunctionOptions&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+    const StructScalar&);
+
+template <typename Enum>
+struct EnumTraits;
+
+template <typename Enum>
+struct BasicEnumTraits {
+  using CType = typename std::underlying_type<Enum>::type;
+  using Type = typename CTypeTraits<CType>::ArrowType;
+};
+template <>
+struct EnumTraits<SortOrder> : BasicEnumTraits<SortOrder> {
+  static std::string name() { return "SortOrder"; }
+  static std::array<SortOrder, 2> values() {
+    return {SortOrder::Ascending, SortOrder::Descending};
+  }
+};
+
+template <typename Enum, typename CType = typename 
std::underlying_type<Enum>::type>
+Result<Enum> ValidateEnumValue(CType raw) {
+  for (auto valid : EnumTraits<Enum>::values()) {
+    if (raw == static_cast<CType>(valid)) {
+      return static_cast<Enum>(raw);
+    }
+  }
+  return Status::Invalid("Invalid value for ", EnumTraits<Enum>::name(), ": ", 
raw);
+}
+
+template <typename T>
+static inline std::string GenericToString(const T& value) {
+  std::stringstream ss;
+  ss << value;
+  return ss.str();
+}
+static inline std::string GenericToString(bool value) { return value ? "true" 
: "false"; }
+static inline std::string GenericToString(const std::string& value) {
+  std::stringstream ss;
+  ss << '"' << value << '"';
+  return ss.str();
+}
+template <typename T>
+static inline std::string GenericToString(const std::shared_ptr<T>& value) {
+  std::stringstream ss;
+  return value ? value->ToString() : "<NULLPTR>";
+}
+static inline std::string GenericToString(const std::shared_ptr<Scalar>& 
value) {
+  std::stringstream ss;
+  ss << value->type->ToString() << ":" << value->ToString();
+  return ss.str();
+}
+static inline std::string GenericToString(
+    const std::shared_ptr<const KeyValueMetadata>& value) {
+  std::stringstream ss;
+  ss << "KeyValueMetadata{";
+  if (value) {
+    bool first = true;
+    for (const auto& pair : value->sorted_pairs()) {
+      if (!first) ss << ", ";
+      first = false;
+      ss << pair.first << ':' << pair.second;
+    }
+  }
+  ss << '}';
+  return ss.str();
+}
+static inline std::string GenericToString(const Datum& value) {
+  switch (value.kind()) {
+    case Datum::NONE:
+      return "<NULL DATUM>";
+    case Datum::SCALAR:
+      return GenericToString(value.scalar());
+    case Datum::ARRAY: {
+      std::stringstream ss;
+      ss << value.type()->ToString() << ':' << value.make_array()->ToString();
+      return ss.str();
+    }
+    case Datum::CHUNKED_ARRAY:
+    case Datum::RECORD_BATCH:
+    case Datum::TABLE:
+    case Datum::COLLECTION:
+      return value.ToString();
+  }
+  return value.ToString();
+}
+template <typename T>
+static inline std::string GenericToString(const std::vector<T>& value) {
+  std::stringstream ss;
+  ss << "[";
+  bool first = true;
+  // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis
+  for (auto it = value.begin(); it != value.end(); it++) {
+    if (!first) ss << ", ";
+    first = false;
+    ss << GenericToString(*it);
+  }
+  ss << ']';
+  return ss.str();
+}
+static inline std::string GenericToString(SortOrder value) {
+  switch (value) {
+    case SortOrder::Ascending:
+      return "Ascending";
+    case SortOrder::Descending:
+      return "Descending";
+  }
+  return "<INVALID SORT ORDER>";
+}
+static inline std::string GenericToString(const std::vector<SortKey>& value) {
+  std::stringstream ss;
+  ss << '[';
+  bool first = true;
+  for (const auto& key : value) {
+    if (!first) {
+      ss << ", ";
+    }
+    first = false;
+    ss << key.ToString();
+  }
+  ss << ']';
+  return ss.str();
+}
+
+template <typename T>
+static inline bool GenericEquals(const T& left, const T& right) {
+  return left == right;
+}
+template <typename T>
+static inline bool GenericEquals(const std::shared_ptr<T>& left,
+                                 const std::shared_ptr<T>& right) {
+  if (left && right) {
+    return left->Equals(*right);
+  }
+  return left == right;
+}
+static inline bool IsEmpty(const std::shared_ptr<const KeyValueMetadata>& 
meta) {
+  return !meta || meta->size() == 0;
+}
+static inline bool GenericEquals(const std::shared_ptr<const 
KeyValueMetadata>& left,
+                                 const std::shared_ptr<const 
KeyValueMetadata>& right) {
+  // Special case since null metadata is considered equivalent to empty
+  if (IsEmpty(left) || IsEmpty(right)) {
+    return IsEmpty(left) && IsEmpty(right);
+  }
+  return left->Equals(*right);
+}
+template <typename T>
+static inline bool GenericEquals(const std::vector<T>& left,
+                                 const std::vector<T>& right) {
+  if (left.size() != right.size()) return false;
+  for (size_t i = 0; i < left.size(); i++) {
+    if (!GenericEquals(left[i], right[i])) return false;
+  }
+  return true;
+}
+
+template <typename T>
+static inline decltype(TypeTraits<typename 
CTypeTraits<T>::ArrowType>::type_singleton())
+GenericTypeSingleton() {
+  return TypeTraits<typename CTypeTraits<T>::ArrowType>::type_singleton();
+}
+template <typename T>
+static inline enable_if_same<T, std::shared_ptr<const KeyValueMetadata>,
+                             std::shared_ptr<DataType>>
+GenericTypeSingleton() {
+  return map(binary(), binary());
+}
+template <typename T>
+static inline decltype(TypeTraits<typename 
EnumTraits<T>::Type>::type_singleton())
+GenericTypeSingleton() {
+  return TypeTraits<typename EnumTraits<T>::Type>::type_singleton();
+}
+template <typename T>
+static inline enable_if_same<T, SortKey, std::shared_ptr<DataType>>
+GenericTypeSingleton() {
+  std::vector<std::shared_ptr<Field>> fields;
+  fields.emplace_back(new Field("name", GenericTypeSingleton<std::string>()));
+  fields.emplace_back(new Field("order", GenericTypeSingleton<SortOrder>()));
+  return std::make_shared<StructType>(std::move(fields));
+}
+
+// N.B. ordering of overloads is relatively fragile
+template <typename T>
+static inline Result<decltype(MakeScalar(std::declval<T>()))> GenericToScalar(
+    const T& value) {
+  return MakeScalar(value);
+}
+template <typename T,
+          typename Enable = 
arrow::internal::void_t<decltype(EnumTraits<T>::name())>>
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const T value) {
+  using CType = typename EnumTraits<T>::CType;
+  return GenericToScalar(static_cast<CType>(value));
+}
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const SortKey& 
value) {
+  ARROW_ASSIGN_OR_RAISE(auto name, GenericToScalar(value.name));
+  ARROW_ASSIGN_OR_RAISE(auto order, GenericToScalar(value.order));
+  return StructScalar::Make({name, order}, {"name", "order"});
+}
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::shared_ptr<const KeyValueMetadata>& value) {
+  auto ty = GenericTypeSingleton<std::shared_ptr<const KeyValueMetadata>>();
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(default_memory_pool(), ty, &builder));
+  auto* map_builder = checked_cast<MapBuilder*>(builder.get());
+  auto* key_builder = checked_cast<BinaryBuilder*>(map_builder->key_builder());
+  auto* item_builder = 
checked_cast<BinaryBuilder*>(map_builder->item_builder());
+  RETURN_NOT_OK(map_builder->Append());
+  if (value) {
+    RETURN_NOT_OK(key_builder->AppendValues(value->keys()));
+    RETURN_NOT_OK(item_builder->AppendValues(value->values()));
+  }
+  std::shared_ptr<Array> arr;
+  RETURN_NOT_OK(map_builder->Finish(&arr));
+  return arr->GetScalar(0);
+}
+template <typename T>
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::vector<T>& value) {
+  std::shared_ptr<DataType> type = GenericTypeSingleton<T>();
+  std::vector<std::shared_ptr<Scalar>> scalars;
+  scalars.reserve(value.size());
+  // Don't use range-for with auto& to avoid Clang -Wrange-loop-analysis
+  for (auto it = value.begin(); it != value.end(); it++) {
+    ARROW_ASSIGN_OR_RAISE(auto scalar, GenericToScalar(*it));
+    scalars.push_back(std::move(scalar));
+  }
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(
+      MakeBuilder(default_memory_pool(), type ? type : scalars[0]->type, 
&builder));
+  RETURN_NOT_OK(builder->AppendScalars(scalars));
+  std::shared_ptr<Array> out;
+  RETURN_NOT_OK(builder->Finish(&out));
+  return std::make_shared<ListScalar>(std::move(out));
+}
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::shared_ptr<DataType>& value) {
+  if (!value) {
+    return Status::Invalid("shared_ptr<DataType> is nullptr");
+  }
+  return MakeNullScalar(value);
+}
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::shared_ptr<Scalar>& value) {
+  return value;
+}
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(
+    const std::shared_ptr<Array>& value) {
+  return std::make_shared<ListScalar>(value);
+}
+static inline Result<std::shared_ptr<Scalar>> GenericToScalar(const Datum& 
value) {
+  // TODO(ARROW-9434): store in a union instead.
+  switch (value.kind()) {
+    case Datum::ARRAY:
+      return GenericToScalar(value.make_array());
+      break;
+    default:
+      return Status::NotImplemented("Cannot serialize Datum kind ", 
value.kind());
+  }
+}
+
+template <typename T>
+static inline enable_if_primitive_ctype<typename CTypeTraits<T>::ArrowType, 
Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+  using ArrowType = typename CTypeTraits<T>::ArrowType;
+  using ScalarType = typename TypeTraits<ArrowType>::ScalarType;
+  if (value->type->id() != ArrowType::type_id) {
+    return Status::Invalid("Expected type ", ArrowType::type_id, " but got ",
+                           value->type->ToString());
+  }
+  const auto& holder = checked_cast<const ScalarType&>(*value);
+  if (!holder.is_valid) return Status::Invalid("Got null scalar");
+  return holder.value;
+}
+template <typename T>
+static inline enable_if_primitive_ctype<typename EnumTraits<T>::Type, 
Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+  ARROW_ASSIGN_OR_RAISE(auto raw_val,
+                        GenericFromScalar<typename 
EnumTraits<T>::CType>(value));
+  return ValidateEnumValue<T>(raw_val);
+}
+
+template <typename T, typename U>
+using enable_if_same_result = enable_if_same<T, U, Result<T>>;
+
+template <typename T>
+static inline enable_if_same_result<T, std::string> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  if (!is_base_binary_like(value->type->id())) {
+    return Status::Invalid("Expected binary-like type but got ", 
value->type->ToString());
+  }
+  const auto& holder = checked_cast<const BaseBinaryScalar&>(*value);
+  if (!holder.is_valid) return Status::Invalid("Got null scalar");
+  return holder.value->ToString();
+}
+template <typename T>
+static inline enable_if_same_result<T, SortKey> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  if (value->type->id() != Type::STRUCT) {
+    return Status::Invalid("Expected type STRUCT but got ", value->type->id());
+  }
+  if (!value->is_valid) return Status::Invalid("Got null scalar");
+  const auto& holder = checked_cast<const StructScalar&>(*value);
+  ARROW_ASSIGN_OR_RAISE(auto name_holder, holder.field("name"));
+  ARROW_ASSIGN_OR_RAISE(auto order_holder, holder.field("order"));
+  ARROW_ASSIGN_OR_RAISE(auto name, 
GenericFromScalar<std::string>(name_holder));
+  ARROW_ASSIGN_OR_RAISE(auto order, 
GenericFromScalar<SortOrder>(order_holder));
+  return SortKey{std::move(name), order};
+}
+
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<DataType>> 
GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  return value->type;
+}
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<Scalar>> 
GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  return value;
+}
+template <typename T>
+static inline enable_if_same_result<T, std::shared_ptr<const KeyValueMetadata>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+  auto ty = GenericTypeSingleton<std::shared_ptr<const KeyValueMetadata>>();
+  if (!value->type->Equals(ty)) {
+    return Status::Invalid("Expected ", ty->ToString(), " but got ",
+                           value->type->ToString());
+  }
+  const auto& holder = checked_cast<const MapScalar&>(*value);
+  std::vector<std::string> keys;
+  std::vector<std::string> values;
+  const auto& list = checked_cast<const StructArray&>(*holder.value);
+  const auto& key_arr = checked_cast<const BinaryArray&>(*list.field(0));
+  const auto& value_arr = checked_cast<const BinaryArray&>(*list.field(1));
+  for (int64_t i = 0; i < list.length(); i++) {
+    keys.push_back(key_arr.GetString(i));
+    values.push_back(value_arr.GetString(i));
+  }
+  return key_value_metadata(std::move(keys), std::move(values));
+}
+template <typename T>
+static inline enable_if_same_result<T, Datum> GenericFromScalar(
+    const std::shared_ptr<Scalar>& value) {
+  if (value->type->id() == Type::LIST) {
+    const auto& holder = checked_cast<const BaseListScalar&>(*value);
+    return holder.value;
+  }
+  // TODO(ARROW-9434): handle other possible datum kinds by looking for a union
+  return Status::Invalid("Cannot deserialize Datum from ", value->ToString());
+}
+
+template <typename T>
+static enable_if_same<typename CTypeTraits<T>::ArrowType, ListType, Result<T>>
+GenericFromScalar(const std::shared_ptr<Scalar>& value) {
+  using ValueType = typename T::value_type;
+  if (value->type->id() != Type::LIST) {
+    return Status::Invalid("Expected type LIST but got ", 
value->type->ToString());
+  }
+  const auto& holder = checked_cast<const BaseListScalar&>(*value);
+  if (!holder.is_valid) return Status::Invalid("Got null scalar");
+  std::vector<ValueType> result;
+  for (int i = 0; i < holder.value->length(); i++) {
+    ARROW_ASSIGN_OR_RAISE(auto scalar, holder.value->GetScalar(i));
+    ARROW_ASSIGN_OR_RAISE(auto v, GenericFromScalar<ValueType>(scalar));
+    result.push_back(std::move(v));
+  }
+  return result;
+}
+
+template <typename Options>
+struct StringifyImpl {
+  template <typename Tuple>
+  StringifyImpl(const Options& obj, const Tuple& props)
+      : obj_(obj), members_(props.size()) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t i) {
+    std::stringstream ss;
+    ss << prop.name() << '=' << GenericToString(prop.get(obj_));
+    members_[i] = ss.str();
+  }
+
+  std::string Finish() {
+    return "{" + arrow::internal::JoinStrings(members_, ", ") + "}";
+  }
+
+  const Options& obj_;
+  std::vector<std::string> members_;
+};
+
+template <typename Options>
+struct CompareImpl {
+  template <typename Tuple>
+  CompareImpl(const Options& l, const Options& r, const Tuple& props)
+      : left_(l), right_(r) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t) {
+    equal_ &= GenericEquals(prop.get(left_), prop.get(right_));
+  }
+
+  const Options& left_;
+  const Options& right_;
+  bool equal_ = true;
+};
+
+template <typename Options>
+struct ToStructScalarImpl {
+  template <typename Tuple>
+  ToStructScalarImpl(const Options& obj, const Tuple& props,
+                     std::vector<std::string>* field_names,
+                     std::vector<std::shared_ptr<Scalar>>* values)
+      : obj_(obj), field_names_(field_names), values_(values) {
+    props.ForEach(*this);
+  }
+
+  template <typename Property>
+  void operator()(const Property& prop, size_t) {
+    if (!status_.ok()) return;
+    auto result = GenericToScalar(prop.get(obj_));
+    if (!result.ok()) {
+      status_ = result.status().WithMessage("Cannot serialize field ", 
prop.name(),

Review comment:
       ```suggestion
         status_ = result.status().WithMessage("Could not serialize field ", 
prop.name(),
   ```

##########
File path: cpp/src/arrow/compute/function.h
##########
@@ -39,12 +40,47 @@ namespace compute {
 ///
 /// @{
 
+/// \brief Extension point for defining options outside libarrow (but
+/// still within this project).
+class ARROW_EXPORT FunctionOptionsType {
+ public:
+  virtual ~FunctionOptionsType() = default;
+
+  virtual const char* type_name() const = 0;
+  virtual std::string Stringify(const FunctionOptions&) const = 0;
+  virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 
0;
+  virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) 
const;
+  virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const;
+};
+
 /// \brief Base class for specifying options configuring a function's behavior,
 /// such as error handling.
-struct ARROW_EXPORT FunctionOptions {
+class ARROW_EXPORT FunctionOptions : public 
util::EqualityComparable<FunctionOptions> {
+ public:
   virtual ~FunctionOptions() = default;
+
+  const FunctionOptionsType* options_type() const { return options_type_; }
+  const char* type_name() const { return options_type()->type_name(); }
+
+  bool Equals(const FunctionOptions& other) const;
+  using util::EqualityComparable<FunctionOptions>::Equals;
+  using util::EqualityComparable<FunctionOptions>::operator==;
+  using util::EqualityComparable<FunctionOptions>::operator!=;
+  std::string ToString() const;
+  /// \brief Serialize an options struct to a buffer.
+  Result<std::shared_ptr<Buffer>> Serialize() const;
+  /// \brief Deserialize an options struct from a buffer.

Review comment:
       ```suggestion
     /// \brief Deserialize an options struct from a buffer.
     /// Note: this will only look for `type_name` in the default 
FunctionRegistry;
     /// to use a custom FunctionRegistry, look up the FunctionOptionsType then
     /// call FunctionOptionsType::Deserialize()
   ```

##########
File path: cpp/src/arrow/compute/function_internal.h
##########
@@ -0,0 +1,577 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/reflection_internal.h"
+#include "arrow/util/string.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+struct Scalar;
+struct StructScalar;
+using ::arrow::internal::checked_cast;
+namespace compute {
+namespace internal {
+
+class GenericOptionsType : public FunctionOptionsType {
+ public:
+  Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const 
override;
+  Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const override;
+  virtual Status ToStructScalar(const FunctionOptions& options,
+                                std::vector<std::string>* field_names,
+                                std::vector<std::shared_ptr<Scalar>>* values) 
const = 0;
+  virtual Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+      const StructScalar& scalar) const = 0;
+};
+
+ARROW_EXPORT
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+    const FunctionOptions&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+    const StructScalar&);
+
+template <typename Enum>
+struct EnumTraits;
+
+template <typename Enum>
+struct BasicEnumTraits {
+  using CType = typename std::underlying_type<Enum>::type;
+  using Type = typename CTypeTraits<CType>::ArrowType;
+};
+template <>
+struct EnumTraits<SortOrder> : BasicEnumTraits<SortOrder> {
+  static std::string name() { return "SortOrder"; }
+  static std::array<SortOrder, 2> values() {
+    return {SortOrder::Ascending, SortOrder::Descending};
+  }
+};

Review comment:
       Not necessary, but maybe a bit nicer:
   
   ```suggestion
   template <typename Enum, Enum... Values>
   struct BasicEnumTraits {
     using CType = typename std::underlying_type<Enum>::type;
     using Type = typename CTypeTraits<CType>::ArrowType;
     static std::array<Enum, sizeof...(Values)> values() { return {Values...}; }
   };
   
   template <>
   struct EnumTraits<SortOrder>
       : BasicEnumTraits<SortOrder, SortOrder::Ascending, 
SortOrder::Descending> {
     static std::string name() { return "SortOrder"; }
   };
   ```

##########
File path: cpp/src/arrow/compute/function_internal.h
##########
@@ -0,0 +1,577 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/reflection_internal.h"
+#include "arrow/util/string.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+struct Scalar;
+struct StructScalar;
+using ::arrow::internal::checked_cast;
+namespace compute {
+namespace internal {
+
+class GenericOptionsType : public FunctionOptionsType {
+ public:
+  Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const 
override;
+  Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const override;
+  virtual Status ToStructScalar(const FunctionOptions& options,
+                                std::vector<std::string>* field_names,
+                                std::vector<std::shared_ptr<Scalar>>* values) 
const = 0;
+  virtual Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+      const StructScalar& scalar) const = 0;
+};
+
+ARROW_EXPORT
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+    const FunctionOptions&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+    const StructScalar&);
+
+template <typename Enum>
+struct EnumTraits;
+
+template <typename Enum>
+struct BasicEnumTraits {
+  using CType = typename std::underlying_type<Enum>::type;
+  using Type = typename CTypeTraits<CType>::ArrowType;
+};
+template <>
+struct EnumTraits<SortOrder> : BasicEnumTraits<SortOrder> {
+  static std::string name() { return "SortOrder"; }
+  static std::array<SortOrder, 2> values() {
+    return {SortOrder::Ascending, SortOrder::Descending};
+  }
+};

Review comment:
       We could also use nameof here to get the strings "Ascending", 
"Descending"

##########
File path: cpp/src/arrow/compute/function_internal.h
##########
@@ -0,0 +1,577 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/array/builder_base.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/reflection_internal.h"
+#include "arrow/util/string.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+struct Scalar;
+struct StructScalar;
+using ::arrow::internal::checked_cast;
+namespace compute {
+namespace internal {
+
+class GenericOptionsType : public FunctionOptionsType {
+ public:
+  Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const 
override;
+  Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const override;
+  virtual Status ToStructScalar(const FunctionOptions& options,
+                                std::vector<std::string>* field_names,
+                                std::vector<std::shared_ptr<Scalar>>* values) 
const = 0;
+  virtual Result<std::unique_ptr<FunctionOptions>> FromStructScalar(
+      const StructScalar& scalar) const = 0;
+};
+
+ARROW_EXPORT
+Result<std::shared_ptr<StructScalar>> FunctionOptionsToStructScalar(
+    const FunctionOptions&);
+ARROW_EXPORT
+Result<std::unique_ptr<FunctionOptions>> FunctionOptionsFromStructScalar(
+    const StructScalar&);
+
+template <typename Enum>
+struct EnumTraits;

Review comment:
       please put this in `reflection_internal.h`




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to