bkietz commented on code in PR #14415:
URL: https://github.com/apache/arrow/pull/14415#discussion_r999574162


##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -678,17 +693,48 @@ static EnumParser<OverflowBehavior> kOverflowParser =
     GetEnumParser<OverflowBehavior>(kOverflowOptions);
 
 template <typename Enum>
-Result<Enum> ParseEnumArg(const SubstraitCall& call, uint32_t arg_index,
+Result<std::optional<Enum>> ParseOption(const SubstraitCall& call,
+                                        std::string_view option_name,
+                                        const EnumParser<Enum>& parser,
+                                        const std::vector<Enum>& 
implemented_options) {
+  std::optional<std::vector<std::string> const*> enum_arg = 
call.GetOption(option_name);
+  if (!enum_arg.has_value()) {
+    return std::nullopt;
+  }
+  std::vector<std::string> const* prefs = *enum_arg;
+  for (const std::string& pref : *prefs) {
+    ARROW_ASSIGN_OR_RAISE(Enum parsed, parser(pref));
+    for (Enum implemented_opt : implemented_options) {
+      if (implemented_opt == parsed) {
+        return parsed;
+      }
+    }
+  }
+  // Prepare error message
+  std::stringstream joined_prefs;
+  for (std::size_t i = 0; i < prefs->size(); i++) {
+    joined_prefs << (*prefs)[i];
+    if (i < prefs->size() - 1) {
+      joined_prefs << ", ";
+    }
+  }
+  return Status::NotImplemented("During a call to a function with id ", 
call.id().uri,
+                                "#", call.id().name, " the plan requested the 
option ",
+                                option_name, " to be one of [", 
joined_prefs.str(),
+                                "] but none of those option values are 
supported");

Review Comment:
   ```suggestion
                                   "] but the only supported options are [",
                                   implemented_options_as_strings_somehow, "]");
   ```
   ?



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -156,15 +156,33 @@ Result<compute::Expression> 
SubstraitCall::GetValueArg(uint32_t index) const {
   return value_arg_it->second;
 }
 
-bool SubstraitCall::HasValueArg(uint32_t index) const {
+bool SubstraitCall::HasValueArg(int index) const {
   return value_args_.find(index) != value_args_.end();
 }
 
-void SubstraitCall::SetValueArg(uint32_t index, compute::Expression value_arg) 
{
+void SubstraitCall::SetValueArg(int index, compute::Expression value_arg) {
   size_ = std::max(size_, index + 1);
   value_args_[index] = std::move(value_arg);
 }
 
+std::optional<std::vector<std::string> const*> SubstraitCall::GetOption(
+    std::string_view option_name) const {
+  auto opt = options_.find(std::string(option_name));
+  if (opt == options_.end()) {
+    return std::nullopt;
+  }
+  return &opt->second;
+}
+
+void SubstraitCall::SetOption(std::string_view option_name,
+                              const std::vector<std::string_view>& 
option_preferences) {
+  std::vector<std::string> prefs_copy;
+  std::transform(option_preferences.begin(), option_preferences.end(),
+                 std::back_inserter(prefs_copy),
+                 [](std::string_view pref) { return std::string(pref); });
+  options_[std::string(option_name)] = prefs_copy;

Review Comment:
   Nit: copying a temporary vector and using std algorithms
   ```suggestion
     auto& prefs = options_[std::string(option_name)];
     for (std::string_view pref : option_preferences) {
       prefs.emplace_back(pref);
     }
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -645,22 +663,19 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry {
 };
 
 template <typename Enum>
-using EnumParser = 
std::function<Result<Enum>(std::optional<std::string_view>)>;
+using EnumParser = std::function<Result<Enum>(std::string_view)>;
 
 template <typename Enum>
 EnumParser<Enum> GetEnumParser(const std::vector<std::string>& options) {
   std::unordered_map<std::string, Enum> parse_map;
   for (std::size_t i = 0; i < options.size(); i++) {
     parse_map[options[i]] = static_cast<Enum>(i + 1);
   }
-  return [parse_map](std::optional<std::string_view> enum_val) -> Result<Enum> 
{
-    if (!enum_val) {
-      // Assumes 0 is always kUnspecified in Enum
-      return static_cast<Enum>(0);
-    }
-    auto maybe_parsed = parse_map.find(std::string(*enum_val));
+  return [parse_map](std::string_view enum_val) -> Result<Enum> {

Review Comment:
   ```suggestion
     return [parse_map = std::move(parse_map)](std::string_view enum_val) -> 
Result<Enum> {
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -645,22 +663,19 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry {
 };
 
 template <typename Enum>
-using EnumParser = 
std::function<Result<Enum>(std::optional<std::string_view>)>;
+using EnumParser = std::function<Result<Enum>(std::string_view)>;
 
 template <typename Enum>
 EnumParser<Enum> GetEnumParser(const std::vector<std::string>& options) {
   std::unordered_map<std::string, Enum> parse_map;
   for (std::size_t i = 0; i < options.size(); i++) {
     parse_map[options[i]] = static_cast<Enum>(i + 1);
   }
-  return [parse_map](std::optional<std::string_view> enum_val) -> Result<Enum> 
{
-    if (!enum_val) {
-      // Assumes 0 is always kUnspecified in Enum
-      return static_cast<Enum>(0);
-    }
-    auto maybe_parsed = parse_map.find(std::string(*enum_val));
+  return [parse_map](std::string_view enum_val) -> Result<Enum> {
+    auto maybe_parsed = parse_map.find(std::string(enum_val));

Review Comment:
   Nit: it's a minor convention that `/mabe_\w+/` indicates a `Result<>` or 
sometimes an `optional<>`
   ```suggestion
       auto it = parse_map.find(std::string(enum_val));
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -678,17 +693,48 @@ static EnumParser<OverflowBehavior> kOverflowParser =
     GetEnumParser<OverflowBehavior>(kOverflowOptions);
 
 template <typename Enum>
-Result<Enum> ParseEnumArg(const SubstraitCall& call, uint32_t arg_index,
+Result<std::optional<Enum>> ParseOption(const SubstraitCall& call,
+                                        std::string_view option_name,
+                                        const EnumParser<Enum>& parser,
+                                        const std::vector<Enum>& 
implemented_options) {
+  std::optional<std::vector<std::string> const*> enum_arg = 
call.GetOption(option_name);
+  if (!enum_arg.has_value()) {
+    return std::nullopt;
+  }
+  std::vector<std::string> const* prefs = *enum_arg;
+  for (const std::string& pref : *prefs) {
+    ARROW_ASSIGN_OR_RAISE(Enum parsed, parser(pref));
+    for (Enum implemented_opt : implemented_options) {
+      if (implemented_opt == parsed) {
+        return parsed;
+      }
+    }
+  }
+  // Prepare error message
+  std::stringstream joined_prefs;
+  for (std::size_t i = 0; i < prefs->size(); i++) {
+    joined_prefs << (*prefs)[i];
+    if (i < prefs->size() - 1) {
+      joined_prefs << ", ";
+    }
+  }
+  return Status::NotImplemented("During a call to a function with id ", 
call.id().uri,
+                                "#", call.id().name, " the plan requested the 
option ",
+                                option_name, " to be one of [", 
joined_prefs.str(),

Review Comment:
   Nit: use JoinStrings
   ```suggestion
     return Status::NotImplemented("During a call to a function with id ", 
call.id().uri,
                                   "#", call.id().name, " the plan requested 
the option ",
                                   option_name, " to be one of [",
                                   arrow::internal::JoinStrings(*prefs),
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -698,12 +744,15 @@ Result<std::vector<compute::Expression>> 
GetValueArgs(const SubstraitCall& call,
 ExtensionIdRegistry::SubstraitCallToArrow 
DecodeOptionlessOverflowableArithmetic(
     const std::string& function_name) {
   return [function_name](const SubstraitCall& call) -> 
Result<compute::Expression> {
-    ARROW_ASSIGN_OR_RAISE(OverflowBehavior overflow_behavior,
-                          ParseEnumArg(call, 0, kOverflowParser));
+    ARROW_ASSIGN_OR_RAISE(
+        std::optional<OverflowBehavior> maybe_overflow_behavior,
+        ParseOption(call, "overflow", kOverflowParser,
+                    {OverflowBehavior::kSilent, OverflowBehavior::kError}));

Review Comment:
   Not sure how you feel about this, but: we could save some boilerplate by 
having ParseOption just return `implemented_options[0]` as the default in case 
the option is unspecified.
   ```suggestion
           OverflowBehavior overflow_behavior,
           ParseOption(call, "overflow", kOverflowParser,
                       {OverflowBehavior::kSilent, OverflowBehavior::kError}));
   ```



##########
cpp/src/arrow/engine/substrait/expression_internal.cc:
##########
@@ -90,6 +95,9 @@ Result<SubstraitCall> DecodeScalarFunction(
     ARROW_RETURN_NOT_OK(DecodeArg(scalar_fn.arguments(i), 
static_cast<uint32_t>(i), &call,
                                   ext_set, conversion_options));
   }
+  for (int i = 0; i < scalar_fn.options_size(); i++) {
+    DecodeOption(scalar_fn.options(i), &call);

Review Comment:
   ```suggestion
     for (const auto& option : scalar_fn.options()) {
       DecodeOption(option, &call);
   ```



##########
cpp/src/arrow/engine/substrait/extension_set.cc:
##########
@@ -156,15 +156,33 @@ Result<compute::Expression> 
SubstraitCall::GetValueArg(uint32_t index) const {
   return value_arg_it->second;
 }
 
-bool SubstraitCall::HasValueArg(uint32_t index) const {
+bool SubstraitCall::HasValueArg(int index) const {
   return value_args_.find(index) != value_args_.end();
 }
 
-void SubstraitCall::SetValueArg(uint32_t index, compute::Expression value_arg) 
{
+void SubstraitCall::SetValueArg(int index, compute::Expression value_arg) {
   size_ = std::max(size_, index + 1);
   value_args_[index] = std::move(value_arg);
 }
 
+std::optional<std::vector<std::string> const*> SubstraitCall::GetOption(
+    std::string_view option_name) const {
+  auto opt = options_.find(std::string(option_name));
+  if (opt == options_.end()) {
+    return std::nullopt;

Review Comment:
   Nit: this could be simpler
   ```suggestion
   const std::vector<std::string>* SubstraitCall::GetOption(
       std::string_view option_name) const {
     auto opt = options_.find(std::string(option_name));
     if (opt == options_.end()) {
       return nullptr;
   ```



##########
cpp/src/arrow/engine/substrait/plan_internal.cc:
##########
@@ -132,10 +133,29 @@ Result<ExtensionSet> GetExtensionSetFromPlan(const 
substrait::Plan& plan,
                             conversion_options, registry);
 }
 
+namespace {
+
+// FIXME Is there some way to get these from the cmake files?

Review Comment:
   There does not appear to be a way to inspect a substrait clone and get the 
version numbers apart from parsing CHANGELOG.md. You could add `#cmakedefine 
ARROW_SUBSTRAIT_BUILD_VERSION` to config.h.cmake then parse that here, or parse 
it in cmake and add `#cmakedefine ARROW_SUBSTRAIT_BUILD_VERSION_MAJOR` etc to 
config.h.cmake. This is for example how ThirdpartyToolchain.cmake provides a 
`#define` to indicate that jemalloc is vendored 
https://github.com/drin/arrow/blob/dea465396f981f9bd9862e501dc6750ca4fee6d9/cpp/cmake_modules/ThirdpartyToolchain.cmake#L1864-L1865
 -> 
https://github.com/drin/arrow/blob/dea465396f981f9bd9862e501dc6750ca4fee6d9/cpp/src/arrow/util/config.h.cmake#L48



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to