This is an automated email from the ASF dual-hosted git repository.

westonpace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a9552d1825 GH-34122: [C++] Allow calling function registry functions 
without requiring a Substrait mapping (#34288)
a9552d1825 is described below

commit a9552d1825a60d2664f164b9686b673e46848120
Author: Weston Pace <[email protected]>
AuthorDate: Thu Feb 23 14:49:57 2023 -0800

    GH-34122: [C++] Allow calling function registry functions without requiring 
a Substrait mapping (#34288)
    
    This introduces the special URI 
`urn:arrow:substrait_simple_extension_function`, which can be interpreted as 
"just call a function from the Arrow function registry by name".  It should 
only be used in plans where the consumer is Acero, as such plans are unlikely 
to make sense elsewhere.
    * Closes: #34122
    
    Authored-by: Weston Pace <[email protected]>
    Signed-off-by: Weston Pace <[email protected]>
---
 cpp/src/arrow/engine/substrait/extension_set.cc | 21 +++++++++++++++++++++
 cpp/src/arrow/engine/substrait/extension_set.h  |  7 +++++++
 cpp/src/arrow/engine/substrait/function_test.cc | 12 ++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc 
b/cpp/src/arrow/engine/substrait/extension_set.cc
index 54ca6fcaf0..6db36e6a7a 100644
--- a/cpp/src/arrow/engine/substrait/extension_set.cc
+++ b/cpp/src/arrow/engine/substrait/extension_set.cc
@@ -189,6 +189,8 @@ void SubstraitCall::SetOption(std::string_view option_name,
   }
 }
 
+bool SubstraitCall::HasOptions() const { return !options_.empty(); }
+
 // A builder used when creating a Substrait plan from an Arrow execution plan. 
 In
 // that situation we do not have a set of anchor values already defined so we 
keep
 // a map of what Ids we have seen.
@@ -337,6 +339,22 @@ const int* GetIndex(const KeyToIndex& key_to_index, const 
Key& key) {
 
 namespace {
 
+ExtensionIdRegistry::SubstraitCallToArrow kSimpleSubstraitToArrow =
+    [](const SubstraitCall& call) -> Result<::arrow::compute::Expression> {
+  std::vector<::arrow::compute::Expression> args;
+  for (int i = 0; i < call.size(); i++) {
+    if (!call.HasValueArg(i)) {
+      return Status::Invalid("Simple function mappings can only use value 
arguments");
+    }
+    if (call.HasOptions()) {
+      return Status::Invalid("Simple function mappings must not specify 
options");
+    }
+    ARROW_ASSIGN_OR_RAISE(::arrow::compute::Expression arg, 
call.GetValueArg(i));
+    args.push_back(std::move(arg));
+  }
+  return ::arrow::compute::call(std::string(call.id().name), std::move(args));
+};
+
 struct ExtensionIdRegistryImpl : ExtensionIdRegistry {
   ExtensionIdRegistryImpl() : parent_(nullptr) {}
   explicit ExtensionIdRegistryImpl(const ExtensionIdRegistry* parent) : 
parent_(parent) {}
@@ -541,6 +559,9 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry {
 
   Result<SubstraitCallToArrow> GetSubstraitCallToArrow(
       Id substrait_function_id) const override {
+    if (substrait_function_id.uri == kArrowSimpleExtensionFunctionsUri) {
+      return kSimpleSubstraitToArrow;
+    }
     auto maybe_converter = substrait_to_arrow_.find(substrait_function_id);
     if (maybe_converter == substrait_to_arrow_.end()) {
       if (parent_) {
diff --git a/cpp/src/arrow/engine/substrait/extension_set.h 
b/cpp/src/arrow/engine/substrait/extension_set.h
index 8ec8956757..1f2ef09a6d 100644
--- a/cpp/src/arrow/engine/substrait/extension_set.h
+++ b/cpp/src/arrow/engine/substrait/extension_set.h
@@ -67,6 +67,12 @@ constexpr const char* kSubstraitAggregateGenericFunctionsUri 
=
     "https://github.com/substrait-io/substrait/blob/main/extensions/";
     "functions_aggregate_generic.yaml";
 
+/// If a function call contains this URI then the function is looked up
+/// in the registry directly, all arguments must be value arguments,
+/// and a call that specifies any options is rejected as invalid.
+constexpr const char* kArrowSimpleExtensionFunctionsUri =
+    "urn:arrow:substrait_simple_extension_function";
+
 struct ARROW_ENGINE_EXPORT Id {
   std::string_view uri, name;
   bool empty() const { return uri.empty() && name.empty(); }
@@ -136,6 +142,7 @@ class ARROW_ENGINE_EXPORT SubstraitCall {
       std::string_view option_name) const;
   void SetOption(std::string_view option_name,
                  const std::vector<std::string_view>& option_preferences);
+  bool HasOptions() const;
   int size() const { return size_; }
 
  private:
diff --git a/cpp/src/arrow/engine/substrait/function_test.cc 
b/cpp/src/arrow/engine/substrait/function_test.cc
index 3cdc23b792..a0b02bfd76 100644
--- a/cpp/src/arrow/engine/substrait/function_test.cc
+++ b/cpp/src/arrow/engine/substrait/function_test.cc
@@ -189,6 +189,12 @@ TEST(FunctionMapping, ValidCases) {
        {int8(), int8()},
        "-119",
        int8()},
+      {{kArrowSimpleExtensionFunctionsUri, "add_checked"},
+       {"10", "15"},
+       kNoOptions,
+       {int8(), int8()},
+       "25",
+       int8()},
       {{kSubstraitArithmeticFunctionsUri, "subtract"},
        {"-119", "10"},
        {{"overflow", {"SILENT", "ERROR"}}},
@@ -498,6 +504,12 @@ TEST(FunctionMapping, ErrorCases) {
        {int8(), int8()},
        "",
        int8()},
+      {{kArrowSimpleExtensionFunctionsUri, "add_checked"},
+       {"127", "10"},
+       kNoOptions,
+       {int8(), int8()},
+       "",
+       int8()},
       {{kSubstraitArithmeticFunctionsUri, "subtract"},
        {"-119", "10"},
        {{"overflow", {"ERROR", "SILENT"}}},

Reply via email to