This is an automated email from the ASF dual-hosted git repository.
westonpace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new a9552d1825 GH-34122: [C++] Allow calling function registry functions
without requiring a Substrait mapping (#34288)
a9552d1825 is described below
commit a9552d1825a60d2664f164b9686b673e46848120
Author: Weston Pace <[email protected]>
AuthorDate: Thu Feb 23 14:49:57 2023 -0800
GH-34122: [C++] Allow calling function registry functions without requiring
a Substrait mapping (#34288)
This introduces the special URI
`urn:arrow:substrait_simple_extension_function` which can be interpreted as
"just call a function from the Arrow function registry by name". It should
only be used in plans where the consumer is Acero, as these plans are
unlikely to make sense elsewhere.
* Closes: #34122
Authored-by: Weston Pace <[email protected]>
Signed-off-by: Weston Pace <[email protected]>
---
cpp/src/arrow/engine/substrait/extension_set.cc | 21 +++++++++++++++++++++
cpp/src/arrow/engine/substrait/extension_set.h | 7 +++++++
cpp/src/arrow/engine/substrait/function_test.cc | 12 ++++++++++++
3 files changed, 40 insertions(+)
diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc
b/cpp/src/arrow/engine/substrait/extension_set.cc
index 54ca6fcaf0..6db36e6a7a 100644
--- a/cpp/src/arrow/engine/substrait/extension_set.cc
+++ b/cpp/src/arrow/engine/substrait/extension_set.cc
@@ -189,6 +189,8 @@ void SubstraitCall::SetOption(std::string_view option_name,
}
}
+// Returns true if options_ holds at least one entry, false if it is empty.
+bool SubstraitCall::HasOptions() const { return !options_.empty(); }
+
// A builder used when creating a Substrait plan from an Arrow execution plan.  In
// that situation we do not have a set of anchor values already defined so we keep
// a map of what Ids we have seen.
@@ -337,6 +339,22 @@ const int* GetIndex(const KeyToIndex& key_to_index, const
Key& key) {
namespace {
+// Converter returned for every function id whose URI is
+// kArrowSimpleExtensionFunctionsUri.  It builds a compute expression that
+// calls the function named by the Substrait call id, forwarding each argument
+// as a value argument.
+//
+// Returns Status::Invalid if the call specifies any options or if any
+// argument is not a value argument.
+ExtensionIdRegistry::SubstraitCallToArrow kSimpleSubstraitToArrow =
+    [](const SubstraitCall& call) -> Result<::arrow::compute::Expression> {
+  // Validate options once, before the argument loop, so that calls with zero
+  // arguments are also rejected when they carry options.
+  if (call.HasOptions()) {
+    return Status::Invalid("Simple function mappings must not specify options");
+  }
+  std::vector<::arrow::compute::Expression> args;
+  for (int i = 0; i < call.size(); i++) {
+    if (!call.HasValueArg(i)) {
+      return Status::Invalid("Simple function mappings can only use value arguments");
+    }
+    ARROW_ASSIGN_OR_RAISE(::arrow::compute::Expression arg, call.GetValueArg(i));
+    args.push_back(std::move(arg));
+  }
+  return ::arrow::compute::call(std::string(call.id().name), std::move(args));
+};
+
struct ExtensionIdRegistryImpl : ExtensionIdRegistry {
ExtensionIdRegistryImpl() : parent_(nullptr) {}
explicit ExtensionIdRegistryImpl(const ExtensionIdRegistry* parent) :
parent_(parent) {}
@@ -541,6 +559,9 @@ struct ExtensionIdRegistryImpl : ExtensionIdRegistry {
Result<SubstraitCallToArrow> GetSubstraitCallToArrow(
Id substrait_function_id) const override {
+ if (substrait_function_id.uri == kArrowSimpleExtensionFunctionsUri) {
+ return kSimpleSubstraitToArrow;
+ }
auto maybe_converter = substrait_to_arrow_.find(substrait_function_id);
if (maybe_converter == substrait_to_arrow_.end()) {
if (parent_) {
diff --git a/cpp/src/arrow/engine/substrait/extension_set.h
b/cpp/src/arrow/engine/substrait/extension_set.h
index 8ec8956757..1f2ef09a6d 100644
--- a/cpp/src/arrow/engine/substrait/extension_set.h
+++ b/cpp/src/arrow/engine/substrait/extension_set.h
@@ -67,6 +67,12 @@ constexpr const char* kSubstraitAggregateGenericFunctionsUri
=
"https://github.com/substrait-io/substrait/blob/main/extensions/"
"functions_aggregate_generic.yaml";
+/// If a function call contains this URI then the function is looked up
+/// in the registry directly and all arguments are mapped as value arguments.
+/// Options are not supported (the converter rejects calls that specify them).
+constexpr const char* kArrowSimpleExtensionFunctionsUri =
+ "urn:arrow:substrait_simple_extension_function";
+
struct ARROW_ENGINE_EXPORT Id {
std::string_view uri, name;
bool empty() const { return uri.empty() && name.empty(); }
@@ -136,6 +142,7 @@ class ARROW_ENGINE_EXPORT SubstraitCall {
std::string_view option_name) const;
void SetOption(std::string_view option_name,
const std::vector<std::string_view>& option_preferences);
+ bool HasOptions() const;
int size() const { return size_; }
private:
diff --git a/cpp/src/arrow/engine/substrait/function_test.cc
b/cpp/src/arrow/engine/substrait/function_test.cc
index 3cdc23b792..a0b02bfd76 100644
--- a/cpp/src/arrow/engine/substrait/function_test.cc
+++ b/cpp/src/arrow/engine/substrait/function_test.cc
@@ -189,6 +189,12 @@ TEST(FunctionMapping, ValidCases) {
{int8(), int8()},
"-119",
int8()},
+ {{kArrowSimpleExtensionFunctionsUri, "add_checked"},
+ {"10", "15"},
+ kNoOptions,
+ {int8(), int8()},
+ "25",
+ int8()},
{{kSubstraitArithmeticFunctionsUri, "subtract"},
{"-119", "10"},
{{"overflow", {"SILENT", "ERROR"}}},
@@ -498,6 +504,12 @@ TEST(FunctionMapping, ErrorCases) {
{int8(), int8()},
"",
int8()},
+ {{kArrowSimpleExtensionFunctionsUri, "add_checked"},
+ {"127", "10"},
+ kNoOptions,
+ {int8(), int8()},
+ "",
+ int8()},
{{kSubstraitArithmeticFunctionsUri, "subtract"},
{"-119", "10"},
{{"overflow", {"ERROR", "SILENT"}}},