This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new c353c81c9d GH-38589: [C++][Gandiva] Support registering external C
functions (#38632)
c353c81c9d is described below
commit c353c81c9d92bfb72b7f033f5870d09e249bfdb7
Author: Yue <[email protected]>
AuthorDate: Fri Nov 17 15:59:23 2023 +0800
GH-38589: [C++][Gandiva] Support registering external C functions (#38632)
### Rationale for this change
This PR enhances Gandiva by supporting the registration of external C
functions in its function registry, so that developers can author third-party
functions with complex dependencies and expose them as C functions to be used
in Gandiva expressions. See GH-38589 for more details.
### What changes are included in this PR?
This PR primarily adds a new API to the `FunctionRegistry` so that
developers can use it to register external C functions:
```C++
arrow::Status Register(
NativeFunction func, void* c_function_ptr,
std::optional<FunctionHolderMaker> function_holder_maker =
std::nullopt);
```
### Are these changes tested?
* The changes are tested via unit tests in this PR. The unit tests include
several C functions written in C++, and they confirm that functions of this
kind can be used by Gandiva after being registered with the new API described
above.
* Additionally, I wrote some Rust-based functions locally, integrated them
into a C++ program using the new registration API, and verified that this
approach works. That piece of work is not included in
the PR.
### Are there any user-facing changes?
There are several new APIs added to the `FunctionRegistry` class:
```C++
/// \brief register a C function into the function registry
/// @param func the registered function's metadata
/// @param c_function_ptr the function pointer to the
/// registered function's implementation
/// @param function_holder_maker this will be used as the function
/// holder if the
/// function requires a function holder
arrow::Status Register(
NativeFunction func, void* c_function_ptr,
std::optional<FunctionHolderMaker> function_holder_maker =
std::nullopt);
/// \brief get a list of C functions saved in the registry
const std::vector<std::pair<NativeFunction, void*>>& GetCFunctions()
const;
const FunctionHolderMakerRegistry& GetFunctionHolderMakerRegistry() const;
```
* Closes: #38589
### Notes
* This PR is related to https://github.com/apache/arrow/pull/38116, which
adds the initial support for registering LLVM IR based external functions into
Gandiva.
Authored-by: Yue Ni <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/gandiva/CMakeLists.txt | 2 +
cpp/src/gandiva/cast_time.cc | 3 +-
cpp/src/gandiva/context_helper.cc | 3 +-
cpp/src/gandiva/decimal_xlarge.cc | 3 +-
cpp/src/gandiva/engine.cc | 8 +-
cpp/src/gandiva/engine.h | 2 +-
cpp/src/gandiva/exported_funcs.h | 26 +++--
cpp/src/gandiva/exported_funcs_registry.cc | 5 +-
cpp/src/gandiva/exported_funcs_registry.h | 2 +-
cpp/src/gandiva/expr_decomposer.cc | 8 +-
cpp/src/gandiva/external_c_functions.cc | 79 +++++++++++++
cpp/src/gandiva/function_holder_maker_registry.cc | 72 ++++++++++++
...registry.h => function_holder_maker_registry.h} | 41 +++----
cpp/src/gandiva/function_holder_registry.h | 80 -------------
cpp/src/gandiva/function_registry.cc | 25 +++-
cpp/src/gandiva/function_registry.h | 23 ++++
cpp/src/gandiva/gdv_function_stubs.cc | 3 +-
cpp/src/gandiva/gdv_function_stubs.h | 2 +
cpp/src/gandiva/gdv_hash_function_stubs.cc | 3 +-
cpp/src/gandiva/gdv_string_function_stubs.cc | 5 +-
cpp/src/gandiva/llvm_generator.h | 2 +-
cpp/src/gandiva/llvm_generator_test.cc | 37 ++++--
cpp/src/gandiva/tests/projector_test.cc | 77 ++++++++++++-
cpp/src/gandiva/tests/test_util.cc | 128 ++++++++++++++++++++-
cpp/src/gandiva/tests/test_util.h | 21 +++-
25 files changed, 517 insertions(+), 143 deletions(-)
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 3448d51676..3f038f54a7 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -62,7 +62,9 @@ set(SRC_FILES
expression_registry.cc
exported_funcs_registry.cc
exported_funcs.cc
+ external_c_functions.cc
filter.cc
+ function_holder_maker_registry.cc
function_ir_builder.cc
function_registry.cc
function_registry_arithmetic.cc
diff --git a/cpp/src/gandiva/cast_time.cc b/cpp/src/gandiva/cast_time.cc
index 843ce01f89..eeb2ea3fdd 100644
--- a/cpp/src/gandiva/cast_time.cc
+++ b/cpp/src/gandiva/cast_time.cc
@@ -29,7 +29,7 @@
namespace gandiva {
-void ExportedTimeFunctions::AddMappings(Engine* engine) const {
+arrow::Status ExportedTimeFunctions::AddMappings(Engine* engine) const {
std::vector<llvm::Type*> args;
auto types = engine->types();
@@ -42,6 +42,7 @@ void ExportedTimeFunctions::AddMappings(Engine* engine) const
{
engine->AddGlobalMappingForFunc("gdv_fn_time_with_zone",
types->i32_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_time_with_zone));
+ return arrow::Status::OK();
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/context_helper.cc
b/cpp/src/gandiva/context_helper.cc
index 224bfd8f56..03bbe1b7a6 100644
--- a/cpp/src/gandiva/context_helper.cc
+++ b/cpp/src/gandiva/context_helper.cc
@@ -25,7 +25,7 @@
namespace gandiva {
-void ExportedContextFunctions::AddMappings(Engine* engine) const {
+arrow::Status ExportedContextFunctions::AddMappings(Engine* engine) const {
std::vector<llvm::Type*> args;
auto types = engine->types();
@@ -50,6 +50,7 @@ void ExportedContextFunctions::AddMappings(Engine* engine)
const {
engine->AddGlobalMappingForFunc("gdv_fn_context_arena_reset",
types->void_type(), args,
reinterpret_cast<void*>(gdv_fn_context_arena_reset));
+ return arrow::Status::OK();
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/decimal_xlarge.cc
b/cpp/src/gandiva/decimal_xlarge.cc
index caebd8b09e..21212422f3 100644
--- a/cpp/src/gandiva/decimal_xlarge.cc
+++ b/cpp/src/gandiva/decimal_xlarge.cc
@@ -38,7 +38,7 @@
namespace gandiva {
-void ExportedDecimalFunctions::AddMappings(Engine* engine) const {
+arrow::Status ExportedDecimalFunctions::AddMappings(Engine* engine) const {
std::vector<llvm::Type*> args;
auto types = engine->types();
@@ -93,6 +93,7 @@ void ExportedDecimalFunctions::AddMappings(Engine* engine)
const {
engine->AddGlobalMappingForFunc("gdv_xlarge_compare", types->i32_type()
/*return_type*/,
args,
reinterpret_cast<void*>(gdv_xlarge_compare));
+ return arrow::Status::OK();
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index 5ae1d76876..1cea1fd2cb 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -147,7 +147,7 @@ Engine::Engine(const std::shared_ptr<Configuration>& conf,
Status Engine::Init() {
std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs);
// Add mappings for global functions that can be accessed from LLVM/IR
module.
- AddGlobalMappings();
+ ARROW_RETURN_NOT_OK(AddGlobalMappings());
return Status::OK();
}
@@ -447,7 +447,11 @@ void Engine::AddGlobalMappingForFunc(const std::string&
name, llvm::Type* ret_ty
execution_engine_->addGlobalMapping(fn, function_ptr);
}
-void Engine::AddGlobalMappings() { ExportedFuncsRegistry::AddMappings(this); }
+arrow::Status Engine::AddGlobalMappings() {
+ ARROW_RETURN_NOT_OK(ExportedFuncsRegistry::AddMappings(this));
+ ExternalCFunctions c_funcs(function_registry_);
+ return c_funcs.AddMappings(this);
+}
std::string Engine::DumpIR() {
std::string ir;
diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h
index 566977dc4a..df2d8b36d9 100644
--- a/cpp/src/gandiva/engine.h
+++ b/cpp/src/gandiva/engine.h
@@ -97,7 +97,7 @@ class GANDIVA_EXPORT Engine {
Status LoadExternalPreCompiledIR();
// Create and add mappings for cpp functions that can be accessed from LLVM.
- void AddGlobalMappings();
+ arrow::Status AddGlobalMappings();
// Remove unused functions to reduce compile time.
Status RemoveUnusedFunctions();
diff --git a/cpp/src/gandiva/exported_funcs.h b/cpp/src/gandiva/exported_funcs.h
index 82aa020a21..414ec5c5bf 100644
--- a/cpp/src/gandiva/exported_funcs.h
+++ b/cpp/src/gandiva/exported_funcs.h
@@ -18,6 +18,7 @@
#pragma once
#include <vector>
+#include "gandiva/function_registry.h"
#include "gandiva/visibility.h"
namespace gandiva {
@@ -29,37 +30,48 @@ class ExportedFuncsBase {
public:
virtual ~ExportedFuncsBase() = default;
- virtual void AddMappings(Engine* engine) const = 0;
+ virtual arrow::Status AddMappings(Engine* engine) const = 0;
};
// Class for exporting Stub functions
class ExportedStubFunctions : public ExportedFuncsBase {
- void AddMappings(Engine* engine) const override;
+ arrow::Status AddMappings(Engine* engine) const override;
};
// Class for exporting Context functions
class ExportedContextFunctions : public ExportedFuncsBase {
- void AddMappings(Engine* engine) const override;
+ arrow::Status AddMappings(Engine* engine) const override;
};
// Class for exporting Time functions
class ExportedTimeFunctions : public ExportedFuncsBase {
- void AddMappings(Engine* engine) const override;
+ arrow::Status AddMappings(Engine* engine) const override;
};
// Class for exporting Decimal functions
class ExportedDecimalFunctions : public ExportedFuncsBase {
- void AddMappings(Engine* engine) const override;
+ arrow::Status AddMappings(Engine* engine) const override;
};
// Class for exporting String functions
class ExportedStringFunctions : public ExportedFuncsBase {
- void AddMappings(Engine* engine) const override;
+ arrow::Status AddMappings(Engine* engine) const override;
};
// Class for exporting Hash functions
class ExportedHashFunctions : public ExportedFuncsBase {
- void AddMappings(Engine* engine) const override;
+ arrow::Status AddMappings(Engine* engine) const override;
+};
+
+class ExternalCFunctions : public ExportedFuncsBase {
+ public:
+ explicit ExternalCFunctions(std::shared_ptr<FunctionRegistry>
function_registry)
+ : function_registry_(std::move(function_registry)) {}
+
+ arrow::Status AddMappings(Engine* engine) const override;
+
+ private:
+ std::shared_ptr<FunctionRegistry> function_registry_;
};
GANDIVA_EXPORT void RegisterExportedFuncs();
diff --git a/cpp/src/gandiva/exported_funcs_registry.cc
b/cpp/src/gandiva/exported_funcs_registry.cc
index 2c928a7a2a..137d29eefb 100644
--- a/cpp/src/gandiva/exported_funcs_registry.cc
+++ b/cpp/src/gandiva/exported_funcs_registry.cc
@@ -21,10 +21,11 @@
namespace gandiva {
-void ExportedFuncsRegistry::AddMappings(Engine* engine) {
+arrow::Status ExportedFuncsRegistry::AddMappings(Engine* engine) {
for (const auto& entry : *registered()) {
- entry->AddMappings(engine);
+ ARROW_RETURN_NOT_OK(entry->AddMappings(engine));
}
+ return arrow::Status::OK();
}
const ExportedFuncsRegistry::list_type& ExportedFuncsRegistry::Registered() {
diff --git a/cpp/src/gandiva/exported_funcs_registry.h
b/cpp/src/gandiva/exported_funcs_registry.h
index 08c45aec6a..a34308bb96 100644
--- a/cpp/src/gandiva/exported_funcs_registry.h
+++ b/cpp/src/gandiva/exported_funcs_registry.h
@@ -34,7 +34,7 @@ class GANDIVA_EXPORT ExportedFuncsRegistry {
using list_type = std::vector<std::shared_ptr<ExportedFuncsBase>>;
// Add functions from all the registered classes to the engine.
- static void AddMappings(Engine* engine);
+ static arrow::Status AddMappings(Engine* engine);
static bool Register(std::shared_ptr<ExportedFuncsBase> entry) {
registered()->emplace_back(std::move(entry));
diff --git a/cpp/src/gandiva/expr_decomposer.cc
b/cpp/src/gandiva/expr_decomposer.cc
index 957d9d046b..42566ca035 100644
--- a/cpp/src/gandiva/expr_decomposer.cc
+++ b/cpp/src/gandiva/expr_decomposer.cc
@@ -25,11 +25,12 @@
#include "gandiva/annotator.h"
#include "gandiva/dex.h"
-#include "gandiva/function_holder_registry.h"
+#include "gandiva/function_holder_maker_registry.h"
#include "gandiva/function_registry.h"
#include "gandiva/function_signature.h"
#include "gandiva/in_holder.h"
#include "gandiva/node.h"
+#include "gandiva/regex_functions_holder.h"
namespace gandiva {
@@ -81,9 +82,10 @@ Status ExprDecomposer::Visit(const FunctionNode& in_node) {
std::shared_ptr<FunctionHolder> holder;
int holder_idx = -1;
if (native_function->NeedsFunctionHolder()) {
- auto status = FunctionHolderRegistry::Make(desc->name(), node, &holder);
+ auto function_holder_maker_registry =
registry_.GetFunctionHolderMakerRegistry();
+ ARROW_ASSIGN_OR_RAISE(holder,
+ function_holder_maker_registry.Make(desc->name(),
node));
holder_idx = annotator_.AddHolderPointer(holder.get());
- ARROW_RETURN_NOT_OK(status);
}
if (native_function->result_nullable_type() == kResultNullIfNull) {
diff --git a/cpp/src/gandiva/external_c_functions.cc
b/cpp/src/gandiva/external_c_functions.cc
new file mode 100644
index 0000000000..fcba00aed3
--- /dev/null
+++ b/cpp/src/gandiva/external_c_functions.cc
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License
+
+#include <llvm/IR/Type.h>
+
+#include "gandiva/engine.h"
+#include "gandiva/exported_funcs.h"
+
+namespace {
+// calculate the number of arguments for a function signature
+size_t GetNumArgs(const gandiva::FunctionSignature& sig,
+ const gandiva::NativeFunction& func) {
+ auto num_args = 0;
+ num_args += func.NeedsContext() ? 1 : 0;
+ num_args += func.NeedsFunctionHolder() ? 1 : 0;
+ for (auto const& arg : sig.param_types()) {
+ num_args += arg->id() == arrow::Type::STRING ? 2 : 1;
+ }
+ num_args += sig.ret_type()->id() == arrow::Type::STRING ? 1 : 0;
+ return num_args;
+}
+
+// map from a NativeFunction's signature to the corresponding LLVM signature
+arrow::Result<std::pair<std::vector<llvm::Type*>, llvm::Type*>>
MapToLLVMSignature(
+ const gandiva::FunctionSignature& sig, const gandiva::NativeFunction& func,
+ gandiva::LLVMTypes* types) {
+ std::vector<llvm::Type*> arg_llvm_types;
+ arg_llvm_types.reserve(GetNumArgs(sig, func));
+
+ if (func.NeedsContext()) {
+ arg_llvm_types.push_back(types->i64_type());
+ }
+ if (func.NeedsFunctionHolder()) {
+ arg_llvm_types.push_back(types->i64_type());
+ }
+ for (auto const& arg : sig.param_types()) {
+ arg_llvm_types.push_back(types->IRType(arg->id()));
+ if (arg->id() == arrow::Type::STRING) {
+ // string type needs an additional length argument
+ arg_llvm_types.push_back(types->i32_type());
+ }
+ }
+ if (sig.ret_type()->id() == arrow::Type::STRING) {
+ // for string output, the last arg is the output length
+ arg_llvm_types.push_back(types->i32_ptr_type());
+ }
+ auto ret_llvm_type = types->IRType(sig.ret_type()->id());
+ return std::make_pair(std::move(arg_llvm_types), ret_llvm_type);
+}
+} // namespace
+
+namespace gandiva {
+Status ExternalCFunctions::AddMappings(Engine* engine) const {
+ auto const& c_funcs = function_registry_->GetCFunctions();
+ auto const types = engine->types();
+ for (auto& [func, func_ptr] : c_funcs) {
+ for (auto const& sig : func.signatures()) {
+ ARROW_ASSIGN_OR_RAISE(auto llvm_signature, MapToLLVMSignature(sig, func,
types));
+ auto& [args, ret_llvm_type] = llvm_signature;
+ engine->AddGlobalMappingForFunc(func.pc_name(), ret_llvm_type, args,
func_ptr);
+ }
+ }
+ return Status::OK();
+}
+} // namespace gandiva
diff --git a/cpp/src/gandiva/function_holder_maker_registry.cc
b/cpp/src/gandiva/function_holder_maker_registry.cc
new file mode 100644
index 0000000000..bb93402475
--- /dev/null
+++ b/cpp/src/gandiva/function_holder_maker_registry.cc
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/function_holder_maker_registry.h"
+
+#include <functional>
+
+#include "arrow/util/string.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/interval_holder.h"
+#include "gandiva/random_generator_holder.h"
+#include "gandiva/regex_functions_holder.h"
+#include "gandiva/to_date_holder.h"
+
+namespace gandiva {
+
+using arrow::internal::AsciiToLower;
+
+FunctionHolderMakerRegistry::FunctionHolderMakerRegistry()
+ : function_holder_makers_(DefaultHolderMakers()) {}
+
+arrow::Status FunctionHolderMakerRegistry::Register(const std::string& name,
+ FunctionHolderMaker
holder_maker) {
+ function_holder_makers_.emplace(AsciiToLower(name), std::move(holder_maker));
+ return arrow::Status::OK();
+}
+
+template <typename HolderType>
+static arrow::Result<FunctionHolderPtr> HolderMaker(const FunctionNode& node) {
+ std::shared_ptr<HolderType> derived_instance;
+ ARROW_RETURN_NOT_OK(HolderType::Make(node, &derived_instance));
+ return derived_instance;
+}
+
+arrow::Result<FunctionHolderPtr> FunctionHolderMakerRegistry::Make(
+ const std::string& name, const FunctionNode& node) {
+ auto lowered_name = AsciiToLower(name);
+ auto found = function_holder_makers_.find(lowered_name);
+ if (found == function_holder_makers_.end()) {
+ return Status::Invalid("function holder not registered for function " +
name);
+ }
+
+ return found->second(node);
+}
+
+FunctionHolderMakerRegistry::MakerMap
FunctionHolderMakerRegistry::DefaultHolderMakers() {
+ static const MakerMap maker_map = {
+ {"like", HolderMaker<LikeHolder>},
+ {"to_date", HolderMaker<ToDateHolder>},
+ {"random", HolderMaker<RandomGeneratorHolder>},
+ {"rand", HolderMaker<RandomGeneratorHolder>},
+ {"regexp_replace", HolderMaker<ReplaceHolder>},
+ {"regexp_extract", HolderMaker<ExtractHolder>},
+ {"castintervalday", HolderMaker<IntervalDaysHolder>},
+ {"castintervalyear", HolderMaker<IntervalYearsHolder>}};
+ return maker_map;
+}
+} // namespace gandiva
diff --git a/cpp/src/gandiva/exported_funcs_registry.h
b/cpp/src/gandiva/function_holder_maker_registry.h
similarity index 50%
copy from cpp/src/gandiva/exported_funcs_registry.h
copy to cpp/src/gandiva/function_holder_maker_registry.h
index 08c45aec6a..f215a4852a 100644
--- a/cpp/src/gandiva/exported_funcs_registry.h
+++ b/cpp/src/gandiva/function_holder_maker_registry.h
@@ -18,38 +18,35 @@
#pragma once
#include <memory>
-#include <vector>
+#include <string>
+#include <unordered_map>
-#include <gandiva/engine.h>
-#include <gandiva/visibility.h>
+#include "arrow/status.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
namespace gandiva {
-class ExportedFuncsBase;
-
-/// Registry for classes that export functions which can be accessed by
-/// LLVM/IR code.
-class GANDIVA_EXPORT ExportedFuncsRegistry {
+/// registry of function holder makers
+class FunctionHolderMakerRegistry {
public:
- using list_type = std::vector<std::shared_ptr<ExportedFuncsBase>>;
+ using FunctionHolderMaker =
+ std::function<arrow::Result<FunctionHolderPtr>(const FunctionNode&)>;
- // Add functions from all the registered classes to the engine.
- static void AddMappings(Engine* engine);
+ FunctionHolderMakerRegistry();
- static bool Register(std::shared_ptr<ExportedFuncsBase> entry) {
- registered()->emplace_back(std::move(entry));
- return true;
- }
+ arrow::Status Register(const std::string& name, FunctionHolderMaker
holder_maker);
- // list all the registered ExportedFuncsBase
- static const list_type& Registered();
+ /// \brief lookup a function holder maker using the given function name,
+ /// and make a FunctionHolderPtr using the found holder maker and the given
FunctionNode
+ arrow::Result<FunctionHolderPtr> Make(const std::string& name,
+ const FunctionNode& node);
private:
- static list_type* registered();
-};
+ using MakerMap = std::unordered_map<std::string, FunctionHolderMaker>;
-#define REGISTER_EXPORTED_FUNCS(classname) \
- [[maybe_unused]] static bool _registered_##classname = \
- ExportedFuncsRegistry::Register(std::make_shared<classname>())
+ MakerMap function_holder_makers_;
+ static MakerMap DefaultHolderMakers();
+};
} // namespace gandiva
diff --git a/cpp/src/gandiva/function_holder_registry.h
b/cpp/src/gandiva/function_holder_registry.h
deleted file mode 100644
index 7220f0d9d0..0000000000
--- a/cpp/src/gandiva/function_holder_registry.h
+++ /dev/null
@@ -1,80 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <functional>
-#include <memory>
-#include <string>
-#include <unordered_map>
-
-#include "arrow/status.h"
-#include "gandiva/function_holder.h"
-#include "gandiva/interval_holder.h"
-#include "gandiva/node.h"
-#include "gandiva/random_generator_holder.h"
-#include "gandiva/regex_functions_holder.h"
-#include "gandiva/to_date_holder.h"
-
-namespace gandiva {
-
-#define LAMBDA_MAKER(derived) \
- [](const FunctionNode& node, FunctionHolderPtr* holder) { \
- std::shared_ptr<derived> derived_instance; \
- auto status = derived::Make(node, &derived_instance); \
- if (status.ok()) { \
- *holder = derived_instance; \
- } \
- return status; \
- }
-
-/// Static registry of function holders.
-class FunctionHolderRegistry {
- public:
- using maker_type = std::function<Status(const FunctionNode&,
FunctionHolderPtr*)>;
- using map_type = std::unordered_map<std::string, maker_type>;
-
- static Status Make(const std::string& name, const FunctionNode& node,
- FunctionHolderPtr* holder) {
- std::string data = name;
- std::transform(data.begin(), data.end(), data.begin(),
- [](unsigned char c) { return std::tolower(c); });
-
- auto found = makers().find(data);
- if (found == makers().end()) {
- return Status::Invalid("function holder not registered for function " +
name);
- }
-
- return found->second(node, holder);
- }
-
- private:
- static map_type& makers() {
- static map_type maker_map = {{"like", LAMBDA_MAKER(LikeHolder)},
- {"ilike", LAMBDA_MAKER(LikeHolder)},
- {"to_date", LAMBDA_MAKER(ToDateHolder)},
- {"random",
LAMBDA_MAKER(RandomGeneratorHolder)},
- {"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
- {"regexp_replace",
LAMBDA_MAKER(ReplaceHolder)},
- {"regexp_extract",
LAMBDA_MAKER(ExtractHolder)},
- {"castintervalday",
LAMBDA_MAKER(IntervalDaysHolder)},
- {"castintervalyear",
LAMBDA_MAKER(IntervalYearsHolder)}};
- return maker_map;
- }
-};
-
-} // namespace gandiva
diff --git a/cpp/src/gandiva/function_registry.cc
b/cpp/src/gandiva/function_registry.cc
index 5d676dfa8d..2e392630ee 100644
--- a/cpp/src/gandiva/function_registry.cc
+++ b/cpp/src/gandiva/function_registry.cc
@@ -64,7 +64,7 @@ FunctionRegistry::iterator FunctionRegistry::back() const {
const NativeFunction* FunctionRegistry::LookupSignature(
const FunctionSignature& signature) const {
- auto got = pc_registry_map_.find(&signature);
+ auto const got = pc_registry_map_.find(&signature);
return got == pc_registry_map_.end() ? nullptr : got->second;
}
@@ -109,11 +109,34 @@ arrow::Status FunctionRegistry::Register(const
std::vector<NativeFunction>& func
return Status::OK();
}
+arrow::Status FunctionRegistry::Register(
+ NativeFunction func, void* c_function_ptr,
+ std::optional<FunctionHolderMaker> function_holder_maker) {
+ if (function_holder_maker.has_value()) {
+ // all signatures should have the same base name, use the first
signature's base name
+ auto const& func_base_name = func.signatures().begin()->base_name();
+ ARROW_RETURN_NOT_OK(holder_maker_registry_.Register(
+ func_base_name, std::move(function_holder_maker).value()));
+ }
+ c_functions_.emplace_back(func, c_function_ptr);
+ return FunctionRegistry::Add(std::move(func));
+}
+
const std::vector<std::shared_ptr<arrow::Buffer>>&
FunctionRegistry::GetBitcodeBuffers()
const {
return bitcode_memory_buffers_;
}
+const std::vector<std::pair<NativeFunction, void*>>&
FunctionRegistry::GetCFunctions()
+ const {
+ return c_functions_;
+}
+
+const FunctionHolderMakerRegistry&
FunctionRegistry::GetFunctionHolderMakerRegistry()
+ const {
+ return holder_maker_registry_;
+}
+
arrow::Result<std::shared_ptr<FunctionRegistry>> MakeDefaultFunctionRegistry()
{
auto registry = std::make_shared<FunctionRegistry>();
for (auto const& funcs :
diff --git a/cpp/src/gandiva/function_registry.h
b/cpp/src/gandiva/function_registry.h
index 01984961dc..24b64fac5f 100644
--- a/cpp/src/gandiva/function_registry.h
+++ b/cpp/src/gandiva/function_registry.h
@@ -18,11 +18,14 @@
#pragma once
#include <memory>
+#include <optional>
#include <string>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/status.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/function_holder_maker_registry.h"
#include "gandiva/function_registry_common.h"
#include "gandiva/gandiva_aliases.h"
#include "gandiva/native_function.h"
@@ -34,6 +37,9 @@ namespace gandiva {
class GANDIVA_EXPORT FunctionRegistry {
public:
using iterator = const NativeFunction*;
+ using FunctionHolderMaker =
+ std::function<arrow::Result<std::shared_ptr<FunctionHolder>>(
+ const FunctionNode& function_node)>;
FunctionRegistry();
FunctionRegistry(const FunctionRegistry&) = delete;
@@ -52,9 +58,24 @@ class GANDIVA_EXPORT FunctionRegistry {
arrow::Status Register(const std::vector<NativeFunction>& funcs,
std::shared_ptr<arrow::Buffer> bitcode_buffer);
+ /// \brief register a C function into the function registry
+ /// @param func the registered function's metadata
+ /// @param c_function_ptr the function pointer to the
+ /// registered function's implementation
+ /// @param function_holder_maker this will be used as the function holder if
the
+ /// function requires a function holder
+ arrow::Status Register(
+ NativeFunction func, void* c_function_ptr,
+ std::optional<FunctionHolderMaker> function_holder_maker = std::nullopt);
+
/// \brief get a list of bitcode memory buffers saved in the registry
const std::vector<std::shared_ptr<arrow::Buffer>>& GetBitcodeBuffers() const;
+ /// \brief get a list of C functions saved in the registry
+ const std::vector<std::pair<NativeFunction, void*>>& GetCFunctions() const;
+
+ const FunctionHolderMakerRegistry& GetFunctionHolderMakerRegistry() const;
+
iterator begin() const;
iterator end() const;
iterator back() const;
@@ -65,6 +86,8 @@ class GANDIVA_EXPORT FunctionRegistry {
std::vector<NativeFunction> pc_registry_;
SignatureMap pc_registry_map_;
std::vector<std::shared_ptr<arrow::Buffer>> bitcode_memory_buffers_;
+ std::vector<std::pair<NativeFunction, void*>> c_functions_;
+ FunctionHolderMakerRegistry holder_maker_registry_;
Status Add(NativeFunction func);
};
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc
b/cpp/src/gandiva/gdv_function_stubs.cc
index 67d39aeba5..0ad3c1738e 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -822,7 +822,7 @@ const char* gdv_mask_show_last_n_utf8_int32(int64_t
context, const char* data,
namespace gandiva {
-void ExportedStubFunctions::AddMappings(Engine* engine) const {
+arrow::Status ExportedStubFunctions::AddMappings(Engine* engine) const {
std::vector<llvm::Type*> args;
auto types = engine->types();
@@ -1268,5 +1268,6 @@ void ExportedStubFunctions::AddMappings(Engine* engine)
const {
engine->AddGlobalMappingForFunc("mask_utf8", types->i8_ptr_type()
/*return_type*/, args,
reinterpret_cast<void*>(mask_utf8));
+ return arrow::Status::OK();
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/gdv_function_stubs.h
b/cpp/src/gandiva/gdv_function_stubs.h
index 5356a91f3c..3f52537ee0 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -74,8 +74,10 @@ int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context,
int64_t ptr, const char*
bool in2_validity, int32_t
suppress_errors,
bool in3_validity, bool* out_valid);
+GANDIVA_EXPORT
void gdv_fn_context_set_error_msg(int64_t context_ptr, const char* err_msg);
+GANDIVA_EXPORT
uint8_t* gdv_fn_context_arena_malloc(int64_t context_ptr, int32_t data_len);
void gdv_fn_context_arena_reset(int64_t context_ptr);
diff --git a/cpp/src/gandiva/gdv_hash_function_stubs.cc
b/cpp/src/gandiva/gdv_hash_function_stubs.cc
index 018b0fbb70..aac70a06be 100644
--- a/cpp/src/gandiva/gdv_hash_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_hash_function_stubs.cc
@@ -216,7 +216,7 @@ const char* gdv_fn_sha1_decimal128(int64_t context, int64_t
x_high, uint64_t x_l
namespace gandiva {
-void ExportedHashFunctions::AddMappings(Engine* engine) const {
+arrow::Status ExportedHashFunctions::AddMappings(Engine* engine) const {
std::vector<llvm::Type*> args;
auto types = engine->types();
@@ -1041,5 +1041,6 @@ void ExportedHashFunctions::AddMappings(Engine* engine)
const {
engine->AddGlobalMappingForFunc("gdv_fn_md5_decimal128",
types->i8_ptr_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_md5_decimal128));
+ return arrow::Status::OK();
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/gdv_string_function_stubs.cc
b/cpp/src/gandiva/gdv_string_function_stubs.cc
index 3bfb297af1..9f5b5ce64b 100644
--- a/cpp/src/gandiva/gdv_string_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_string_function_stubs.cc
@@ -15,8 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-//#pragma once
-
#include "gandiva/gdv_function_stubs.h"
#include <utf8proc.h>
@@ -761,7 +759,7 @@ const char* translate_utf8_utf8_utf8(int64_t context, const
char* in, int32_t in
namespace gandiva {
-void ExportedStringFunctions::AddMappings(Engine* engine) const {
+arrow::Status ExportedStringFunctions::AddMappings(Engine* engine) const {
std::vector<llvm::Type*> args;
auto types = engine->types();
@@ -988,5 +986,6 @@ void ExportedStringFunctions::AddMappings(Engine* engine)
const {
engine->AddGlobalMappingForFunc("translate_utf8_utf8_utf8",
types->i8_ptr_type() /*return_type*/, args,
reinterpret_cast<void*>(translate_utf8_utf8_utf8));
+ return arrow::Status::OK();
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h
index 1921e25653..fae6ed48de 100644
--- a/cpp/src/gandiva/llvm_generator.h
+++ b/cpp/src/gandiva/llvm_generator.h
@@ -88,7 +88,7 @@ class GANDIVA_EXPORT LLVMGenerator {
FRIEND_TEST(TestLLVMGenerator, VerifyPCFunctions);
FRIEND_TEST(TestLLVMGenerator, TestAdd);
FRIEND_TEST(TestLLVMGenerator, TestNullInternal);
- FRIEND_TEST(TestLLVMGenerator, VerifyExtendedPCFunctions);
+ friend class TestLLVMGenerator;
llvm::LLVMContext* context() { return engine_->context(); }
llvm::IRBuilder<>* ir_builder() { return engine_->ir_builder(); }
diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc
index 671ce91e87..853d8ae6c3 100644
--- a/cpp/src/gandiva/llvm_generator_test.cc
+++ b/cpp/src/gandiva/llvm_generator_test.cc
@@ -36,6 +36,24 @@ typedef int64_t (*add_vector_func_t)(int64_t* elements, int nelements);
class TestLLVMGenerator : public ::testing::Test {
protected:
std::shared_ptr<FunctionRegistry> registry_ = default_function_registry();
+
+ public:
+ // create a Configuration with the given registry and verify that the given function
+ // exists in the module.
+ static void VerifyFunctionMapping(
+ const std::string& function_name,
+ const std::function<std::shared_ptr<Configuration>(
+ std::shared_ptr<FunctionRegistry>)>& config_factory) {
+ auto external_registry = std::make_shared<FunctionRegistry>();
+ auto config = config_factory(std::move(external_registry));
+
+ std::unique_ptr<LLVMGenerator> generator;
+ ASSERT_OK(LLVMGenerator::Make(config, false, &generator));
+
+ auto module = generator->module();
+ ASSERT_OK(generator->engine_->LoadFunctionIRs());
+ EXPECT_NE(module->getFunction(function_name), nullptr);
+ }
};
// Verify that a valid pc function exists for every function in the registry.
@@ -116,16 +134,19 @@ TEST_F(TestLLVMGenerator, TestAdd) {
}
TEST_F(TestLLVMGenerator, VerifyExtendedPCFunctions) {
- auto external_registry = std::make_shared<FunctionRegistry>();
- auto config_with_func_registry =
- TestConfigurationWithFunctionRegistry(std::move(external_registry));
+ VerifyFunctionMapping("multiply_by_two_int32", [](auto registry) {
+ return TestConfigWithFunctionRegistry(std::move(registry));
+ });
+}
- std::unique_ptr<LLVMGenerator> generator;
- ASSERT_OK(LLVMGenerator::Make(config_with_func_registry, false, &generator));
+TEST_F(TestLLVMGenerator, VerifyExtendedCFunctions) {
+ VerifyFunctionMapping("multiply_by_three_int32", [](auto registry) {
+ return TestConfigWithCFunction(std::move(registry));
+ });
- auto module = generator->module();
- ASSERT_OK(generator->engine_->LoadFunctionIRs());
- EXPECT_NE(module->getFunction("multiply_by_two_int32"), nullptr);
+ VerifyFunctionMapping("multiply_by_n_int32_int32", [](auto registry) {
+ return TestConfigWithHolderFunction(std::move(registry));
+ });
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 38566fb408..59eeb3d92f 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -3594,7 +3594,7 @@ TEST_F(TestProjector, TestExtendedFunctions) {
std::shared_ptr<Projector> projector;
auto external_registry = std::make_shared<FunctionRegistry>();
auto config_with_func_registry =
- TestConfigurationWithFunctionRegistry(std::move(external_registry));
+ TestConfigWithFunctionRegistry(std::move(external_registry));
ARROW_EXPECT_OK(
 Projector::Make(schema, {multiply}, config_with_func_registry, &projector));
@@ -3608,4 +3608,79 @@ TEST_F(TestProjector, TestExtendedFunctions) {
EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0));
}
+TEST_F(TestProjector, TestExtendedCFunctions) {
+ auto in_field = field("in", arrow::int32());
+ auto schema = arrow::schema({in_field});
+ auto out_field = field("out", arrow::int64());
+ auto multiply =
+ TreeExprBuilder::MakeExpression("multiply_by_three", {in_field}, out_field);
+
+ std::shared_ptr<Projector> projector;
+ auto external_registry = std::make_shared<FunctionRegistry>();
+ auto config_with_func_registry = TestConfigWithCFunction(std::move(external_registry));
+ ARROW_EXPECT_OK(
+ Projector::Make(schema, {multiply}, config_with_func_registry, &projector));
+
+ int num_records = 4;
+ auto array = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, true});
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array});
+ auto out = MakeArrowArrayInt64({3, 6, 9, 12}, {true, true, true, true});
+
+ arrow::ArrayVector outs;
+ ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
+ EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0));
+}
+
+TEST_F(TestProjector, TestExtendedCFunctionsWithFunctionHolder) {
+ auto multiple = TreeExprBuilder::MakeLiteral(5);
+ auto in_field = field("in", arrow::int32());
+ auto schema = arrow::schema({in_field});
+ auto out_field = field("out", arrow::int64());
+
+ auto in_node = TreeExprBuilder::MakeField(in_field);
+ auto multiply_by_n_func =
+ TreeExprBuilder::MakeFunction("multiply_by_n", {in_node, multiple}, arrow::int64());
+ auto multiply = TreeExprBuilder::MakeExpression(multiply_by_n_func, out_field);
+
+ std::shared_ptr<Projector> projector;
+ auto external_registry = std::make_shared<FunctionRegistry>();
+ auto config_with_func_registry =
+ TestConfigWithHolderFunction(std::move(external_registry));
+ ARROW_EXPECT_OK(
+ Projector::Make(schema, {multiply}, config_with_func_registry, &projector));
+
+ int num_records = 4;
+ auto array = MakeArrowArrayInt32({1, 2, 3, 4}, {true, true, true, true});
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array});
+ auto out = MakeArrowArrayInt64({5, 10, 15, 20}, {true, true, true, true});
+
+ arrow::ArrayVector outs;
+ ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
+ EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0));
+}
+
+TEST_F(TestProjector, TestExtendedCFunctionThatNeedsContext) {
+ auto in_field = field("in", arrow::utf8());
+ auto schema = arrow::schema({in_field});
+ auto out_field = field("out", arrow::utf8());
+ auto multiply =
+ TreeExprBuilder::MakeExpression("multiply_by_two_formula", {in_field}, out_field);
+
+ std::shared_ptr<Projector> projector;
+ auto external_registry = std::make_shared<FunctionRegistry>();
+ auto config_with_func_registry =
+ TestConfigWithContextFunction(std::move(external_registry));
+ ARROW_EXPECT_OK(
+ Projector::Make(schema, {multiply}, config_with_func_registry, &projector));
+
+ int num_records = 4;
+ auto array = MakeArrowArrayUtf8({"1", "2", "3", "10"}, {true, true, true, true});
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array});
+ auto out = MakeArrowArrayUtf8({"1x2", "2x2", "3x2", "10x2"}, {true, true, true, true});
+
+ arrow::ArrayVector outs;
+ ARROW_EXPECT_OK(projector->Evaluate(*in_batch, pool_, &outs));
+ EXPECT_ARROW_ARRAY_EQUALS(out, outs.at(0));
+}
+
} // namespace gandiva
diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc
index 4a0a15c722..959ea3cd7a 100644
--- a/cpp/src/gandiva/tests/test_util.cc
+++ b/cpp/src/gandiva/tests/test_util.cc
@@ -17,8 +17,13 @@
#include "gandiva/tests/test_util.h"
+#include <memory>
+#include <utility>
+
#include "arrow/util/io_util.h"
#include "arrow/util/logging.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/gdv_function_stubs.h"
namespace gandiva {
std::shared_ptr<Configuration> TestConfiguration() {
@@ -43,11 +48,124 @@ NativeFunction GetTestExternalFunction() {
return multiply_by_two_func;
}
-std::shared_ptr<Configuration> TestConfigurationWithFunctionRegistry(
+static NativeFunction GetTestExternalCFunction() {
+ NativeFunction multiply_by_three_func(
+ "multiply_by_three", {}, {arrow::int32()}, arrow::int64(),
+ ResultNullableType::kResultNullIfNull, "multiply_by_three_int32");
+ return multiply_by_three_func;
+}
+
+static NativeFunction GetTestFunctionWithFunctionHolder() {
+ // the 2nd parameter is expected to be an int32 literal
+ NativeFunction multiply_by_n_func("multiply_by_n", {}, {arrow::int32(), arrow::int32()},
+ arrow::int64(), ResultNullableType::kResultNullIfNull,
+ "multiply_by_n_int32_int32",
+ NativeFunction::kNeedsFunctionHolder);
+ return multiply_by_n_func;
+}
+
+static NativeFunction GetTestFunctionWithContext() {
+ NativeFunction multiply_by_two_formula(
+ "multiply_by_two_formula", {}, {arrow::utf8()}, arrow::utf8(),
+ ResultNullableType::kResultNullIfNull, "multiply_by_two_formula_utf8",
+ NativeFunction::kNeedsContext);
+ return multiply_by_two_formula;
+}
+
+static std::shared_ptr<Configuration> BuildConfigurationWithRegistry(
+ std::shared_ptr<FunctionRegistry> registry,
+ const std::function<arrow::Status(std::shared_ptr<FunctionRegistry>)>&
+ register_func) {
+ ARROW_EXPECT_OK(register_func(registry));
+ return ConfigurationBuilder().build(std::move(registry));
+}
+
+std::shared_ptr<Configuration> TestConfigWithFunctionRegistry(
+ std::shared_ptr<FunctionRegistry> registry) {
+ return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) {
+ return reg->Register({GetTestExternalFunction()}, GetTestFunctionLLVMIRPath());
+ });
+}
+
+class MultiplyHolder : public FunctionHolder {
+ public:
+ explicit MultiplyHolder(int32_t num) : num_(num) {}
+
+ static arrow::Result<std::shared_ptr<MultiplyHolder>> Make(const FunctionNode& node) {
+ ARROW_RETURN_IF(node.children().size() != 2,
+ Status::Invalid("'multiply_by_n' function requires two parameters"));
+
+ auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+ ARROW_RETURN_IF(
+ literal == nullptr,
+ Status::Invalid(
+ "'multiply_by_n' function requires a literal as the 2nd parameter"));
+
+ auto literal_type = literal->return_type()->id();
+ ARROW_RETURN_IF(
+ literal_type != arrow::Type::INT32,
+ Status::Invalid(
+ "'multiply_by_n' function requires an int32 literal as the 2nd parameter"));
+
+ return std::make_shared<MultiplyHolder>(
+ literal->is_null() ? 0 : std::get<int32_t>(literal->holder()));
+ }
+
+ int32_t operator()() const { return num_; }
+
+ private:
+ int32_t num_;
+};
+
+extern "C" {
+// this function is used as an external C function for testing so it has to be declared
+// with extern C
+static int64_t multiply_by_three(int32_t value) { return value * 3; }
+
+// this function requires a function holder
+static int64_t multiply_by_n(int64_t holder_ptr, int32_t value) {
+ auto* holder = reinterpret_cast<MultiplyHolder*>(holder_ptr);
+ return value * (*holder)();
+}
+
+// given a number string, return a string "{number}x2"
+static const char* multiply_by_two_formula(int64_t ctx, const char* value,
+ int32_t value_len, int32_t* out_len) {
+ auto result = std::string(value, value_len) + "x2";
+ *out_len = static_cast<int32_t>(result.length());
+ auto out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(ctx, *out_len));
+ if (out == nullptr) {
+ gdv_fn_context_set_error_msg(ctx, "Could not allocate memory for output string");
+ *out_len = 0;
+ return "";
+ }
+ memcpy(out, result.c_str(), *out_len);
+ return out;
+}
+}
+
+std::shared_ptr<Configuration> TestConfigWithCFunction(
+ std::shared_ptr<FunctionRegistry> registry) {
+ return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) {
+ return reg->Register(GetTestExternalCFunction(),
+ reinterpret_cast<void*>(multiply_by_three));
+ });
+}
+
+std::shared_ptr<Configuration> TestConfigWithHolderFunction(
+ std::shared_ptr<FunctionRegistry> registry) {
+ return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) {
+ return reg->Register(
+ GetTestFunctionWithFunctionHolder(), reinterpret_cast<void*>(multiply_by_n),
+ [](const FunctionNode& node) { return MultiplyHolder::Make(node); });
+ });
+}
+
+std::shared_ptr<Configuration> TestConfigWithContextFunction(
std::shared_ptr<FunctionRegistry> registry) {
- ARROW_EXPECT_OK(
- registry->Register({GetTestExternalFunction()}, GetTestFunctionLLVMIRPath()));
- auto external_func_config = ConfigurationBuilder().build(std::move(registry));
- return external_func_config;
+ return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) {
+ return reg->Register(GetTestFunctionWithContext(),
+ reinterpret_cast<void*>(multiply_by_two_formula));
+ });
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h
index e431e53096..69d63732ae 100644
--- a/cpp/src/gandiva/tests/test_util.h
+++ b/cpp/src/gandiva/tests/test_util.h
@@ -98,7 +98,26 @@ static inline ArrayPtr MakeArrowTypeArray(const std::shared_ptr<arrow::DataType>
std::shared_ptr<Configuration> TestConfiguration();
-std::shared_ptr<Configuration> TestConfigurationWithFunctionRegistry(
+// helper function to create a Configuration with an external function registered to the
+// given function registry
+std::shared_ptr<Configuration> TestConfigWithFunctionRegistry(
+ std::shared_ptr<FunctionRegistry> registry);
+
+// helper function to create a Configuration with an external C function registered to
+// the given function registry
+std::shared_ptr<Configuration> TestConfigWithCFunction(
+ std::shared_ptr<FunctionRegistry> registry);
+
+// helper function to create a Configuration with an external function registered
+// to the given function registry, and the external function is a function with a function
+// holder
+std::shared_ptr<Configuration> TestConfigWithHolderFunction(
+ std::shared_ptr<FunctionRegistry> registry);
+
+// helper function to create a Configuration with an external function registered
+// to the given function registry, and the external function is a function that needs
+// context
+std::shared_ptr<Configuration> TestConfigWithContextFunction(
std::shared_ptr<FunctionRegistry> registry);
std::string GetTestFunctionLLVMIRPath();