This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 83cba25017 GH-37848: [C++][Gandiva] Migrate LLVM JIT engine from MCJIT
to ORC v2/LLJIT (#39098)
83cba25017 is described below
commit 83cba25017a5c3a03e47f1851f242fa284f93533
Author: Yue <[email protected]>
AuthorDate: Fri Jan 5 03:02:40 2024 +0800
GH-37848: [C++][Gandiva] Migrate LLVM JIT engine from MCJIT to ORC v2/LLJIT
(#39098)
### Rationale for this change
Gandiva currently employs MCJIT as its internal JIT engine. However, LLVM
has introduced a newer JIT API known as ORC v2/LLJIT since LLVM 7.0, and it has
several advantages over MCJIT. In particular, MCJIT is not actively maintained
and is slated for eventual deprecation and removal.
### What changes are included in this PR?
* This PR replaces the MCJIT JIT engine with the ORC v2 engine, using the
`LLJIT` API.
* This PR adds a new JIT linker option, `JITLink`
(https://llvm.org/docs/JITLink.html), which can be used together with `LLJIT`
for LLVM 14+ on Linux/macOS platforms. It is turned off by default but can be
turned on with the environment variable `GANDIVA_USE_JIT_LINK`.
### Are these changes tested?
Yes, they are covered by existing unit tests.
### Are there any user-facing changes?
* The `Configuration` class has a new option called `dump_ir`. If users would
like to call the `DumpIR` API of `Projector` and `Filter`, they have to set the
`dump_ir` option first.
* Closes: #37848
Authored-by: Yue Ni <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/cmake_modules/FindLLVMAlt.cmake | 2 +-
cpp/src/gandiva/configuration.h | 17 +-
cpp/src/gandiva/engine.cc | 357 +++++++++++++++++++-----------
cpp/src/gandiva/engine.h | 46 ++--
cpp/src/gandiva/engine_llvm_test.cc | 26 ++-
cpp/src/gandiva/filter.cc | 8 +-
cpp/src/gandiva/filter.h | 2 +-
cpp/src/gandiva/llvm_generator.cc | 23 +-
cpp/src/gandiva/llvm_generator.h | 12 +-
cpp/src/gandiva/llvm_generator_test.cc | 21 +-
cpp/src/gandiva/projector.cc | 8 +-
cpp/src/gandiva/projector.h | 2 +-
cpp/src/gandiva/tests/micro_benchmarks.cc | 31 +++
cpp/src/gandiva/tests/test_util.cc | 4 +
cpp/src/gandiva/tests/test_util.h | 2 +
python/pyarrow/gandiva.pyx | 59 ++++-
python/pyarrow/includes/libgandiva.pxd | 14 +-
python/pyarrow/tests/test_gandiva.py | 6 +-
18 files changed, 441 insertions(+), 199 deletions(-)
diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake
b/cpp/cmake_modules/FindLLVMAlt.cmake
index 69f680824b..2730f82981 100644
--- a/cpp/cmake_modules/FindLLVMAlt.cmake
+++ b/cpp/cmake_modules/FindLLVMAlt.cmake
@@ -93,8 +93,8 @@ if(LLVM_FOUND)
debuginfodwarf
ipo
linker
- mcjit
native
+ orcjit
target)
if(LLVM_VERSION_MAJOR GREATER_EQUAL 14)
list(APPEND LLVM_TARGET_COMPONENTS passes)
diff --git a/cpp/src/gandiva/configuration.h b/cpp/src/gandiva/configuration.h
index f43a2b1907..620c58537f 100644
--- a/cpp/src/gandiva/configuration.h
+++ b/cpp/src/gandiva/configuration.h
@@ -37,10 +37,12 @@ class GANDIVA_EXPORT Configuration {
explicit Configuration(bool optimize,
std::shared_ptr<FunctionRegistry> function_registry =
- gandiva::default_function_registry())
+ gandiva::default_function_registry(),
+ bool dump_ir = false)
: optimize_(optimize),
target_host_cpu_(true),
- function_registry_(function_registry) {}
+ function_registry_(std::move(function_registry)),
+ dump_ir_(dump_ir) {}
Configuration() : Configuration(true) {}
@@ -50,11 +52,13 @@ class GANDIVA_EXPORT Configuration {
bool optimize() const { return optimize_; }
bool target_host_cpu() const { return target_host_cpu_; }
+ bool dump_ir() const { return dump_ir_; }
std::shared_ptr<FunctionRegistry> function_registry() const {
return function_registry_;
}
void set_optimize(bool optimize) { optimize_ = optimize; }
+ void set_dump_ir(bool dump_ir) { dump_ir_ = dump_ir; }
void target_host_cpu(bool target_host_cpu) { target_host_cpu_ =
target_host_cpu; }
void set_function_registry(std::shared_ptr<FunctionRegistry>
function_registry) {
function_registry_ = std::move(function_registry);
@@ -65,6 +69,9 @@ class GANDIVA_EXPORT Configuration {
bool target_host_cpu_; /* set the mcpu flag to host cpu while compiling llvm
ir */
std::shared_ptr<FunctionRegistry>
function_registry_; /* function registry that may contain external
functions */
+ // flag indicating if IR dumping is needed, defaults to false, and turning
it on will
+ // negatively affect performance
+ bool dump_ir_ = false;
};
/// \brief configuration builder for gandiva
@@ -83,6 +90,12 @@ class GANDIVA_EXPORT ConfigurationBuilder {
return configuration;
}
+ std::shared_ptr<Configuration> build_with_ir_dumping(bool dump_ir) {
+ std::shared_ptr<Configuration> configuration(
+ new Configuration(true, gandiva::default_function_registry(),
dump_ir));
+ return configuration;
+ }
+
std::shared_ptr<Configuration> build(
std::shared_ptr<FunctionRegistry> function_registry) {
std::shared_ptr<Configuration> configuration(
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index 1cea1fd2cb..fc047f2ac0 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -31,7 +31,8 @@
#include <unordered_set>
#include <utility>
-#include "arrow/util/logging.h"
+#include <arrow/util/io_util.h>
+#include <arrow/util/logging.h>
#if defined(_MSC_VER)
#pragma warning(push)
@@ -46,13 +47,14 @@
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/Bitcode/BitcodeReader.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
-#include <llvm/ExecutionEngine/MCJIT.h>
+#include <llvm/ExecutionEngine/Orc/LLJIT.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Linker/Linker.h>
+#include <llvm/Transforms/Utils/Cloning.h>
#if LLVM_VERSION_MAJOR >= 17
#include <llvm/TargetParser/SubtargetFeature.h>
#else
@@ -86,6 +88,13 @@
#include <llvm/Transforms/Utils.h>
#include <llvm/Transforms/Vectorize.h>
+// JITLink is available in LLVM 9+
+// but the `InProcessMemoryManager::Create` API was added since LLVM 14
+#if LLVM_VERSION_MAJOR >= 14 && !defined(_WIN32)
+#define JIT_LINK_SUPPORTED
+#include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
+#endif
+
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
@@ -103,9 +112,136 @@ extern const size_t kPrecompiledBitcodeSize;
std::once_flag llvm_init_once_flag;
static bool llvm_init = false;
static llvm::StringRef cpu_name;
-static llvm::SmallVector<std::string, 10> cpu_attrs;
+static std::vector<std::string> cpu_attrs;
std::once_flag register_exported_funcs_flag;
+template <typename T>
+arrow::Result<T> AsArrowResult(llvm::Expected<T>& expected,
+ const std::string& error_context) {
+ if (!expected) {
+ return Status::CodeGenError(error_context,
llvm::toString(expected.takeError()));
+ }
+ return std::move(expected.get());
+}
+
+Result<llvm::orc::JITTargetMachineBuilder> MakeTargetMachineBuilder(
+ const Configuration& conf) {
+ llvm::orc::JITTargetMachineBuilder jtmb(
+ (llvm::Triple(llvm::sys::getDefaultTargetTriple())));
+ if (conf.target_host_cpu()) {
+ jtmb.setCPU(cpu_name.str());
+ jtmb.addFeatures(cpu_attrs);
+ }
+ auto const opt_level =
+ conf.optimize() ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None;
+ jtmb.setCodeGenOptLevel(opt_level);
+ return jtmb;
+}
+
+std::string DumpModuleIR(const llvm::Module& module) {
+ std::string ir;
+ llvm::raw_string_ostream stream(ir);
+ module.print(stream, nullptr);
+ return ir;
+}
+
+void AddAbsoluteSymbol(llvm::orc::LLJIT& lljit, const std::string& name,
+ void* function_ptr) {
+ llvm::orc::MangleAndInterner mangle(lljit.getExecutionSession(),
lljit.getDataLayout());
+
+ //
https://github.com/llvm/llvm-project/commit/8b1771bd9f304be39d4dcbdcccedb6d3bcd18200#diff-77984a824d9182e5c67a481740f3bc5da78d5bd4cf6e1716a083ddb30a4a4931
+ // LLVM 17 introduced ExecutorSymbolDef and move most of ORC APIs to
ExecutorAddr
+#if LLVM_VERSION_MAJOR >= 17
+ llvm::orc::ExecutorSymbolDef symbol(
+ llvm::orc::ExecutorAddr(reinterpret_cast<uint64_t>(function_ptr)),
+ llvm::JITSymbolFlags::Exported);
+#else
+ llvm::JITEvaluatedSymbol
symbol(reinterpret_cast<llvm::JITTargetAddress>(function_ptr),
+ llvm::JITSymbolFlags::Exported);
+#endif
+
+ auto error = lljit.getMainJITDylib().define(
+ llvm::orc::absoluteSymbols({{mangle(name), symbol}}));
+ llvm::cantFail(std::move(error));
+}
+
+// add current process symbol to dylib
+// LLVM >= 18 does this automatically
+void AddProcessSymbol(llvm::orc::LLJIT& lljit) {
+ lljit.getMainJITDylib().addGenerator(
+
llvm::cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
+ lljit.getDataLayout().getGlobalPrefix())));
+ // the `atexit` symbol cannot be found for ASAN
+#ifdef ADDRESS_SANITIZER
+ if (!lljit.lookup("atexit")) {
+ AddAbsoluteSymbol(lljit, "atexit", reinterpret_cast<void*>(atexit));
+ }
+#endif
+}
+
+#ifdef JIT_LINK_SUPPORTED
+Result<std::unique_ptr<llvm::jitlink::InProcessMemoryManager>>
CreateMemmoryManager() {
+ auto maybe_mem_manager = llvm::jitlink::InProcessMemoryManager::Create();
+ return AsArrowResult(maybe_mem_manager, "Could not create memory manager: ");
+}
+
+Status UseJITLinkIfEnabled(llvm::orc::LLJITBuilder& jit_builder) {
+ static auto maybe_use_jit_link =
::arrow::internal::GetEnvVar("GANDIVA_USE_JIT_LINK");
+ if (maybe_use_jit_link.ok()) {
+ ARROW_ASSIGN_OR_RAISE(static auto memory_manager, CreateMemmoryManager());
+ jit_builder.setObjectLinkingLayerCreator(
+ [&](llvm::orc::ExecutionSession& ES, const llvm::Triple& TT) {
+ return std::make_unique<llvm::orc::ObjectLinkingLayer>(ES,
*memory_manager);
+ });
+ }
+ return Status::OK();
+}
+#endif
+
+Result<std::unique_ptr<llvm::orc::LLJIT>> BuildJIT(
+ llvm::orc::JITTargetMachineBuilder jtmb,
+ std::optional<std::reference_wrapper<GandivaObjectCache>>& object_cache) {
+ llvm::orc::LLJITBuilder jit_builder;
+
+#ifdef JIT_LINK_SUPPORTED
+ ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder));
+#endif
+
+ jit_builder.setJITTargetMachineBuilder(std::move(jtmb));
+ if (object_cache.has_value()) {
+ jit_builder.setCompileFunctionCreator(
+ [&object_cache](llvm::orc::JITTargetMachineBuilder JTMB)
+ ->
llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
+ auto target_machine = JTMB.createTargetMachine();
+ if (!target_machine) {
+ return target_machine.takeError();
+ }
+ // after compilation, the object code will be stored into the given
object
+ // cache
+ return std::make_unique<llvm::orc::TMOwningSimpleCompiler>(
+ std::move(*target_machine), &object_cache.value().get());
+ });
+ }
+ auto maybe_jit = jit_builder.create();
+ ARROW_ASSIGN_OR_RAISE(auto jit,
+ AsArrowResult(maybe_jit, "Could not create LLJIT
instance: "));
+
+ AddProcessSymbol(*jit);
+ return jit;
+}
+
+Status Engine::SetLLVMObjectCache(GandivaObjectCache& object_cache) {
+ auto cached_buffer = object_cache.getObject(nullptr);
+ if (cached_buffer) {
+ auto error = lljit_->addObjectFile(std::move(cached_buffer));
+ if (error) {
+ return Status::CodeGenError("Failed to add cached object file to LLJIT:
",
+ llvm::toString(std::move(error)));
+ }
+ }
+ return Status::OK();
+}
+
void Engine::InitOnce() {
DCHECK_EQ(llvm_init, false);
@@ -127,28 +263,34 @@ void Engine::InitOnce() {
}
}
ARROW_LOG(INFO) << "Detected CPU Name : " << cpu_name.str();
- ARROW_LOG(INFO) << "Detected CPU Features:" << cpu_attrs_str;
+ ARROW_LOG(INFO) << "Detected CPU Features: [" << cpu_attrs_str << "]";
llvm_init = true;
}
Engine::Engine(const std::shared_ptr<Configuration>& conf,
- std::unique_ptr<llvm::LLVMContext> ctx,
- std::unique_ptr<llvm::ExecutionEngine> engine, llvm::Module*
module,
- bool cached)
- : context_(std::move(ctx)),
- execution_engine_(std::move(engine)),
+ std::unique_ptr<llvm::orc::LLJIT> lljit,
+ std::unique_ptr<llvm::TargetMachine> target_machine, bool
cached)
+ : context_(std::make_unique<llvm::LLVMContext>()),
+ lljit_(std::move(lljit)),
ir_builder_(std::make_unique<llvm::IRBuilder<>>(*context_)),
- module_(module),
types_(*context_),
optimize_(conf->optimize()),
cached_(cached),
- function_registry_(conf->function_registry()) {}
+ function_registry_(conf->function_registry()),
+ target_machine_(std::move(target_machine)),
+ conf_(conf) {
+ // LLVM 10 doesn't like the expr function name to be the same as the module
name
+ auto module_id = "gdv_module_" +
std::to_string(reinterpret_cast<uintptr_t>(this));
+ module_ = std::make_unique<llvm::Module>(module_id, *context_);
+}
+
+Engine::~Engine() {}
Status Engine::Init() {
std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs);
+
// Add mappings for global functions that can be accessed from LLVM/IR
module.
ARROW_RETURN_NOT_OK(AddGlobalMappings());
-
return Status::OK();
}
@@ -163,101 +305,32 @@ Status Engine::LoadFunctionIRs() {
}
/// factory method to construct the engine.
-Status Engine::Make(const std::shared_ptr<Configuration>& conf, bool cached,
- std::unique_ptr<Engine>* out) {
+Result<std::unique_ptr<Engine>> Engine::Make(
+ const std::shared_ptr<Configuration>& conf, bool cached,
+ std::optional<std::reference_wrapper<GandivaObjectCache>> object_cache) {
std::call_once(llvm_init_once_flag, InitOnce);
- auto ctx = std::make_unique<llvm::LLVMContext>();
- auto module = std::make_unique<llvm::Module>("codegen", *ctx);
-
- // Capture before moving, ExecutionEngine does not allow retrieving the
- // original Module.
- auto module_ptr = module.get();
-
- auto opt_level =
- conf->optimize() ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None;
-
- // Note that the lifetime of the error string is not captured by the
- // ExecutionEngine but only for the lifetime of the builder. Found by
- // inspecting LLVM sources.
- std::string builder_error;
-
- llvm::EngineBuilder engine_builder(std::move(module));
-
- engine_builder.setEngineKind(llvm::EngineKind::JIT)
- .setOptLevel(opt_level)
- .setErrorStr(&builder_error);
-
- if (conf->target_host_cpu()) {
- engine_builder.setMCPU(cpu_name);
- engine_builder.setMAttrs(cpu_attrs);
- }
- std::unique_ptr<llvm::ExecutionEngine> exec_engine{engine_builder.create()};
-
- if (exec_engine == nullptr) {
- return Status::CodeGenError("Could not instantiate llvm::ExecutionEngine:
",
- builder_error);
- }
+ ARROW_ASSIGN_OR_RAISE(auto jtmb, MakeTargetMachineBuilder(*conf));
+ ARROW_ASSIGN_OR_RAISE(auto jit, BuildJIT(jtmb, object_cache));
+ auto maybe_tm = jtmb.createTargetMachine();
+ ARROW_ASSIGN_OR_RAISE(auto target_machine,
+ AsArrowResult(maybe_tm, "Could not create target
machine: "));
std::unique_ptr<Engine> engine{
- new Engine(conf, std::move(ctx), std::move(exec_engine), module_ptr,
cached)};
- ARROW_RETURN_NOT_OK(engine->Init());
- *out = std::move(engine);
- return Status::OK();
-}
-
-// This method was modified from its original version for a part of MLIR
-// Original source from
-//
https://github.com/llvm/llvm-project/blob/9f2ce5b915a505a5488a5cf91bb0a8efa9ddfff7/mlir/lib/ExecutionEngine/ExecutionEngine.cpp
-// The original copyright notice follows.
-
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-static void SetDataLayout(llvm::Module* module) {
- auto target_triple = llvm::sys::getDefaultTargetTriple();
- std::string error_message;
- auto target = llvm::TargetRegistry::lookupTarget(target_triple,
error_message);
- if (!target) {
- return;
- }
-
- std::string cpu(llvm::sys::getHostCPUName());
- llvm::SubtargetFeatures features;
- llvm::StringMap<bool> host_features;
-
- if (llvm::sys::getHostCPUFeatures(host_features)) {
- for (auto& f : host_features) {
- features.AddFeature(f.first(), f.second);
- }
- }
+ new Engine(conf, std::move(jit), std::move(target_machine), cached)};
- std::unique_ptr<llvm::TargetMachine> machine(
- target->createTargetMachine(target_triple, cpu, features.getString(),
{}, {}));
-
- module->setDataLayout(machine->createDataLayout());
-}
-// end of the modified method from MLIR
-
-template <typename T>
-static arrow::Result<T> AsArrowResult(llvm::Expected<T>& expected) {
- if (!expected) {
- std::string str;
- llvm::raw_string_ostream stream(str);
- stream << expected.takeError();
- return Status::CodeGenError(stream.str());
- }
- return std::move(expected.get());
+ ARROW_RETURN_NOT_OK(engine->Init());
+ return engine;
}
static arrow::Status VerifyAndLinkModule(
- llvm::Module* dest_module,
+ llvm::Module& dest_module,
llvm::Expected<std::unique_ptr<llvm::Module>> src_module_or_error) {
- ARROW_ASSIGN_OR_RAISE(auto src_ir_module,
AsArrowResult(src_module_or_error));
+ ARROW_ASSIGN_OR_RAISE(
+ auto src_ir_module,
+ AsArrowResult(src_module_or_error, "Failed to verify and link module:
"));
- // set dataLayout
- SetDataLayout(src_ir_module.get());
+ src_ir_module->setDataLayout(dest_module.getDataLayout());
std::string error_info;
llvm::raw_string_ostream error_stream(error_info);
@@ -265,16 +338,21 @@ static arrow::Status VerifyAndLinkModule(
llvm::verifyModule(*src_ir_module, &error_stream),
Status::CodeGenError("verify of IR Module failed: " +
error_stream.str()));
- ARROW_RETURN_IF(llvm::Linker::linkModules(*dest_module,
std::move(src_ir_module)),
+ ARROW_RETURN_IF(llvm::Linker::linkModules(dest_module,
std::move(src_ir_module)),
Status::CodeGenError("failed to link IR Modules"));
return Status::OK();
}
+llvm::Module* Engine::module() {
+ DCHECK(!module_finalized_) << "module cannot be accessed after finalized";
+ return module_.get();
+}
+
// Handling for pre-compiled IR libraries.
Status Engine::LoadPreCompiledIR() {
- auto bitcode = llvm::StringRef(reinterpret_cast<const
char*>(kPrecompiledBitcode),
- kPrecompiledBitcodeSize);
+ auto const bitcode = llvm::StringRef(reinterpret_cast<const
char*>(kPrecompiledBitcode),
+ kPrecompiledBitcodeSize);
/// Read from file into memory buffer.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer_or_error =
@@ -291,14 +369,14 @@ Status Engine::LoadPreCompiledIR() {
llvm::getOwningLazyBitcodeModule(std::move(buffer), *context());
// NOTE: llvm::handleAllErrors() fails linking with RTTI-disabled LLVM builds
// (ARROW-5148)
- ARROW_RETURN_NOT_OK(VerifyAndLinkModule(module_,
std::move(module_or_error)));
+ ARROW_RETURN_NOT_OK(VerifyAndLinkModule(*module_,
std::move(module_or_error)));
return Status::OK();
}
static llvm::MemoryBufferRef AsLLVMMemoryBuffer(const arrow::Buffer&
arrow_buffer) {
- auto data = reinterpret_cast<const char*>(arrow_buffer.data());
- auto size = arrow_buffer.size();
- return llvm::MemoryBufferRef(llvm::StringRef(data, size),
"external_bitcode");
+ auto const data = reinterpret_cast<const char*>(arrow_buffer.data());
+ auto const size = arrow_buffer.size();
+ return {llvm::StringRef(data, size), "external_bitcode"};
}
Status Engine::LoadExternalPreCompiledIR() {
@@ -306,7 +384,7 @@ Status Engine::LoadExternalPreCompiledIR() {
for (auto const& buffer : buffers) {
auto llvm_memory_buffer_ref = AsLLVMMemoryBuffer(*buffer);
auto module_or_error = llvm::parseBitcodeFile(llvm_memory_buffer_ref,
*context());
- ARROW_RETURN_NOT_OK(VerifyAndLinkModule(module_,
std::move(module_or_error)));
+ ARROW_RETURN_NOT_OK(VerifyAndLinkModule(*module_,
std::move(module_or_error)));
}
return Status::OK();
@@ -386,7 +464,8 @@ static void
OptimizeModuleWithLegacyPassManager(llvm::Module& module,
std::unique_ptr<llvm::legacy::PassManager> pass_manager(
new llvm::legacy::PassManager());
-
pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis));
+ pass_manager->add(
+ llvm::createTargetTransformInfoWrapperPass(std::move(target_analysis)));
pass_manager->add(llvm::createFunctionInliningPass());
pass_manager->add(llvm::createInstructionCombiningPass());
pass_manager->add(llvm::createPromoteMemoryToRegisterPass());
@@ -411,40 +490,64 @@ Status Engine::FinalizeModule() {
ARROW_RETURN_NOT_OK(RemoveUnusedFunctions());
if (optimize_) {
- auto target_analysis =
execution_engine_->getTargetMachine()->getTargetIRAnalysis();
-
+ auto target_analysis = target_machine_->getTargetIRAnalysis();
// misc passes to allow for inlining, vectorization, ..
#if LLVM_VERSION_MAJOR >= 14
- OptimizeModuleWithNewPassManager(*module_, target_analysis);
+ OptimizeModuleWithNewPassManager(*module_, std::move(target_analysis));
#else
- OptimizeModuleWithLegacyPassManager(*module_, target_analysis);
+ OptimizeModuleWithLegacyPassManager(*module_,
std::move(target_analysis));
#endif
}
ARROW_RETURN_IF(llvm::verifyModule(*module_, &llvm::errs()),
Status::CodeGenError("Module verification failed after
optimizer"));
- }
- // do the compilation
- execution_engine_->finalizeObject();
+ // print the module IR and save it for later use if IR dumping is needed
+ // since the module will be moved to construct LLJIT instance, and it is
not
+ // available after LLJIT instance is constructed
+ if (conf_->dump_ir()) {
+ module_ir_ = DumpModuleIR(*module_);
+ }
+
+ llvm::orc::ThreadSafeModule tsm(std::move(module_), std::move(context_));
+ auto error = lljit_->addIRModule(std::move(tsm));
+ if (error) {
+ return Status::CodeGenError("Failed to add IR module to LLJIT: ",
+ llvm::toString(std::move(error)));
+ }
+ }
module_finalized_ = true;
return Status::OK();
}
-void* Engine::CompiledFunction(std::string& function) {
- DCHECK(module_finalized_);
- return
reinterpret_cast<void*>(execution_engine_->getFunctionAddress(function));
+Result<void*> Engine::CompiledFunction(const std::string& function) {
+ DCHECK(module_finalized_)
+ << "module must be finalized before getting compiled function";
+ auto sym = lljit_->lookup(function);
+ if (!sym) {
+ return Status::CodeGenError("Failed to look up function: " + function +
+ " error: " + llvm::toString(sym.takeError()));
+ }
+ // Since LLVM 15, `LLJIT::lookup` returns ExecutorAddrs rather than
+ // JITEvaluatedSymbols
+#if LLVM_VERSION_MAJOR >= 15
+ auto fn_addr = sym->getValue();
+#else
+ auto fn_addr = sym->getAddress();
+#endif
+ auto fn_ptr = reinterpret_cast<void*>(fn_addr);
+ if (fn_ptr == nullptr) {
+ return Status::CodeGenError("Failed to get address for function: " +
function);
+ }
+ return fn_ptr;
}
void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type*
ret_type,
- const std::vector<llvm::Type*>& args,
- void* function_ptr) {
- constexpr bool is_var_arg = false;
- auto prototype = llvm::FunctionType::get(ret_type, args, is_var_arg);
- constexpr auto linkage = llvm::GlobalValue::ExternalLinkage;
- auto fn = llvm::Function::Create(prototype, linkage, name, module());
- execution_engine_->addGlobalMapping(fn, function_ptr);
+ const std::vector<llvm::Type*>& args,
void* func) {
+ auto const prototype = llvm::FunctionType::get(ret_type, args,
/*is_var_arg*/ false);
+ llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, name,
module());
+ AddAbsoluteSymbol(*lljit_, name, func);
}
arrow::Status Engine::AddGlobalMappings() {
@@ -453,11 +556,9 @@ arrow::Status Engine::AddGlobalMappings() {
return c_funcs.AddMappings(this);
}
-std::string Engine::DumpIR() {
- std::string ir;
- llvm::raw_string_ostream stream(ir);
- module_->print(stream, nullptr);
- return ir;
+const std::string& Engine::ir() {
+ DCHECK(!module_ir_.empty()) << "dump_ir in Configuration must be set for
dumping IR";
+ return module_ir_;
}
} // namespace gandiva
diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h
index df2d8b36d9..565c3f1425 100644
--- a/cpp/src/gandiva/engine.h
+++ b/cpp/src/gandiva/engine.h
@@ -17,11 +17,16 @@
#pragma once
+#include <cinttypes>
+#include <functional>
#include <memory>
+#include <optional>
#include <set>
#include <string>
#include <vector>
+#include <llvm/Analysis/TargetTransformInfo.h>
+
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "gandiva/configuration.h"
@@ -30,23 +35,34 @@
#include "gandiva/llvm_types.h"
#include "gandiva/visibility.h"
+namespace llvm::orc {
+class LLJIT;
+} // namespace llvm::orc
+
namespace gandiva {
/// \brief LLVM Execution engine wrapper.
class GANDIVA_EXPORT Engine {
public:
+ ~Engine();
llvm::LLVMContext* context() { return context_.get(); }
llvm::IRBuilder<>* ir_builder() { return ir_builder_.get(); }
LLVMTypes* types() { return &types_; }
- llvm::Module* module() { return module_; }
+
+ /// Retrieve LLVM module in the engine.
+ /// This should only be called before `FinalizeModule` is called
+ llvm::Module* module();
/// Factory method to create and initialize the engine object.
///
/// \param[in] config the engine configuration
/// \param[in] cached flag to mark if the module is already compiled and
cached
- /// \param[out] engine the created engine
- static Status Make(const std::shared_ptr<Configuration>& config, bool cached,
- std::unique_ptr<Engine>* engine);
+ /// \param[in] object_cache an optional object_cache used for building the
module
+ /// \return arrow::Result containing the created engine
+ static Result<std::unique_ptr<Engine>> Make(
+ const std::shared_ptr<Configuration>& config, bool cached,
+ std::optional<std::reference_wrapper<GandivaObjectCache>> object_cache =
+ std::nullopt);
/// Add the function to the list of IR functions that need to be compiled.
/// Compiling only the functions that are used by the module saves time.
@@ -59,36 +75,31 @@ class GANDIVA_EXPORT Engine {
Status FinalizeModule();
/// Set LLVM ObjectCache.
- void SetLLVMObjectCache(GandivaObjectCache& object_cache) {
- execution_engine_->setObjectCache(&object_cache);
- }
+ Status SetLLVMObjectCache(GandivaObjectCache& object_cache);
/// Get the compiled function corresponding to the irfunction.
- void* CompiledFunction(std::string& function);
+ Result<void*> CompiledFunction(const std::string& function);
// Create and add a mapping for the cpp function to make it accessible from
LLVM.
void AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type,
const std::vector<llvm::Type*>& args, void*
func);
/// Return the generated IR for the module.
- std::string DumpIR();
+ const std::string& ir();
/// Load the function IRs that can be accessed in the module.
Status LoadFunctionIRs();
private:
Engine(const std::shared_ptr<Configuration>& conf,
- std::unique_ptr<llvm::LLVMContext> ctx,
- std::unique_ptr<llvm::ExecutionEngine> engine, llvm::Module* module,
- bool cached);
+ std::unique_ptr<llvm::orc::LLJIT> lljit,
+ std::unique_ptr<llvm::TargetMachine> target_machine, bool cached);
// Post construction init. This _must_ be called after the constructor.
Status Init();
static void InitOnce();
- llvm::ExecutionEngine& execution_engine() { return *execution_engine_; }
-
/// load pre-compiled IR modules from precompiled_bitcode.cc and merge them
into
/// the main module.
Status LoadPreCompiledIR();
@@ -103,9 +114,9 @@ class GANDIVA_EXPORT Engine {
Status RemoveUnusedFunctions();
std::unique_ptr<llvm::LLVMContext> context_;
- std::unique_ptr<llvm::ExecutionEngine> execution_engine_;
+ std::unique_ptr<llvm::orc::LLJIT> lljit_;
std::unique_ptr<llvm::IRBuilder<>> ir_builder_;
- llvm::Module* module_;
+ std::unique_ptr<llvm::Module> module_;
LLVMTypes types_;
std::vector<std::string> functions_to_compile_;
@@ -115,6 +126,9 @@ class GANDIVA_EXPORT Engine {
bool cached_;
bool functions_loaded_ = false;
std::shared_ptr<FunctionRegistry> function_registry_;
+ std::string module_ir_;
+ std::unique_ptr<llvm::TargetMachine> target_machine_;
+ const std::shared_ptr<Configuration> conf_;
};
} // namespace gandiva
diff --git a/cpp/src/gandiva/engine_llvm_test.cc
b/cpp/src/gandiva/engine_llvm_test.cc
index 9baaa82d2e..78f468d13f 100644
--- a/cpp/src/gandiva/engine_llvm_test.cc
+++ b/cpp/src/gandiva/engine_llvm_test.cc
@@ -24,14 +24,14 @@
namespace gandiva {
-typedef int64_t (*add_vector_func_t)(int64_t* data, int n);
+using add_vector_func_t = int64_t (*)(int64_t*, int);
class TestEngine : public ::testing::Test {
protected:
- std::string BuildVecAdd(Engine* engine) {
- auto types = engine->types();
- llvm::IRBuilder<>* builder = engine->ir_builder();
- llvm::LLVMContext* context = engine->context();
+ std::string BuildVecAdd(Engine* gdv_engine) {
+ auto types = gdv_engine->types();
+ llvm::IRBuilder<>* builder = gdv_engine->ir_builder();
+ llvm::LLVMContext* context = gdv_engine->context();
// Create fn prototype :
// int64_t add_longs(int64_t *elements, int32_t nelements)
@@ -42,10 +42,10 @@ class TestEngine : public ::testing::Test {
llvm::FunctionType::get(types->i64_type(), arguments, false
/*isVarArg*/);
// Create fn
- std::string func_name = "add_longs";
- engine->AddFunctionToCompile(func_name);
+ std::string func_name = "add_longs_test_expr";
+ gdv_engine->AddFunctionToCompile(func_name);
llvm::Function* fn = llvm::Function::Create(
- prototype, llvm::GlobalValue::ExternalLinkage, func_name,
engine->module());
+ prototype, llvm::GlobalValue::ExternalLinkage, func_name,
gdv_engine->module());
assert(fn != nullptr);
// Name the arguments
@@ -99,7 +99,9 @@ class TestEngine : public ::testing::Test {
return func_name;
}
- void BuildEngine() { ASSERT_OK(Engine::Make(TestConfiguration(), false,
&engine)); }
+ void BuildEngine() {
+ ASSERT_OK_AND_ASSIGN(engine, Engine::Make(TestConfiguration(), false));
+ }
std::unique_ptr<Engine> engine;
std::shared_ptr<Configuration> configuration = TestConfiguration();
@@ -111,7 +113,8 @@ TEST_F(TestEngine, TestAddUnoptimised) {
std::string fn_name = BuildVecAdd(engine.get());
ASSERT_OK(engine->FinalizeModule());
- auto add_func =
reinterpret_cast<add_vector_func_t>(engine->CompiledFunction(fn_name));
+ ASSERT_OK_AND_ASSIGN(auto fn_ptr, engine->CompiledFunction(fn_name));
+ auto add_func = reinterpret_cast<add_vector_func_t>(fn_ptr);
int64_t my_array[] = {1, 3, -5, 8, 10};
EXPECT_EQ(add_func(my_array, 5), 17);
@@ -123,7 +126,8 @@ TEST_F(TestEngine, TestAddOptimised) {
std::string fn_name = BuildVecAdd(engine.get());
ASSERT_OK(engine->FinalizeModule());
- auto add_func =
reinterpret_cast<add_vector_func_t>(engine->CompiledFunction(fn_name));
+ EXPECT_OK_AND_ASSIGN(auto fn_ptr, engine->CompiledFunction(fn_name));
+ auto add_func = reinterpret_cast<add_vector_func_t>(fn_ptr);
int64_t my_array[] = {1, 3, -5, 8, 10};
EXPECT_EQ(add_func(my_array, 5), 17);
diff --git a/cpp/src/gandiva/filter.cc b/cpp/src/gandiva/filter.cc
index 416d97b5db..8a270cfdc0 100644
--- a/cpp/src/gandiva/filter.cc
+++ b/cpp/src/gandiva/filter.cc
@@ -65,8 +65,8 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition,
GandivaObjectCache obj_cache(cache, cache_key);
// Build LLVM generator, and generate code for the specified expression
- std::unique_ptr<LLVMGenerator> llvm_gen;
- ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, is_cached,
&llvm_gen));
+ ARROW_ASSIGN_OR_RAISE(auto llvm_gen,
+ LLVMGenerator::Make(configuration, is_cached,
obj_cache));
if (!is_cached) {
// Run the validation on the expression.
@@ -77,7 +77,7 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition,
}
// Set the object cache for LLVM
- llvm_gen->SetLLVMObjectCache(obj_cache);
+ ARROW_RETURN_NOT_OK(llvm_gen->SetLLVMObjectCache(obj_cache));
ARROW_RETURN_NOT_OK(llvm_gen->Build({condition},
SelectionVector::Mode::MODE_NONE));
@@ -119,7 +119,7 @@ Status Filter::Evaluate(const arrow::RecordBatch& batch,
return out_selection->PopulateFromBitMap(result, bitmap_size, num_rows - 1);
}
-std::string Filter::DumpIR() { return llvm_generator_->DumpIR(); }
+const std::string& Filter::DumpIR() { return llvm_generator_->ir(); }
void Filter::SetBuiltFromCache(bool flag) { built_from_cache_ = flag; }
diff --git a/cpp/src/gandiva/filter.h b/cpp/src/gandiva/filter.h
index cc536bca1b..b4043d93c8 100644
--- a/cpp/src/gandiva/filter.h
+++ b/cpp/src/gandiva/filter.h
@@ -76,7 +76,7 @@ class GANDIVA_EXPORT Filter {
Status Evaluate(const arrow::RecordBatch& batch,
std::shared_ptr<SelectionVector> out_selection);
- std::string DumpIR();
+ const std::string& DumpIR();
void SetBuiltFromCache(bool flag);
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 41cbe0ffe3..62ebab08f4 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -42,15 +42,15 @@ LLVMGenerator::LLVMGenerator(bool cached,
function_registry_(std::move(function_registry)),
enable_ir_traces_(false) {}
-Status LLVMGenerator::Make(const std::shared_ptr<Configuration>& config, bool cached,
- std::unique_ptr<LLVMGenerator>* llvm_generator) {
- std::unique_ptr<LLVMGenerator> llvmgen_obj(
+Result<std::unique_ptr<LLVMGenerator>> LLVMGenerator::Make(
+ const std::shared_ptr<Configuration>& config, bool cached,
+ std::optional<std::reference_wrapper<GandivaObjectCache>> object_cache) {
+ std::unique_ptr<LLVMGenerator> llvm_generator(
new LLVMGenerator(cached, config->function_registry()));
- ARROW_RETURN_NOT_OK(Engine::Make(config, cached, &(llvmgen_obj->engine_)));
- *llvm_generator = std::move(llvmgen_obj);
-
- return Status::OK();
+ ARROW_ASSIGN_OR_RAISE(llvm_generator->engine_,
+ Engine::Make(config, cached, object_cache));
+ return llvm_generator;
}
std::shared_ptr<Cache<ExpressionCacheKey, std::shared_ptr<llvm::MemoryBuffer>>>
@@ -62,8 +62,8 @@ LLVMGenerator::GetCache() {
return shared_cache;
}
-void LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) {
- engine_->SetLLVMObjectCache(object_cache);
+Status LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) {
+ return engine_->SetLLVMObjectCache(object_cache);
}
Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr output) {
@@ -73,7 +73,7 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out
ValueValidityPairPtr value_validity;
ARROW_RETURN_NOT_OK(decomposer.Decompose(*expr->root(), &value_validity));
// Generate the IR function for the decomposed expression.
- std::unique_ptr<CompiledExpr> compiled_expr(new CompiledExpr(value_validity, output));
+ auto compiled_expr = std::make_unique<CompiledExpr>(value_validity, output);
std::string fn_name = "expr_" + std::to_string(idx) + "_" +
std::to_string(static_cast<int>(selection_vector_mode_));
if (!cached_) {
@@ -103,7 +103,8 @@ Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode
// setup the jit functions for each expression.
for (auto& compiled_expr : compiled_exprs_) {
auto fn_name = compiled_expr->GetFunctionName(mode);
- auto jit_fn = reinterpret_cast<EvalFunc>(engine_->CompiledFunction(fn_name));
+ ARROW_ASSIGN_OR_RAISE(auto fn_ptr, engine_->CompiledFunction(fn_name));
+ auto jit_fn = reinterpret_cast<EvalFunc>(fn_ptr);
compiled_expr->SetJITFunction(selection_vector_mode_, jit_fn);
}
diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h
index 250ab78fbf..0c532998e8 100644
--- a/cpp/src/gandiva/llvm_generator.h
+++ b/cpp/src/gandiva/llvm_generator.h
@@ -18,7 +18,9 @@
#pragma once
#include <cstdint>
+#include <functional>
#include <memory>
+#include <optional>
#include <string>
#include <vector>
@@ -47,15 +49,17 @@ class FunctionHolder;
class GANDIVA_EXPORT LLVMGenerator {
public:
/// \brief Factory method to initialize the generator.
- static Status Make(const std::shared_ptr<Configuration>& config, bool cached,
- std::unique_ptr<LLVMGenerator>* llvm_generator);
+ static Result<std::unique_ptr<LLVMGenerator>> Make(
+ const std::shared_ptr<Configuration>& config, bool cached,
+ std::optional<std::reference_wrapper<GandivaObjectCache>> object_cache =
+ std::nullopt);
/// \brief Get the cache to be used for LLVM ObjectCache.
static std::shared_ptr<Cache<ExpressionCacheKey, std::shared_ptr<llvm::MemoryBuffer>>>
GetCache();
/// \brief Set LLVM ObjectCache.
- void SetLLVMObjectCache(GandivaObjectCache& object_cache);
+ Status SetLLVMObjectCache(GandivaObjectCache& object_cache);
/// \brief Build the code for the expression trees for default mode with a LLVM
/// ObjectCache. Each element in the vector represents an expression tree
@@ -79,7 +83,7 @@ class GANDIVA_EXPORT LLVMGenerator {
SelectionVector::Mode selection_vector_mode() { return selection_vector_mode_; }
LLVMTypes* types() { return engine_->types(); }
llvm::Module* module() { return engine_->module(); }
- std::string DumpIR() { return engine_->DumpIR(); }
+ const std::string& ir() { return engine_->ir(); }
private:
explicit LLVMGenerator(bool cached,
diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc
index 853d8ae6c3..79654e7b78 100644
--- a/cpp/src/gandiva/llvm_generator_test.cc
+++ b/cpp/src/gandiva/llvm_generator_test.cc
@@ -47,8 +47,7 @@ class TestLLVMGenerator : public ::testing::Test {
auto external_registry = std::make_shared<FunctionRegistry>();
auto config = config_factory(std::move(external_registry));
- std::unique_ptr<LLVMGenerator> generator;
- ASSERT_OK(LLVMGenerator::Make(config, false, &generator));
+ ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(config, false));
auto module = generator->module();
ASSERT_OK(generator->engine_->LoadFunctionIRs());
@@ -58,8 +57,7 @@ class TestLLVMGenerator : public ::testing::Test {
// Verify that a valid pc function exists for every function in the registry.
TEST_F(TestLLVMGenerator, VerifyPCFunctions) {
- std::unique_ptr<LLVMGenerator> generator;
- ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator));
+ ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false));
llvm::Module* module = generator->module();
ASSERT_OK(generator->engine_->LoadFunctionIRs());
@@ -70,8 +68,8 @@ TEST_F(TestLLVMGenerator, VerifyPCFunctions) {
TEST_F(TestLLVMGenerator, TestAdd) {
// Setup LLVM generator to do an arithmetic add of two vectors
- std::unique_ptr<LLVMGenerator> generator;
- ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator));
+ ASSERT_OK_AND_ASSIGN(auto generator,
+ LLVMGenerator::Make(TestConfigWithIrDumping(), false));
Annotator annotator;
auto field0 = std::make_shared<arrow::Field>("f0", arrow::int32());
@@ -100,18 +98,22 @@ TEST_F(TestLLVMGenerator, TestAdd) {
auto field_sum = std::make_shared<arrow::Field>("out", arrow::int32());
auto desc_sum = annotator.CheckAndAddInputFieldDescriptor(field_sum);
- std::string fn_name = "codegen";
+ // LLVM 10 doesn't like the expr function name to be the same as the module name when
+ // LLJIT is used
+ std::string fn_name = "llvm_gen_test_add_expr";
ASSERT_OK(generator->engine_->LoadFunctionIRs());
ASSERT_OK(generator->CodeGenExprValue(func_dex, 4, desc_sum, 0, fn_name,
SelectionVector::MODE_NONE));
ASSERT_OK(generator->engine_->FinalizeModule());
- auto ir = generator->engine_->DumpIR();
+ auto const& ir = generator->engine_->ir();
EXPECT_THAT(ir, testing::HasSubstr("vector.body"));
- EvalFunc eval_func = (EvalFunc)generator->engine_->CompiledFunction(fn_name);
+ ASSERT_OK_AND_ASSIGN(auto fn_ptr, generator->engine_->CompiledFunction(fn_name));
+ ASSERT_TRUE(fn_ptr);
+ auto eval_func = reinterpret_cast<EvalFunc>(fn_ptr);
constexpr size_t kNumRecords = 4;
std::array<uint32_t, kNumRecords> a0{1, 2, 3, 4};
std::array<uint32_t, kNumRecords> a1{5, 6, 7, 8};
@@ -126,6 +128,7 @@ TEST_F(TestLLVMGenerator, TestAdd) {
reinterpret_cast<uint8_t*>(out.data()),
reinterpret_cast<uint8_t*>(&out_bitmap),
};
std::array<int64_t, 6> addr_offsets{0, 0, 0, 0, 0, 0};
+
eval_func(addrs.data(), addr_offsets.data(), nullptr, nullptr, nullptr,
0 /* dummy context ptr */, kNumRecords);
diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc
index e717e825df..ec0302146f 100644
--- a/cpp/src/gandiva/projector.cc
+++ b/cpp/src/gandiva/projector.cc
@@ -80,8 +80,8 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
GandivaObjectCache obj_cache(cache, cache_key);
// Build LLVM generator, and generate code for the specified expressions
- std::unique_ptr<LLVMGenerator> llvm_gen;
- ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, is_cached, &llvm_gen));
+ ARROW_ASSIGN_OR_RAISE(auto llvm_gen,
+ LLVMGenerator::Make(configuration, is_cached, obj_cache));
// Run the validation on the expressions.
// Return if any of the expression is invalid since
@@ -95,7 +95,7 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs,
}
// Set the object cache for LLVM
- llvm_gen->SetLLVMObjectCache(obj_cache);
+ ARROW_RETURN_NOT_OK(llvm_gen->SetLLVMObjectCache(obj_cache));
ARROW_RETURN_NOT_OK(llvm_gen->Build(exprs, selection_vector_mode));
@@ -281,7 +281,7 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data,
return Status::OK();
}
-std::string Projector::DumpIR() { return llvm_generator_->DumpIR(); }
+const std::string& Projector::DumpIR() { return llvm_generator_->ir(); }
void Projector::SetBuiltFromCache(bool flag) { built_from_cache_ = flag; }
diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h
index 6801a7c9f3..f1ae7e4dc8 100644
--- a/cpp/src/gandiva/projector.h
+++ b/cpp/src/gandiva/projector.h
@@ -118,7 +118,7 @@ class GANDIVA_EXPORT Projector {
const SelectionVector* selection_vector,
const ArrayDataVector& output) const;
- std::string DumpIR();
+ const std::string& DumpIR();
void SetBuiltFromCache(bool flag);
diff --git a/cpp/src/gandiva/tests/micro_benchmarks.cc b/cpp/src/gandiva/tests/micro_benchmarks.cc
index f126b769b2..450e691323 100644
--- a/cpp/src/gandiva/tests/micro_benchmarks.cc
+++ b/cpp/src/gandiva/tests/micro_benchmarks.cc
@@ -16,6 +16,7 @@
// under the License.
#include <stdlib.h>
+
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
@@ -420,6 +421,35 @@ static void DoDecimalAdd2(benchmark::State& state, int32_t precision, int32_t sc
ASSERT_OK(status);
}
+static void TimedTestExprCompilation(benchmark::State& state) {
+ int64_t iteration = 0;
+ for (auto _ : state) {
+ // schema for input fields
+ auto field0 = field("f0", int64());
+ auto field1 = field("f1", int64());
+ auto literal = TreeExprBuilder::MakeLiteral(iteration);
+ auto schema = arrow::schema({field0, field1});
+
+ // output field
+ auto field_add = field("c1", int64());
+ auto field_less_than = field("c2", boolean());
+
+ // Build expression
+ auto add_func = TreeExprBuilder::MakeFunction(
+ "add", {TreeExprBuilder::MakeField(field0), literal}, int64());
+ auto less_than_func = TreeExprBuilder::MakeFunction(
+ "less_than", {TreeExprBuilder::MakeField(field1), literal}, boolean());
+
+ auto expr_0 = TreeExprBuilder::MakeExpression(add_func, field_add);
+ auto expr_1 = TreeExprBuilder::MakeExpression(less_than_func, field_less_than);
+
+ std::shared_ptr<Projector> projector;
+ ASSERT_OK(Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector));
+
+ ++iteration;
+ }
+}
+
static void DecimalAdd2Fast(benchmark::State& state) {
// use lesser precision to test the fast-path
DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision - 6, 18);
@@ -460,6 +490,7 @@ static void DecimalAdd3Large(benchmark::State& state) {
DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18, true);
}
+BENCHMARK(TimedTestExprCompilation)->Unit(benchmark::kMicrosecond);
BENCHMARK(TimedTestAdd3)->Unit(benchmark::kMicrosecond);
BENCHMARK(TimedTestBigNested)->Unit(benchmark::kMicrosecond);
BENCHMARK(TimedTestExtractYear)->Unit(benchmark::kMicrosecond);
diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc
index 959ea3cd7a..2ee49ffae0 100644
--- a/cpp/src/gandiva/tests/test_util.cc
+++ b/cpp/src/gandiva/tests/test_util.cc
@@ -30,6 +30,10 @@ std::shared_ptr<Configuration> TestConfiguration() {
return ConfigurationBuilder::DefaultConfiguration();
}
+std::shared_ptr<Configuration> TestConfigWithIrDumping() {
+ return ConfigurationBuilder().build_with_ir_dumping(true);
+}
+
#ifndef GANDIVA_EXTENSION_TEST_DIR
#define GANDIVA_EXTENSION_TEST_DIR "."
#endif
diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h
index 69d63732ae..d8181fe675 100644
--- a/cpp/src/gandiva/tests/test_util.h
+++ b/cpp/src/gandiva/tests/test_util.h
@@ -98,6 +98,8 @@ static inline ArrayPtr MakeArrowTypeArray(const std::shared_ptr<arrow::DataType>
std::shared_ptr<Configuration> TestConfiguration();
+std::shared_ptr<Configuration> TestConfigWithIrDumping();
+
// helper function to create a Configuration with an external function registered to the
// given function registry
std::shared_ptr<Configuration> TestConfigWithFunctionRegistry(
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index 35bbf5018f..2202ec64f2 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -36,6 +36,7 @@ from pyarrow.includes.libgandiva cimport (
CNode, CProjector, CFilter,
CSelectionVector,
_ensure_selection_mode,
+ CConfiguration,
CConfigurationBuilder,
TreeExprBuilder_MakeExpression,
TreeExprBuilder_MakeFunction,
@@ -583,9 +584,47 @@ cdef class TreeExprBuilder(_Weakrefable):
condition.node)
return Condition.create(r)
+cdef class Configuration(_Weakrefable):
+ cdef:
+ shared_ptr[CConfiguration] configuration
+
+ def __cinit__(self, bint optimize=True, bint dump_ir=False):
+ """
+ Initialize the configuration with specified options.
+
+ Parameters
+ ----------
+ optimize : bool, default True
+ Whether to enable optimizations.
+ dump_ir : bool, default False
+ Whether to dump LLVM IR.
+ """
+ self.configuration = CConfigurationBuilder().build()
+ self.configuration.get().set_optimize(optimize)
+ self.configuration.get().set_dump_ir(dump_ir)
+
+ @staticmethod
+ cdef create(shared_ptr[CConfiguration] configuration):
+ """
+ Create a Configuration instance from an existing CConfiguration pointer.
+
+ Parameters
+ ----------
+ configuration : shared_ptr[CConfiguration]
+ Existing CConfiguration pointer.
+
+ Returns
+ -------
+ Configuration instance
+ """
+ cdef Configuration self = Configuration.__new__(Configuration)
+ self.configuration = configuration
+ return self
+
cpdef make_projector(Schema schema, children, MemoryPool pool,
- str selection_mode="NONE"):
+ str selection_mode="NONE",
+ Configuration configuration=None):
"""
Construct a projection using expressions.
@@ -602,6 +641,8 @@ cpdef make_projector(Schema schema, children, MemoryPool pool,
Memory pool used to allocate output arrays.
selection_mode : str, default "NONE"
Possible values are NONE, UINT16, UINT32, UINT64.
+ configuration : pyarrow.gandiva.Configuration, default None
+ Configuration for the projector.
Returns
-------
@@ -612,6 +653,9 @@ cpdef make_projector(Schema schema, children, MemoryPool pool,
c_vector[shared_ptr[CGandivaExpression]] c_children
shared_ptr[CProjector] result
+ if configuration is None:
+ configuration = Configuration()
+
for child in children:
if child is None:
raise TypeError("Expressions must not be None")
@@ -620,12 +664,13 @@ cpdef make_projector(Schema schema, children, MemoryPool pool,
check_status(
Projector_Make(schema.sp_schema, c_children,
_ensure_selection_mode(selection_mode),
- CConfigurationBuilder.DefaultConfiguration(),
+ configuration.configuration,
&result))
return Projector.create(result, pool)
-cpdef make_filter(Schema schema, Condition condition):
+cpdef make_filter(Schema schema, Condition condition,
+ Configuration configuration=None):
"""
Construct a filter based on a condition.
@@ -638,6 +683,8 @@ cpdef make_filter(Schema schema, Condition condition):
Schema for the record batches, and the condition.
condition : pyarrow.gandiva.Condition
Filter condition.
+ configuration : pyarrow.gandiva.Configuration, default None
+ Configuration for the filter.
Returns
-------
@@ -646,8 +693,12 @@ cpdef make_filter(Schema schema, Condition condition):
cdef shared_ptr[CFilter] result
if condition is None:
raise TypeError("Condition must not be None")
+
+ if configuration is None:
+ configuration = Configuration()
+
check_status(
- Filter_Make(schema.sp_schema, condition.condition, &result))
+ Filter_Make(schema.sp_schema, condition.condition, configuration.configuration, &result))
return Filter.create(result)
diff --git a/python/pyarrow/includes/libgandiva.pxd b/python/pyarrow/includes/libgandiva.pxd
index fa3b72bad6..7d76576bef 100644
--- a/python/pyarrow/includes/libgandiva.pxd
+++ b/python/pyarrow/includes/libgandiva.pxd
@@ -252,6 +252,7 @@ cdef extern from "gandiva/filter.h" namespace "gandiva" nogil:
cdef CStatus Filter_Make \
"gandiva::Filter::Make"(
shared_ptr[CSchema] schema, shared_ptr[CCondition] condition,
+ shared_ptr[CConfiguration] configuration,
shared_ptr[CFilter]* filter)
cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil:
@@ -278,9 +279,20 @@ cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil:
cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil:
cdef cppclass CConfiguration" gandiva::Configuration":
- pass
+
+ CConfiguration()
+
+ CConfiguration(bint optimize, bint dump_ir)
+
+ void set_optimize(bint optimize)
+
+ void set_dump_ir(bint dump_ir)
cdef cppclass CConfigurationBuilder \
" gandiva::ConfigurationBuilder":
@staticmethod
shared_ptr[CConfiguration] DefaultConfiguration()
+
+ CConfigurationBuilder()
+
+ shared_ptr[CConfiguration] build()
diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py
index 241cac4d83..80d119a485 100644
--- a/python/pyarrow/tests/test_gandiva.py
+++ b/python/pyarrow/tests/test_gandiva.py
@@ -47,8 +47,9 @@ def test_tree_exp_builder():
assert expr.result().type == pa.int32()
+ config = gandiva.Configuration(dump_ir=True)
projector = gandiva.make_projector(
- schema, [expr], pa.default_memory_pool())
+ schema, [expr], pa.default_memory_pool(), "NONE", config)
# Gandiva generates compute kernel function named `@expr_X`
assert projector.llvm_ir.find("@expr_") != -1
@@ -104,7 +105,8 @@ def test_filter():
assert condition.result().type == pa.bool_()
- filter = gandiva.make_filter(table.schema, condition)
+ config = gandiva.Configuration(dump_ir=True)
+ filter = gandiva.make_filter(table.schema, condition, config)
# Gandiva generates compute kernel function named `@expr_X`
assert filter.llvm_ir.find("@expr_") != -1