This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e40efd8648 GH-48159 [C++][Gandiva] Projector make is significantly
slower after move to OrcJIT (#49063)
e40efd8648 is described below
commit e40efd86489c673d5b0203ec943c704d92930f2f
Author: Logan Riggs <[email protected]>
AuthorDate: Fri Jan 30 00:34:22 2026 -0800
GH-48159 [C++][Gandiva] Projector make is significantly slower after move
to OrcJIT (#49063)
### Rationale for this change
Reduces LLVM TargetMachine object creation from 3 to 1. This object is
expensive to create and the extra copies weren't needed.
### What changes are included in this PR?
Refactor the Engine class to only create one target machine and pass that
to the necessary functions.
Before the change (3 TargetMachines created):
First TargetMachine: In Engine::Make(), MakeTargetMachineBuilder() is
called, then BuildJIT() is called. Inside LLJITBuilder::create(), when
prepareForConstruction() runs, if no DataLayout was set, it calls
JTMB->getDefaultDataLayoutForTarget() which creates a temporary TargetMachine
just to get the DataLayout.
Second TargetMachine: Inside BuildJIT(), when setCompileFunctionCreator is
used with the lambda, that lambda calls JTMB.createTargetMachine() to create a
TargetMachine for the TMOwningSimpleCompiler.
Third TargetMachine: Back in Engine::Make(), after BuildJIT() returns,
there's an explicit call to jtmb.createTargetMachine() to create
target_machine_ for the Engine.
After the change (1 TargetMachine created):
The key changes are:
Create TargetMachine first: The code now creates the TargetMachine
explicitly at the start of the Engine in Engine::Make. That machine is passed
to BuildJIT. In BuildJiIT that machine's DataLayout is sent to LLJITBuilder
which prevents prepareForConstruction() from calling
getDefaultDataLayoutForTarget() (which would create a temporary TargetMachine).
Use SimpleCompiler instead of TMOwningSimpleCompiler:
SimpleCompiler takes a reference to an existing TargetMachine rather than
owning one, so no new TargetMachine is created.
A shared_ptr is used to ensure that TargetMachine stays around for the
lifetime of the LLJIT instance.
### Are these changes tested?
Yes, unit and integration.
### Are there any user-facing changes?
No.
* GitHub Issue: #48159
Lead-authored-by: [email protected] <[email protected]>
Co-authored-by: Logan Riggs <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/gandiva/engine.cc | 32 +++++++++++++++++++++-----------
cpp/src/gandiva/engine.h | 6 ++++--
2 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc
index a718a80060..496722b1ea 100644
--- a/cpp/src/gandiva/engine.cc
+++ b/cpp/src/gandiva/engine.cc
@@ -219,7 +219,10 @@ Status UseJITLinkIfEnabled(llvm::orc::LLJITBuilder&
jit_builder) {
Result<std::unique_ptr<llvm::orc::LLJIT>> BuildJIT(
llvm::orc::JITTargetMachineBuilder jtmb,
- std::optional<std::reference_wrapper<GandivaObjectCache>>& object_cache) {
+ std::shared_ptr<llvm::TargetMachine> target_machine,
+ std::optional<std::reference_wrapper<GandivaObjectCache>> object_cache) {
+ auto data_layout = target_machine->createDataLayout();
+
llvm::orc::LLJITBuilder jit_builder;
#ifdef JIT_LINK_SUPPORTED
@@ -227,20 +230,20 @@ Result<std::unique_ptr<llvm::orc::LLJIT>> BuildJIT(
#endif
jit_builder.setJITTargetMachineBuilder(std::move(jtmb));
+ jit_builder.setDataLayout(std::make_optional(data_layout));
+
if (object_cache.has_value()) {
jit_builder.setCompileFunctionCreator(
- [&object_cache](llvm::orc::JITTargetMachineBuilder JTMB)
+ [tm = std::move(target_machine),
+ &object_cache](llvm::orc::JITTargetMachineBuilder JTMB)
->
llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
- auto target_machine = JTMB.createTargetMachine();
- if (!target_machine) {
- return target_machine.takeError();
- }
// after compilation, the object code will be stored into the given
object
// cache
- return std::make_unique<llvm::orc::TMOwningSimpleCompiler>(
- std::move(*target_machine), &object_cache.value().get());
+ return std::make_unique<llvm::orc::SimpleCompiler>(*tm,
+
&object_cache.value().get());
});
}
+
auto maybe_jit = jit_builder.create();
ARROW_ASSIGN_OR_RAISE(auto jit,
AsArrowResult(maybe_jit, "Could not create LLJIT
instance: "));
@@ -317,7 +320,7 @@ void Engine::InitOnce() {
Engine::Engine(const std::shared_ptr<Configuration>& conf,
std::unique_ptr<llvm::orc::LLJIT> lljit,
- std::unique_ptr<llvm::TargetMachine> target_machine, bool
cached)
+ std::shared_ptr<llvm::TargetMachine> target_machine, bool
cached)
: context_(std::make_unique<llvm::LLVMContext>()),
lljit_(std::move(lljit)),
ir_builder_(std::make_unique<llvm::IRBuilder<>>(*context_)),
@@ -367,14 +370,21 @@ Result<std::unique_ptr<Engine>> Engine::Make(
std::optional<std::reference_wrapper<GandivaObjectCache>> object_cache) {
std::call_once(llvm_init_once_flag, InitOnce);
+ // Create the target machine
ARROW_ASSIGN_OR_RAISE(auto jtmb, MakeTargetMachineBuilder(*conf));
- ARROW_ASSIGN_OR_RAISE(auto jit, BuildJIT(jtmb, object_cache));
auto maybe_tm = jtmb.createTargetMachine();
ARROW_ASSIGN_OR_RAISE(auto target_machine,
AsArrowResult(maybe_tm, "Could not create target
machine: "));
+ auto shared_target_machine =
+ std::shared_ptr<llvm::TargetMachine>(std::move(target_machine));
+
+ // Build the LLJIT instance
+ ARROW_ASSIGN_OR_RAISE(auto jit,
+ BuildJIT(std::move(jtmb), shared_target_machine,
object_cache));
+
std::unique_ptr<Engine> engine{
- new Engine(conf, std::move(jit), std::move(target_machine), cached)};
+ new Engine(conf, std::move(jit), std::move(shared_target_machine),
cached)};
ARROW_RETURN_NOT_OK(engine->Init());
return engine;
diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h
index 3c8914a7b4..20165787cb 100644
--- a/cpp/src/gandiva/engine.h
+++ b/cpp/src/gandiva/engine.h
@@ -96,7 +96,7 @@ class GANDIVA_EXPORT Engine {
private:
Engine(const std::shared_ptr<Configuration>& conf,
std::unique_ptr<llvm::orc::LLJIT> lljit,
- std::unique_ptr<llvm::TargetMachine> target_machine, bool cached);
+ std::shared_ptr<llvm::TargetMachine> target_machine, bool cached);
// Post construction init. This _must_ be called after the constructor.
Status Init();
@@ -130,7 +130,9 @@ class GANDIVA_EXPORT Engine {
bool functions_loaded_ = false;
std::shared_ptr<FunctionRegistry> function_registry_;
std::string module_ir_;
- std::unique_ptr<llvm::TargetMachine> target_machine_;
+ // The lifetime of the TargetMachine is shared with LLJIT. This prevents
unnecessary
+ // duplication of this expensive object.
+ std::shared_ptr<llvm::TargetMachine> target_machine_;
const std::shared_ptr<Configuration> conf_;
};