niyue commented on code in PR #39098:
URL: https://github.com/apache/arrow/pull/39098#discussion_r1428845724
##########
cpp/src/gandiva/engine.cc:
##########
@@ -103,9 +112,135 @@ extern const size_t kPrecompiledBitcodeSize;
std::once_flag llvm_init_once_flag;
static bool llvm_init = false;
static llvm::StringRef cpu_name;
-static llvm::SmallVector<std::string, 10> cpu_attrs;
+static std::vector<std::string> cpu_attrs;
std::once_flag register_exported_funcs_flag;
+template <typename T>
+arrow::Result<T> AsArrowResult(llvm::Expected<T>& expected,
+ const std::string& error_context = "") {
+ if (!expected) {
+ return Status::CodeGenError(error_context,
llvm::toString(expected.takeError()));
+ }
+ return std::move(expected.get());
+}
+
+Result<llvm::orc::JITTargetMachineBuilder> GetTargetMachineBuilder(
+ const Configuration& conf) {
+ llvm::orc::JITTargetMachineBuilder jtmb(
+ (llvm::Triple(llvm::sys::getDefaultTargetTriple())));
+ if (conf.target_host_cpu()) {
+ jtmb.setCPU(cpu_name.str());
+ jtmb.addFeatures(cpu_attrs);
+ }
+ auto const opt_level =
+ conf.optimize() ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None;
+ jtmb.setCodeGenOptLevel(opt_level);
+ return jtmb;
+}
+
+std::string GetModuleIR(const llvm::Module& module) {
+ std::string ir;
+ llvm::raw_string_ostream stream(ir);
+ module.print(stream, nullptr);
+ return ir;
+}
+
+void AddAbsoluteSymbol(llvm::orc::LLJIT& lljit, const std::string& name,
+ void* function_ptr) {
+ llvm::orc::MangleAndInterner mangle(lljit.getExecutionSession(),
lljit.getDataLayout());
+
+ //
https://github.com/llvm/llvm-project/commit/8b1771bd9f304be39d4dcbdcccedb6d3bcd18200#diff-77984a824d9182e5c67a481740f3bc5da78d5bd4cf6e1716a083ddb30a4a4931
+ // LLVM 17 introduced ExecutorSymbolDef and move most of ORC APIs to
ExecutorAddr
+#if LLVM_VERSION_MAJOR >= 17
+ llvm::orc::ExecutorSymbolDef symbol(
+ llvm::orc::ExecutorAddr(reinterpret_cast<uint64_t>(function_ptr)),
+ llvm::JITSymbolFlags::Exported);
+#else
+ llvm::JITEvaluatedSymbol
symbol(reinterpret_cast<llvm::JITTargetAddress>(function_ptr),
+ llvm::JITSymbolFlags::Exported);
+#endif
+
+ auto error = lljit.getMainJITDylib().define(
+ llvm::orc::absoluteSymbols({{mangle(name), symbol}}));
+ llvm::cantFail(std::move(error));
+}
+
+// add current process symbol to dylib
+// LLVM >= 18 does this automatically
+void AddProcessSymbol(llvm::orc::LLJIT& lljit) {
+ lljit.getMainJITDylib().addGenerator(
+
llvm::cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
+ lljit.getDataLayout().getGlobalPrefix())));
+ // the `atexit` symbol cannot be found for ASAN
+#if defined(__SANITIZE_ADDRESS__) || \
+ (defined(__has_feature) && (__has_feature(address_sanitizer)))
+ if (!lljit.lookup("atexit")) {
+ AddAbsoluteSymbol(lljit, "atexit", reinterpret_cast<void*>(atexit));
+ }
+#endif
+}
+
+#if JIT_LINK_SUPPORTED
+Result<std::unique_ptr<llvm::jitlink::InProcessMemoryManager>>
CreateMemmoryManager() {
+ auto maybe_mem_manager = llvm::jitlink::InProcessMemoryManager::Create();
+ ARROW_ASSIGN_OR_RAISE(
+ auto memory_manager,
+ AsArrowResult(maybe_mem_manager, "Could not create memory manager: "));
+ return memory_manager;
+}
+
+Status UseJitLinkIfEnabled(llvm::orc::LLJITBuilder& jit_builder) {
+ static auto maybe_use_jit_link =
::arrow::internal::GetEnvVar("GANDIVA_USE_JIT_LINK");
+ if (maybe_use_jit_link.ok()) {
+ ARROW_ASSIGN_OR_RAISE(static auto memory_manager, CreateMemmoryManager());
+ jit_builder.setObjectLinkingLayerCreator(
+ [&](llvm::orc::ExecutionSession& ES, const llvm::Triple& TT) {
+ return std::make_unique<llvm::orc::ObjectLinkingLayer>(ES,
*memory_manager);
+ });
+ }
+ return Status::OK();
+}
+#endif
+
+Result<std::unique_ptr<llvm::orc::LLJIT>> BuildJIT(
+ llvm::orc::JITTargetMachineBuilder jtmb,
+ std::optional<std::reference_wrapper<GandivaObjectCache>>& object_cache) {
+ auto jit_builder = llvm::orc::LLJITBuilder();
+
+#if JIT_LINK_SUPPORTED
+ ARROW_RETURN_NOT_OK(UseJitLinkIfEnabled(jit_builder));
+#endif
+
+ jit_builder.setJITTargetMachineBuilder(std::move(jtmb));
+ if (object_cache.has_value()) {
+ jit_builder.setCompileFunctionCreator(
+ [&object_cache](llvm::orc::JITTargetMachineBuilder JTMB)
+ ->
llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
+ auto target_machine = JTMB.createTargetMachine();
+ if (!target_machine) {
+ return target_machine.takeError();
+ }
+ // after compilation, the object code will be stored into the given
object cache
+ return std::make_unique<llvm::orc::TMOwningSimpleCompiler>(
+ std::move(*target_machine), &object_cache.value().get());
+ });
+ }
+ auto maybe_jit = jit_builder.create();
+ ARROW_ASSIGN_OR_RAISE(auto jit,
+ AsArrowResult(maybe_jit, "Could not create LLJIT
instance: "));
+
+ AddProcessSymbol(*jit);
+ return jit;
+}
+
+void Engine::SetLLVMObjectCache(GandivaObjectCache& object_cache) {
+ auto cached_buffer = object_cache.getObject(nullptr);
+ if (cached_buffer) {
+ auto error = lljit_->addObjectFile(std::move(cached_buffer));
+ DCHECK(!error) << "Failed to load object cache: " <<
llvm::toString(std::move(error));
Review Comment:
Updated
##########
cpp/src/gandiva/engine.cc:
##########
@@ -127,28 +262,34 @@ void Engine::InitOnce() {
}
}
ARROW_LOG(INFO) << "Detected CPU Name : " << cpu_name.str();
- ARROW_LOG(INFO) << "Detected CPU Features:" << cpu_attrs_str;
+ ARROW_LOG(INFO) << "Detected CPU Features: [" << cpu_attrs_str << "]";
llvm_init = true;
}
Engine::Engine(const std::shared_ptr<Configuration>& conf,
- std::unique_ptr<llvm::LLVMContext> ctx,
- std::unique_ptr<llvm::ExecutionEngine> engine, llvm::Module*
module,
- bool cached)
- : context_(std::move(ctx)),
- execution_engine_(std::move(engine)),
+ std::unique_ptr<llvm::orc::LLJIT> lljit,
+ std::unique_ptr<llvm::TargetMachine> target_machine, bool
cached)
+ : context_(std::make_unique<llvm::LLVMContext>()),
+ lljit_(std::move(lljit)),
ir_builder_(std::make_unique<llvm::IRBuilder<>>(*context_)),
- module_(module),
types_(*context_),
optimize_(conf->optimize()),
cached_(cached),
- function_registry_(conf->function_registry()) {}
+ function_registry_(conf->function_registry()),
+ target_machine_(std::move(target_machine)),
+ conf_(conf) {
+ // LLVM 10 doesn't like the expr function name to be the same as the module
name
+ auto module_id = "gdv_module_" +
std::to_string(reinterpret_cast<int64_t>(this));
Review Comment:
Fixed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]