This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new f4c3a1e5a IMPALA-11459: Use new LLVM Pass Manager
f4c3a1e5a is described below

commit f4c3a1e5a3783706be034fddbd7264ca86eb40d7
Author: Michael Smith <[email protected]>
AuthorDate: Mon Jun 5 16:25:30 2023 -0700

    IMPALA-11459: Use new LLVM Pass Manager
    
    LLVM developed a new pass manager -
    https://llvm.org/docs/NewPassManager.html - to overcome some of the
    limitations of LegacyPassManager. It offers improved optimization
    performance by reusing analysis across all types and levels of
    optimization passes. It also appears to be better maintained in future
    releases of LLVM.
    
    Switches to using the new PassManager via PassBuilder and a
    ModulePassManager. Breaks out PruneModule into a separate
    FunctionPruneTime timer to more easily track any regressions there.
    
    Change-Id: I947a5b067da50c18f62c3f9af9876463e542f58a
    Reviewed-on: http://gerrit.cloudera.org:8080/20014
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Michael Smith <[email protected]>
---
 be/src/codegen/llvm-codegen.cc | 89 +++++++++++++++++-------------------------
 be/src/codegen/llvm-codegen.h  |  5 ++-
 be/src/service/fe-support.cc   |  3 +-
 be/src/service/impalad-main.cc |  2 +-
 cmake_modules/FindLlvm.cmake   |  2 +-
 5 files changed, 43 insertions(+), 58 deletions(-)

diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index 5a0d608ff..8bc449602 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -26,10 +26,6 @@
 #include <boost/assert/source_location.hpp>
 #include <gutil/strings/substitute.h>
 
-#include <llvm/ADT/Triple.h>
-#include <llvm/Analysis/InstructionSimplify.h>
-#include <llvm/Analysis/Passes.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/Bitcode/BitcodeReader.h>
 #include <llvm/Bitcode/BitcodeWriter.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
@@ -41,18 +37,17 @@
 #include <llvm/IR/Function.h>
 #include <llvm/IR/GlobalVariable.h>
 #include <llvm/IR/InstIterator.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/NoFolder.h>
 #include <llvm/IR/Verifier.h>
 #include <llvm/Linker/Linker.h>
+#include <llvm/Passes/PassBuilder.h>
+#include <llvm/Support/CommandLine.h>
 #include <llvm/Support/DynamicLibrary.h>
 #include <llvm/Support/ErrorHandling.h>
 #include <llvm/Support/Host.h>
-#include <llvm/Support/TargetRegistry.h>
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Support/raw_ostream.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/IPO/PassManagerBuilder.h>
+#include <llvm/Transforms/IPO/GlobalDCE.h>
+#include <llvm/Transforms/IPO/Internalize.h>
 #include <llvm/Transforms/Scalar.h>
 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
 #include <llvm/Transforms/Utils/Cloning.h>
@@ -155,8 +150,11 @@ const map<int64_t, std::string> 
LlvmCodeGen::cpu_flag_mappings_{
   LOG(FATAL) << "LLVM hit fatal error: " << reason.c_str();
 }
 
-Status LlvmCodeGen::InitializeLlvm(bool load_backend) {
+Status LlvmCodeGen::InitializeLlvm(const char* procname, bool load_backend) {
   DCHECK(!llvm_initialized_);
+  // Treat all functions as having the inline hint
+  std::array<const char*, 2> argv = { { procname, "-inline-threshold=325" } };
+  CHECK(llvm::cl::ParseCommandLineOptions(argv.size(), argv.data()));
   llvm::remove_fatal_error_handler();
   llvm::install_fatal_error_handler(LlvmCodegenHandleError);
   // These functions can *only* be called once per process and are used to set 
up
@@ -234,6 +232,7 @@ LlvmCodeGen::LlvmCodeGen(FragmentState* state, ObjectPool* 
pool,
   module_bitcode_gen_timer_ = ADD_TIMER(profile_, "ModuleBitcodeGenTime");
   module_bitcode_size_ = ADD_COUNTER(profile_, "ModuleBitcodeSize", 
TUnit::BYTES);
   ir_generation_timer_ = ADD_TIMER(profile_, "IrGenerationTime");
+  function_prune_timer_ = ADD_TIMER(profile_, "FunctionPruneTime");
   optimization_timer_ = ADD_TIMER(profile_, "OptimizationTime");
   compile_timer_ = ADD_TIMER(profile_, "CompileTime");
   main_thread_timer_ = ADD_TIMER(profile_, "MainThreadCodegenTime");
@@ -1239,10 +1238,7 @@ Status LlvmCodeGen::StoreCache(CodeGenCacheKey& 
cache_key) {
 }
 
 void LlvmCodeGen::PruneModule() {
-  SCOPED_TIMER(optimization_timer_);
-  llvm::TargetIRAnalysis target_analysis =
-      execution_engine_->getTargetMachine()->getTargetIRAnalysis();
-
+  SCOPED_TIMER(function_prune_timer_);
   // Before running any other optimization passes, run the internalize pass, 
giving it
   // the names of all functions registered by AddFunctionToJit(), followed by 
the
   // global dead code elimination pass. This causes all functions not 
registered to be
@@ -1252,15 +1248,18 @@ void LlvmCodeGen::PruneModule() {
   for (auto& entry : fns_to_jit_compile_) {
     exported_fn_names.insert(entry.first->getName().str());
   }
-  unique_ptr<llvm::legacy::PassManager> module_pass_manager(
-      new llvm::legacy::PassManager());
-  
module_pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis));
-  module_pass_manager->add(
-      llvm::createInternalizePass([&exported_fn_names](const 
llvm::GlobalValue& gv) {
+
+  llvm::ModuleAnalysisManager module_analysis_manager;
+  llvm::PassBuilder pass_builder(execution_engine_->getTargetMachine());
+  pass_builder.registerModuleAnalyses(module_analysis_manager);
+
+  llvm::ModulePassManager module_pass_manager;
+  module_pass_manager.addPass(
+      llvm::InternalizePass([&exported_fn_names](const llvm::GlobalValue& gv) {
         return exported_fn_names.find(gv.getName().str()) != 
exported_fn_names.end();
       }));
-  module_pass_manager->add(llvm::createGlobalDCEPass());
-  module_pass_manager->run(*module_);
+  module_pass_manager.addPass(llvm::GlobalDCEPass());
+  module_pass_manager.run(*module_, module_analysis_manager);
 }
 
 Status LlvmCodeGen::FinalizeModule() {
@@ -1404,28 +1403,25 @@ Status 
LlvmCodeGen::FinalizeModuleAsync(RuntimeProfile::EventSequence* event_seq
 Status LlvmCodeGen::OptimizeModule() {
   SCOPED_TIMER(optimization_timer_);
 
+  llvm::LoopAnalysisManager LAM;
+  llvm::FunctionAnalysisManager FAM;
+  llvm::CGSCCAnalysisManager CGAM;
+  llvm::ModuleAnalysisManager MAM;
+
   // This pass manager will construct optimizations passes that are "typical" 
for
   // c/c++ programs.  We're relying on llvm to pick the best passes for us.
   // TODO: we can likely muck with this to get better compile speeds or write
   // our own passes.  Our subexpression elimination optimization can be rolled 
into
   // a pass.
-  llvm::PassManagerBuilder pass_builder;
-  // 2 maps to -O2
-  // TODO: should we switch to 3? (3 may not produce different IR than 2 while 
taking
-  // longer, but we should check)
-  pass_builder.OptLevel = 2;
-  // Don't optimize for code size (this corresponds to -O2/-O3)
-  pass_builder.SizeLevel = 0;
-  // Use a threshold equivalent to adding InlineHint on all functions.
-  // This results in slightly better performance than the default threshold 
(225).
-  pass_builder.Inliner = llvm::createFunctionInliningPass(325);
-
-  // The TargetIRAnalysis pass is required to provide information about the 
target
-  // machine to optimisation passes, e.g. the cost model.
-  llvm::TargetIRAnalysis target_analysis =
-      execution_engine_->getTargetMachine()->getTargetIRAnalysis();
-  unique_ptr<llvm::legacy::PassManager> module_pass_manager(
-      new llvm::legacy::PassManager());
+  llvm::PassBuilder pass_builder(execution_engine_->getTargetMachine());
+  pass_builder.registerModuleAnalyses(MAM);
+  pass_builder.registerCGSCCAnalyses(CGAM);
+  pass_builder.registerFunctionAnalyses(FAM);
+  pass_builder.registerLoopAnalyses(LAM);
+  pass_builder.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+  llvm::ModulePassManager pass_manager = 
pass_builder.buildPerModuleDefaultPipeline(
+      llvm::PassBuilder::OptimizationLevel::O2);
 
   // Update counters before final optimization, but after removing unused 
functions. This
   // gives us a rough measure of how much work the optimization and 
compilation must do.
@@ -1442,23 +1438,8 @@ Status LlvmCodeGen::OptimizeModule() {
     return mem_tracker_->MemLimitExceeded(NULL, msg, estimated_memory);
   }
 
-  // Create and run function pass manager
-  unique_ptr<llvm::legacy::FunctionPassManager> fn_pass_manager(
-      new llvm::legacy::FunctionPassManager(module_));
-  
fn_pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis));
-  pass_builder.populateFunctionPassManager(*fn_pass_manager);
-  fn_pass_manager->doInitialization();
-  for (llvm::Module::iterator it = module_->begin(), end = module_->end(); it 
!= end;
-       ++it) {
-    if (!it->isDeclaration()) fn_pass_manager->run(*it);
-  }
-  fn_pass_manager->doFinalization();
-
   // Create and run module pass manager
-  module_pass_manager.reset(new llvm::legacy::PassManager());
-  
module_pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis));
-  pass_builder.populateModulePassManager(*module_pass_manager);
-  module_pass_manager->run(*module_);
+  pass_manager.run(*module_, MAM);
   if (FLAGS_print_llvm_ir_instruction_count) {
     for (int i = 0; i < fns_to_jit_compile_.size(); ++i) {
       InstructionCounter counter;
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index af93e3f1d..9175ddc05 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -167,7 +167,7 @@ class LlvmCodeGen {
   /// the current object and not be able to find the backend symbols
   /// TODO: this can probably be removed after impalad refactor where the java
   /// side is not loading the be explicitly anymore.
-  static Status InitializeLlvm(bool load_backend = false);
+  static Status InitializeLlvm(const char* procname = "main", bool 
load_backend = false);
 
   /// Creates a codegen instance for Impala initialized with the 
cross-compiled Impala IR.
   /// 'codegen' will contain the created object on success.
@@ -843,6 +843,9 @@ class LlvmCodeGen {
   /// FragmentInstanceState during its 'CODEGEN_START' state.
   RuntimeProfile::Counter* ir_generation_timer_;
 
+  /// Time spent pruning unused functions.
+  RuntimeProfile::Counter* function_prune_timer_;
+
   /// Time spent optimizing the module.
   RuntimeProfile::Counter* optimization_timer_;
 
diff --git a/be/src/service/fe-support.cc b/be/src/service/fe-support.cc
index c0eb3f083..4cb359455 100644
--- a/be/src/service/fe-support.cc
+++ b/be/src/service/fe-support.cc
@@ -78,7 +78,8 @@ Java_org_apache_impala_service_FeSupport_NativeFeInit(
   // exceptions to the FE.
   InitCommonRuntime(1, &name, true,
       external_fe ? TestInfo::NON_TEST : TestInfo::FE_TEST, external_fe);
-  THROW_IF_ERROR(LlvmCodeGen::InitializeLlvm(true), env, 
JniUtil::internal_exc_class());
+  THROW_IF_ERROR(
+      LlvmCodeGen::InitializeLlvm(name, true), env, 
JniUtil::internal_exc_class());
   ExecEnv* exec_env = new ExecEnv(external_fe); // This also caches it from 
the process.
   THROW_IF_ERROR(exec_env->InitForFeSupport(), env, 
JniUtil::internal_exc_class());
 }
diff --git a/be/src/service/impalad-main.cc b/be/src/service/impalad-main.cc
index df51e8570..0390714de 100644
--- a/be/src/service/impalad-main.cc
+++ b/be/src/service/impalad-main.cc
@@ -59,7 +59,7 @@ DECLARE_bool(is_coordinator);
 int ImpaladMain(int argc, char** argv) {
   InitCommonRuntime(argc, argv, true);
 
-  ABORT_IF_ERROR(LlvmCodeGen::InitializeLlvm());
+  ABORT_IF_ERROR(LlvmCodeGen::InitializeLlvm(argv[0]));
   JniUtil::InitLibhdfs();
   ABORT_IF_ERROR(HBaseTableScanner::Init());
   ABORT_IF_ERROR(HBaseTable::InitJNI());
diff --git a/cmake_modules/FindLlvm.cmake b/cmake_modules/FindLlvm.cmake
index 6db74442d..e1c833b0e 100644
--- a/cmake_modules/FindLlvm.cmake
+++ b/cmake_modules/FindLlvm.cmake
@@ -85,7 +85,7 @@ execute_process(
 # Get the link libs we need.  llvm has many and we don't want to link all of 
the libs
 # if we don't need them.
 execute_process(
-  COMMAND ${LLVM_CONFIG_EXECUTABLE} --libnames core mcjit native ipo bitreader 
target linker analysis debuginfodwarf
+  COMMAND ${LLVM_CONFIG_EXECUTABLE} --libnames core mcjit native ipo bitreader 
target linker analysis debuginfodwarf passes
   OUTPUT_VARIABLE LLVM_MODULE_LIBS
   OUTPUT_STRIP_TRAILING_WHITESPACE
 )

Reply via email to