llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Aiden Grossman (boomanaiden154) <details> <summary>Changes</summary> This patch ports the X86 lower-AMX-type pass to the new pass manager, to enable the eventual migration of everything to the NewPM. There are two tests using this pass that rely on it running on functions marked optnone. These are left as TODOs for now; we will probably come back to them in the future, when we have a way to override the optnone pass instrumentation (for example, through a CLI flag). --- Full diff: https://github.com/llvm/llvm-project/pull/165084.diff 13 Files Affected: - (modified) llvm/lib/Target/X86/X86.h (+13-1) - (modified) llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp (+1) - (modified) llvm/lib/Target/X86/X86LowerAMXType.cpp (+57-39) - (modified) llvm/lib/Target/X86/X86PassRegistry.def (+6-1) - (modified) llvm/lib/Target/X86/X86TargetMachine.cpp (+1-1) - (modified) llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll (+2-1) - (modified) llvm/test/CodeGen/X86/AMX/amx-combine.ll (+2-1) - (modified) llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll (+2-1) - (modified) llvm/test/CodeGen/X86/AMX/amx-type.ll (+2-1) - (modified) llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll (+2-1) - (modified) llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll (+2-1) - (modified) llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll (+3-1) - (modified) llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll (+2-1) ``````````diff diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 706ab2b62bc1b..bac26baa6322e 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -14,7 +14,10 @@ #ifndef LLVM_LIB_TARGET_X86_X86_H #define LLVM_LIB_TARGET_X86_X86_H +#include "llvm/IR/Analysis.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { @@ -162,7 +165,16 @@ FunctionPass *createX86WinEHUnwindV2Pass(); /// The pass transforms load/store <256 x i32> to AMX load/store intrinsics /// or split the data to two 
<128 x i32>. -FunctionPass *createX86LowerAMXTypePass(); +class X86LowerAMXTypePass : public PassInfoMixin<X86LowerAMXTypePass> { +private: + const TargetMachine &TM; + +public: + X86LowerAMXTypePass(const TargetMachine &TM) : TM(TM) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + +FunctionPass *createX86LowerAMXTypeLegacyPass(); /// The pass transforms amx intrinsics to scalar operation if the function has /// optnone attribute or it is O0. diff --git a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp index d979517e12af6..2c0443da673a8 100644 --- a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp +++ b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp @@ -10,6 +10,7 @@ /// TODO: Port CodeGen passes to new pass manager. //===----------------------------------------------------------------------===// +#include "X86.h" #include "X86ISelDAGToDAG.h" #include "X86TargetMachine.h" diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp index 0ba71ada8638e..74c9a46ab5c00 100644 --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -46,12 +46,14 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Analysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsX86.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -64,7 +66,7 @@ using namespace llvm; using namespace PatternMatch; -#define DEBUG_TYPE "lower-amx-type" +#define DEBUG_TYPE "x86-lower-amx-type" static bool isAMXCast(Instruction *II) { return match(II, @@ -137,7 +139,7 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) { class ShapeCalculator { private: - 
TargetMachine *TM = nullptr; + const TargetMachine *TM = nullptr; // In AMX intrinsics we let Shape = {Row, Col}, but the // RealCol = Col / ElementSize. We may use the RealCol @@ -145,7 +147,7 @@ class ShapeCalculator { std::map<Value *, Value *> Col2Row, Row2Col; public: - ShapeCalculator(TargetMachine *TargetM) : TM(TargetM) {} + ShapeCalculator(const TargetMachine *TargetM) : TM(TargetM) {} std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo); std::pair<Value *, Value *> getShape(PHINode *Phi); Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity); @@ -1432,8 +1434,58 @@ bool X86LowerAMXCast::transformAllAMXCast() { return Change; } +bool lowerAmxType(Function &F, const TargetMachine *TM, + TargetLibraryInfo *TLI) { + // Performance optimization: most code doesn't use AMX, so return early if + // there are no instructions that produce AMX values. This is sufficient, as + // AMX arguments and constants are not allowed -- so any producer of an AMX + // value must be an instruction. + // TODO: find a cheaper way for this, without looking at all instructions. + if (!containsAMXCode(F)) + return false; + + bool C = false; + ShapeCalculator SC(TM); + X86LowerAMXCast LAC(F, &SC); + C |= LAC.combineAMXcast(TLI); + // There might be remaining AMXcast after combineAMXcast and they should be + // handled elegantly. + C |= LAC.transformAllAMXCast(); + + X86LowerAMXType LAT(F, &SC); + C |= LAT.visit(); + + // Prepare for fast register allocation at O0. + // Todo: May better check the volatile model of AMX code, not just + // by checking Attribute::OptimizeNone and CodeGenOptLevel::None. + if (TM->getOptLevel() == CodeGenOptLevel::None) { + // If Front End not use O0 but the Mid/Back end use O0, (e.g. + // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make + // sure the amx data is volatile, that is nessary for AMX fast + // register allocation. 
+ if (!F.hasFnAttribute(Attribute::OptimizeNone)) { + X86VolatileTileData VTD(F); + C = VTD.volatileTileData() || C; + } + } + + return C; +} + } // anonymous namespace +PreservedAnalyses X86LowerAMXTypePass::run(Function &F, + FunctionAnalysisManager &FAM) { + TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); + bool Changed = lowerAmxType(F, &TM, &TLI); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA = PreservedAnalyses::none(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + namespace { class X86LowerAMXTypeLegacyPass : public FunctionPass { @@ -1443,44 +1495,10 @@ class X86LowerAMXTypeLegacyPass : public FunctionPass { X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {} bool runOnFunction(Function &F) override { - // Performance optimization: most code doesn't use AMX, so return early if - // there are no instructions that produce AMX values. This is sufficient, as - // AMX arguments and constants are not allowed -- so any producer of an AMX - // value must be an instruction. - // TODO: find a cheaper way for this, without looking at all instructions. - if (!containsAMXCode(F)) - return false; - - bool C = false; TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - - ShapeCalculator SC(TM); - X86LowerAMXCast LAC(F, &SC); - C |= LAC.combineAMXcast(TLI); - // There might be remaining AMXcast after combineAMXcast and they should be - // handled elegantly. - C |= LAC.transformAllAMXCast(); - - X86LowerAMXType LAT(F, &SC); - C |= LAT.visit(); - - // Prepare for fast register allocation at O0. - // Todo: May better check the volatile model of AMX code, not just - // by checking Attribute::OptimizeNone and CodeGenOptLevel::None. - if (TM->getOptLevel() == CodeGenOptLevel::None) { - // If Front End not use O0 but the Mid/Back end use O0, (e.g. 
- // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make - // sure the amx data is volatile, that is nessary for AMX fast - // register allocation. - if (!F.hasFnAttribute(Attribute::OptimizeNone)) { - X86VolatileTileData VTD(F); - C = VTD.volatileTileData() || C; - } - } - - return C; + return lowerAmxType(F, TM, TLI); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -1501,6 +1519,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false, false) -FunctionPass *llvm::createX86LowerAMXTypePass() { +FunctionPass *llvm::createX86LowerAMXTypeLegacyPass() { return new X86LowerAMXTypeLegacyPass(); } diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def index 3f2a4331c41f2..1b83374bf9718 100644 --- a/llvm/lib/Target/X86/X86PassRegistry.def +++ b/llvm/lib/Target/X86/X86PassRegistry.def @@ -12,11 +12,16 @@ // NOTE: NO INCLUDE GUARD DESIRED! +#ifndef FUNCTION_PASS +#define FUNCTION_PASS(NAME, CREATE_PASS) +#endif +FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(*this)) +#undef FUNCTION_PASS + #ifndef DUMMY_FUNCTION_PASS #define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS) #endif DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this)) -DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this)) DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction()) DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass()) #undef DUMMY_FUNCTION_PASS diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 8dd6f3d97ccea..9a76abcd351bf 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -423,7 +423,7 @@ void X86PassConfig::addIRPasses() { // We add both pass anyway and when these two passes run, we skip the pass // based on the option level and option attribute. 
addPass(createX86LowerAMXIntrinsicsPass()); - addPass(createX86LowerAMXTypePass()); + addPass(createX86LowerAMXTypeLegacyPass()); TargetPassConfig::addIRPasses(); diff --git a/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll b/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll index faa119cd037f1..5f0682abbea12 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s define void @undef_2phi(ptr%buf) { ; CHECK-LABEL: @undef_2phi( diff --git a/llvm/test/CodeGen/X86/AMX/amx-combine.ll b/llvm/test/CodeGen/X86/AMX/amx-combine.ll index 07f489c633c55..72e072dd15761 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-combine.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-combine.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s define void @combine_store(ptr%p) { ; CHECK-LABEL: @combine_store( diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll index 6c536f11d4bb1..4ac406c1603ee 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -lower-amx-type -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 
-mattr=+avx512f -x86-lower-amx-type -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -passes=x86-lower-amx-type -S | FileCheck %s @buf = dso_local global [1024 x i8] zeroinitializer, align 16 @buf2 = dso_local global [1024 x i8] zeroinitializer, align 16 diff --git a/llvm/test/CodeGen/X86/AMX/amx-type.ll b/llvm/test/CodeGen/X86/AMX/amx-type.ll index 1d9af2b13cdfd..294195a6541bf 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-type.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-type.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s %struct.__tile_str = type { i16, i16, <256 x i32> } diff --git a/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll index b70668f7a3dea..cdce783d0a237 100644 --- a/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll +++ b/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s define void @combine_amx_cast_inside_bb() { ; CHECK-LABEL: @combine_amx_cast_inside_bb( diff --git a/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll index 3a5b424540ff1..0b419bb8573d5 100644 --- a/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll +++ b/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll @@ -1,5 +1,6 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s %struct.__tile_str = type { i16, i16, <256 x i32> } diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll index 52641c65c90e9..16a93310da7ad 100644 --- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll +++ b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py - ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s + ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s + ; TODO: Test this with the NewPM when we can force this pass to run despite optnone. 
+ ; RUN-TODO: opt --codegen-opt-level=0 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S &> /tmp/test2.ll | FileCheck %s @buf = dso_local global [2048 x i8] zeroinitializer, align 16 diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll index 346d46b6b16c2..96966264e0515 100644 --- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll +++ b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s +; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s @buf = dso_local global [2048 x i8] zeroinitializer, align 16 @buf2 = dso_local global [2048 x i8] zeroinitializer, align 16 `````````` </details> https://github.com/llvm/llvm-project/pull/165084 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
