llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-arm Author: None (llvmbot) <details> <summary>Changes</summary> Backport fab06fae0064a2f1208331f9c355a26a4f9777f0 Requested by: @<!-- -->nikic --- Full diff: https://github.com/llvm/llvm-project/pull/177974.diff 3 Files Affected: - (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+49) - (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+132-34) - (added) llvm/test/Transforms/Inline/ARM/inline-dotprod.ll (+35) ``````````diff diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 88a7fb185bf16..b947c8a10e2d8 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -107,6 +107,55 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller, // the callers'. bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) == (CalleeBits & InlineFeaturesAllowed); + + LLVM_DEBUG({ + if (!MatchExact || !MatchSubset) { + dbgs() << "=== Inline compatibility debug ===\n"; + dbgs() << "Caller: " << Caller->getName() << "\n"; + dbgs() << "Callee: " << Callee->getName() << "\n"; + + // Bit diffs + FeatureBitset MissingInCaller = CalleeBits & ~CallerBits; // callee-only + FeatureBitset ExtraInCaller = CallerBits & ~CalleeBits; // caller-only + + // Counts + dbgs() << "Only-in-caller bit count: " << ExtraInCaller.count() << "\n"; + dbgs() << "Only-in-callee bit count: " << MissingInCaller.count() << "\n"; + + dbgs() << "Only-in-caller feature indices ["; + { + bool First = true; + for (size_t I = 0, E = ExtraInCaller.size(); I < E; ++I) { + if (ExtraInCaller.test(I)) { + if (!First) + dbgs() << ", "; + dbgs() << I; + First = false; + } + } + } + dbgs() << "]\n"; + + dbgs() << "Only-in-callee feature indices ["; + { + bool First = true; + for (size_t I = 0, E = MissingInCaller.size(); I < E; ++I) { + if (MissingInCaller.test(I)) { + if (!First) + dbgs() << ", "; + dbgs() << I; + 
First = false; + } + } + } + dbgs() << "]\n"; + + // Indices map to features as found in + // llvm-project/(your_build)/lib/Target/ARM/ARMGenSubtargetInfo.inc + dbgs() << "MatchExact=" << (MatchExact ? "true" : "false") + << " MatchSubset=" << (MatchSubset ? "true" : "false") << "\n"; + } + }); return MatchExact && MatchSubset; } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index a23256364dd9a..fafd2d44a818c 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -40,13 +40,13 @@ class Type; class Value; namespace TailPredication { - enum Mode { - Disabled = 0, - EnabledNoReductions, - Enabled, - ForceEnabledNoReductions, - ForceEnabled - }; +enum Mode { + Disabled = 0, + EnabledNoReductions, + Enabled, + ForceEnabledNoReductions, + ForceEnabled +}; } // For controlling conversion of memcpy into Tail Predicated loop. @@ -64,37 +64,135 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> { const ARMTargetLowering *TLI; // Currently the following features are excluded from InlineFeaturesAllowed. - // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32 + // ModeThumb, FeatureNoARM, ModeSoftFloat. // Depending on whether they are set or unset, different // instructions/registers are available. For example, inlining a callee with // -thumb-mode in a caller with +thumb-mode, may cause the assembler to // fail if the callee uses ARM only instructions, e.g. in inline asm. 
- const FeatureBitset InlineFeaturesAllowed = { - ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, - ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, - ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb, - ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, - ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, - ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, - ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS, - ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing, - ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32, - ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR, - ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits, - ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg, - ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx, - ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs, - ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign, - ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx, - ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb, - ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR, - ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack, - ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP, - ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass, - ARM::FeatureAClass, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, - ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, - ARM::FeatureNoNegativeImmediates - }; + const FeatureBitset InlineFeaturesAllowed = {ARM::Feature8MSecExt, + ARM::FeatureAClass, + ARM::FeatureAES, + ARM::FeatureAcquireRelease, + ARM::FeatureAvoidMOVsShOp, + ARM::FeatureAvoidMULS, + ARM::FeatureAvoidPartialCPSR, + ARM::FeatureBF16, + ARM::FeatureCRC, + ARM::FeatureCheapPredicableCPSR, + ARM::FeatureCheckVLDnAlign, + ARM::FeatureCrypto, + ARM::FeatureD32, + ARM::FeatureDB, + ARM::FeatureDFB, + ARM::FeatureDSP, + ARM::FeatureDontWidenVMOVS, + ARM::FeatureDotProd, + 
ARM::FeatureExecuteOnly, + ARM::FeatureExpandMLx, + ARM::FeatureFP16, + ARM::FeatureFP16FML, + ARM::FeatureFP64, + ARM::FeatureFPAO, + ARM::FeatureFPARMv8, + ARM::FeatureFPARMv8_D16, + ARM::FeatureFPARMv8_D16_SP, + ARM::FeatureFPARMv8_SP, + ARM::FeatureFPRegs, + ARM::FeatureFPRegs16, + ARM::FeatureFPRegs64, + ARM::FeatureFullFP16, + ARM::FeatureFuseAES, + ARM::FeatureFuseLiterals, + ARM::FeatureHWDivARM, + ARM::FeatureHWDivThumb, + ARM::FeatureHasNoBranchPredictor, + ARM::FeatureHasRetAddrStack, + ARM::FeatureHasSlowFPVFMx, + ARM::FeatureHasSlowFPVMLx, + ARM::FeatureHasVMLxHazards, + ARM::FeatureLOB, + ARM::FeatureLongCalls, + ARM::FeatureMClass, + ARM::FeatureMP, + ARM::FeatureMVEVectorCostFactor1, + ARM::FeatureMVEVectorCostFactor2, + ARM::FeatureMVEVectorCostFactor4, + ARM::FeatureMatMulInt8, + ARM::FeatureMuxedUnits, + ARM::FeatureNEON, + ARM::FeatureNEONForFP, + ARM::FeatureNEONForFPMovs, + ARM::FeatureNoMovt, + ARM::FeatureNoNegativeImmediates, + ARM::FeatureNoPostRASched, + ARM::FeaturePerfMon, + ARM::FeaturePref32BitThumb, + ARM::FeaturePrefISHSTBarrier, + ARM::FeaturePreferBranchAlign32, + ARM::FeaturePreferBranchAlign64, + ARM::FeaturePreferVMOVSR, + ARM::FeatureProfUnpredicate, + ARM::FeatureRAS, + ARM::FeatureRClass, + ARM::FeatureReserveR9, + ARM::FeatureSB, + ARM::FeatureSHA2, + ARM::FeatureSlowFPBrcc, + ARM::FeatureSlowLoadDSubreg, + ARM::FeatureSlowOddRegister, + ARM::FeatureSlowVDUP32, + ARM::FeatureSlowVGETLNi32, + ARM::FeatureSplatVFPToNeon, + ARM::FeatureStrictAlign, + ARM::FeatureThumb2, + ARM::FeatureTrustZone, + ARM::FeatureUseMIPipeliner, + ARM::FeatureUseMISched, + ARM::FeatureUseWideStrideVFP, + ARM::FeatureV7Clrex, + ARM::FeatureVFP2, + ARM::FeatureVFP2_SP, + ARM::FeatureVFP3, + ARM::FeatureVFP3_D16, + ARM::FeatureVFP3_D16_SP, + ARM::FeatureVFP3_SP, + ARM::FeatureVFP4, + ARM::FeatureVFP4_D16, + ARM::FeatureVFP4_D16_SP, + ARM::FeatureVFP4_SP, + ARM::FeatureVMLxForwarding, + ARM::FeatureVirtualization, + ARM::FeatureZCZeroing, + 
ARM::HasMVEFloatOps, + ARM::HasMVEIntegerOps, + ARM::HasV5TEOps, + ARM::HasV5TOps, + ARM::HasV6KOps, + ARM::HasV6MOps, + ARM::HasV6Ops, + ARM::HasV6T2Ops, + ARM::HasV7Ops, + ARM::HasV8MBaselineOps, + ARM::HasV8MMainlineOps, + ARM::HasV8Ops, + ARM::HasV8_1MMainlineOps, + ARM::HasV8_1aOps, + ARM::HasV8_2aOps, + ARM::HasV8_3aOps, + ARM::HasV8_4aOps, + ARM::HasV8_5aOps, + ARM::HasV8_6aOps, + ARM::HasV8_7aOps, + ARM::HasV8_8aOps, + ARM::HasV8_9aOps, + ARM::HasV9_0aOps, + ARM::HasV9_1aOps, + ARM::HasV9_2aOps, + ARM::HasV9_3aOps, + ARM::HasV9_4aOps, + ARM::HasV9_5aOps, + ARM::HasV9_6aOps, + ARM::HasV9_7aOps}; const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } diff --git a/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll b/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll new file mode 100644 index 0000000000000..2f8dbb7f01822 --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes=inline | FileCheck %s +; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s + +declare i32 @foo(...) #0 + +define i32 @callee() #0 { +entry: + %call = call i32 (...) @foo() + ret i32 %call +} + +define i32 @dotcallee() #1 { +entry: + %call = call i32 (...) @foo() + ret i32 %call +} + +define i32 @dotcaller() #1 { +entry: + %call = call i32 @callee() + ret i32 %call +; CHECK-LABEL: dotcaller +; CHECK: call i32 (...) 
@foo() +} + +define i32 @caller() #0 { +entry: + %call = call i32 @dotcallee() + ret i32 %call +; CHECK-LABEL: caller +; CHECK: call i32 @dotcallee() +} + +attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" } +attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+dotprod" } `````````` </details> https://github.com/llvm/llvm-project/pull/177974 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
