llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)

<details>
<summary>Changes</summary>

Backport fab06fae0064a2f1208331f9c355a26a4f9777f0

Requested by: @<!-- -->nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/177974.diff


3 Files Affected:

- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp (+49) 
- (modified) llvm/lib/Target/ARM/ARMTargetTransformInfo.h (+132-34) 
- (added) llvm/test/Transforms/Inline/ARM/inline-dotprod.ll (+35) 


``````````diff
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 88a7fb185bf16..b947c8a10e2d8 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -107,6 +107,55 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
   // the callers'.
   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                      (CalleeBits & InlineFeaturesAllowed);
+
+  LLVM_DEBUG({
+    if (!MatchExact || !MatchSubset) {
+      dbgs() << "=== Inline compatibility debug ===\n";
+      dbgs() << "Caller: " << Caller->getName() << "\n";
+      dbgs() << "Callee: " << Callee->getName() << "\n";
+
+      // Bit diffs
+      FeatureBitset MissingInCaller = CalleeBits & ~CallerBits; // callee-only
+      FeatureBitset ExtraInCaller = CallerBits & ~CalleeBits;   // caller-only
+
+      // Counts
+      dbgs() << "Only-in-caller bit count: " << ExtraInCaller.count() << "\n";
+      dbgs() << "Only-in-callee bit count: " << MissingInCaller.count() << "\n";
+
+      dbgs() << "Only-in-caller feature indices [";
+      {
+        bool First = true;
+        for (size_t I = 0, E = ExtraInCaller.size(); I < E; ++I) {
+          if (ExtraInCaller.test(I)) {
+            if (!First)
+              dbgs() << ", ";
+            dbgs() << I;
+            First = false;
+          }
+        }
+      }
+      dbgs() << "]\n";
+
+      dbgs() << "Only-in-callee feature indices [";
+      {
+        bool First = true;
+        for (size_t I = 0, E = MissingInCaller.size(); I < E; ++I) {
+          if (MissingInCaller.test(I)) {
+            if (!First)
+              dbgs() << ", ";
+            dbgs() << I;
+            First = false;
+          }
+        }
+      }
+      dbgs() << "]\n";
+
+      // Indices map to features as found in
+      // llvm-project/(your_build)/lib/Target/ARM/ARMGenSubtargetInfo.inc
+      dbgs() << "MatchExact=" << (MatchExact ? "true" : "false")
+             << " MatchSubset=" << (MatchSubset ? "true" : "false") << "\n";
+    }
+  });
   return MatchExact && MatchSubset;
 }
 
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index a23256364dd9a..fafd2d44a818c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -40,13 +40,13 @@ class Type;
 class Value;
 
 namespace TailPredication {
-  enum Mode {
-    Disabled = 0,
-    EnabledNoReductions,
-    Enabled,
-    ForceEnabledNoReductions,
-    ForceEnabled
-  };
+enum Mode {
+  Disabled = 0,
+  EnabledNoReductions,
+  Enabled,
+  ForceEnabledNoReductions,
+  ForceEnabled
+};
 }
 
 // For controlling conversion of memcpy into Tail Predicated loop.
@@ -64,37 +64,135 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
   const ARMTargetLowering *TLI;
 
   // Currently the following features are excluded from InlineFeaturesAllowed.
-  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
+  // ModeThumb, FeatureNoARM, ModeSoftFloat.
   // Depending on whether they are set or unset, different
   // instructions/registers are available. For example, inlining a callee with
   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
   // fail if the callee uses ARM only instructions, e.g. in inline asm.
-  const FeatureBitset InlineFeaturesAllowed = {
-      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
-      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
-      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
-      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
-      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
-      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
-      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
-      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
-      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
-      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
-      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
-      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
-      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
-      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
-      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
-      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
-      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
-      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
-      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
-      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
-      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
-      ARM::FeatureAClass, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
-      ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
-      ARM::FeatureNoNegativeImmediates
-  };
+  const FeatureBitset InlineFeaturesAllowed = {ARM::Feature8MSecExt,
+                                               ARM::FeatureAClass,
+                                               ARM::FeatureAES,
+                                               ARM::FeatureAcquireRelease,
+                                               ARM::FeatureAvoidMOVsShOp,
+                                               ARM::FeatureAvoidMULS,
+                                               ARM::FeatureAvoidPartialCPSR,
+                                               ARM::FeatureBF16,
+                                               ARM::FeatureCRC,
+                                               ARM::FeatureCheapPredicableCPSR,
+                                               ARM::FeatureCheckVLDnAlign,
+                                               ARM::FeatureCrypto,
+                                               ARM::FeatureD32,
+                                               ARM::FeatureDB,
+                                               ARM::FeatureDFB,
+                                               ARM::FeatureDSP,
+                                               ARM::FeatureDontWidenVMOVS,
+                                               ARM::FeatureDotProd,
+                                               ARM::FeatureExecuteOnly,
+                                               ARM::FeatureExpandMLx,
+                                               ARM::FeatureFP16,
+                                               ARM::FeatureFP16FML,
+                                               ARM::FeatureFP64,
+                                               ARM::FeatureFPAO,
+                                               ARM::FeatureFPARMv8,
+                                               ARM::FeatureFPARMv8_D16,
+                                               ARM::FeatureFPARMv8_D16_SP,
+                                               ARM::FeatureFPARMv8_SP,
+                                               ARM::FeatureFPRegs,
+                                               ARM::FeatureFPRegs16,
+                                               ARM::FeatureFPRegs64,
+                                               ARM::FeatureFullFP16,
+                                               ARM::FeatureFuseAES,
+                                               ARM::FeatureFuseLiterals,
+                                               ARM::FeatureHWDivARM,
+                                               ARM::FeatureHWDivThumb,
+                                               ARM::FeatureHasNoBranchPredictor,
+                                               ARM::FeatureHasRetAddrStack,
+                                               ARM::FeatureHasSlowFPVFMx,
+                                               ARM::FeatureHasSlowFPVMLx,
+                                               ARM::FeatureHasVMLxHazards,
+                                               ARM::FeatureLOB,
+                                               ARM::FeatureLongCalls,
+                                               ARM::FeatureMClass,
+                                               ARM::FeatureMP,
+                                               ARM::FeatureMVEVectorCostFactor1,
+                                               ARM::FeatureMVEVectorCostFactor2,
+                                               ARM::FeatureMVEVectorCostFactor4,
+                                               ARM::FeatureMatMulInt8,
+                                               ARM::FeatureMuxedUnits,
+                                               ARM::FeatureNEON,
+                                               ARM::FeatureNEONForFP,
+                                               ARM::FeatureNEONForFPMovs,
+                                               ARM::FeatureNoMovt,
+                                               ARM::FeatureNoNegativeImmediates,
+                                               ARM::FeatureNoPostRASched,
+                                               ARM::FeaturePerfMon,
+                                               ARM::FeaturePref32BitThumb,
+                                               ARM::FeaturePrefISHSTBarrier,
+                                               ARM::FeaturePreferBranchAlign32,
+                                               ARM::FeaturePreferBranchAlign64,
+                                               ARM::FeaturePreferVMOVSR,
+                                               ARM::FeatureProfUnpredicate,
+                                               ARM::FeatureRAS,
+                                               ARM::FeatureRClass,
+                                               ARM::FeatureReserveR9,
+                                               ARM::FeatureSB,
+                                               ARM::FeatureSHA2,
+                                               ARM::FeatureSlowFPBrcc,
+                                               ARM::FeatureSlowLoadDSubreg,
+                                               ARM::FeatureSlowOddRegister,
+                                               ARM::FeatureSlowVDUP32,
+                                               ARM::FeatureSlowVGETLNi32,
+                                               ARM::FeatureSplatVFPToNeon,
+                                               ARM::FeatureStrictAlign,
+                                               ARM::FeatureThumb2,
+                                               ARM::FeatureTrustZone,
+                                               ARM::FeatureUseMIPipeliner,
+                                               ARM::FeatureUseMISched,
+                                               ARM::FeatureUseWideStrideVFP,
+                                               ARM::FeatureV7Clrex,
+                                               ARM::FeatureVFP2,
+                                               ARM::FeatureVFP2_SP,
+                                               ARM::FeatureVFP3,
+                                               ARM::FeatureVFP3_D16,
+                                               ARM::FeatureVFP3_D16_SP,
+                                               ARM::FeatureVFP3_SP,
+                                               ARM::FeatureVFP4,
+                                               ARM::FeatureVFP4_D16,
+                                               ARM::FeatureVFP4_D16_SP,
+                                               ARM::FeatureVFP4_SP,
+                                               ARM::FeatureVMLxForwarding,
+                                               ARM::FeatureVirtualization,
+                                               ARM::FeatureZCZeroing,
+                                               ARM::HasMVEFloatOps,
+                                               ARM::HasMVEIntegerOps,
+                                               ARM::HasV5TEOps,
+                                               ARM::HasV5TOps,
+                                               ARM::HasV6KOps,
+                                               ARM::HasV6MOps,
+                                               ARM::HasV6Ops,
+                                               ARM::HasV6T2Ops,
+                                               ARM::HasV7Ops,
+                                               ARM::HasV8MBaselineOps,
+                                               ARM::HasV8MMainlineOps,
+                                               ARM::HasV8Ops,
+                                               ARM::HasV8_1MMainlineOps,
+                                               ARM::HasV8_1aOps,
+                                               ARM::HasV8_2aOps,
+                                               ARM::HasV8_3aOps,
+                                               ARM::HasV8_4aOps,
+                                               ARM::HasV8_5aOps,
+                                               ARM::HasV8_6aOps,
+                                               ARM::HasV8_7aOps,
+                                               ARM::HasV8_8aOps,
+                                               ARM::HasV8_9aOps,
+                                               ARM::HasV9_0aOps,
+                                               ARM::HasV9_1aOps,
+                                               ARM::HasV9_2aOps,
+                                               ARM::HasV9_3aOps,
+                                               ARM::HasV9_4aOps,
+                                               ARM::HasV9_5aOps,
+                                               ARM::HasV9_6aOps,
+                                               ARM::HasV9_7aOps};
 
   const ARMSubtarget *getST() const { return ST; }
   const ARMTargetLowering *getTLI() const { return TLI; }
diff --git a/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll b/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll
new file mode 100644
index 0000000000000..2f8dbb7f01822
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes=inline | FileCheck %s
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
+
+declare i32 @foo(...) #0
+
+define i32 @callee() #0 {
+entry:
+  %call = call i32 (...) @foo()
+  ret i32 %call
+}
+
+define i32 @dotcallee() #1 {
+entry:
+  %call = call i32 (...) @foo()
+  ret i32 %call
+}
+
+define i32 @dotcaller() #1 {
+entry:
+  %call = call i32 @callee()
+  ret i32 %call
+; CHECK-LABEL: dotcaller
+; CHECK: call i32 (...) @foo()
+}
+
+define i32 @caller() #0 {
+entry:
+  %call = call i32 @dotcallee()
+  ret i32 %call
+; CHECK-LABEL: caller
+; CHECK: call i32 @dotcallee()
+}
+
+attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" }
+attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+dotprod" }

``````````

</details>


https://github.com/llvm/llvm-project/pull/177974
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to