https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/137628

Backport 98b895d

Requested by: @JonPsson1

>From c854e4ec6031904cd3030dbd6d06e4d86ec7484f Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <pauls...@linux.ibm.com>
Date: Mon, 28 Apr 2025 07:04:07 -0600
Subject: [PATCH] [SystemZ] Fix compile time regression in
 adjustInliningThreshold(). (#137527)

Instead of always iterating over all GlobalVariable:s in the Module to
find the case where both Caller and Callee is using the same GV heavily,
first scan Callee (only if less than 200 instructions) for all GVs used
more than 10 times, and then do the counting for the Caller for just
those relevant GVs.

The limit of 200 instructions makes sense as this aims to inline a
relatively small function using a GV +10 times.

This resolves the compile time problem with zig where it is on main
(compared to removing the heuristic) a 380% increase, but with this
change <0.5% increase (total user compile time with opt).

Fixes #134714.

(cherry picked from commit 98b895da30c03b7061b8740d91c0e7998e69d091)
---
 .../SystemZ/SystemZTargetTransformInfo.cpp    | 48 ++++++++++++-------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp 
b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 2b94832939419..7eec38b79cb82 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Debug.h"
@@ -80,7 +81,6 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const 
CallBase *CB) const {
   const Function *Callee = CB->getCalledFunction();
   if (!Callee)
     return 0;
-  const Module *M = Caller->getParent();
 
   // Increase the threshold if an incoming argument is used only as a memcpy
   // source.
@@ -92,25 +92,37 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const 
CallBase *CB) const {
     }
   }
 
-  // Give bonus for globals used much in both caller and callee.
-  std::set<const GlobalVariable *> CalleeGlobals;
-  std::set<const GlobalVariable *> CallerGlobals;
-  for (const GlobalVariable &Global : M->globals())
-    for (const User *U : Global.users())
-      if (const Instruction *User = dyn_cast<Instruction>(U)) {
-        if (User->getParent()->getParent() == Callee)
-          CalleeGlobals.insert(&Global);
-        if (User->getParent()->getParent() == Caller)
-          CallerGlobals.insert(&Global);
+  // Give bonus for globals used much in both caller and a relatively small
+  // callee.
+  unsigned InstrCount = 0;
+  SmallDenseMap<const Value *, unsigned> Ptr2NumUses;
+  for (auto &I : instructions(Callee)) {
+    if (++InstrCount == 200) {
+      Ptr2NumUses.clear();
+      break;
+    }
+    if (const auto *SI = dyn_cast<StoreInst>(&I)) {
+      if (!SI->isVolatile())
+        if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand()))
+          Ptr2NumUses[GV]++;
+    } else if (const auto *LI = dyn_cast<LoadInst>(&I)) {
+      if (!LI->isVolatile())
+        if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
+          Ptr2NumUses[GV]++;
+    } else if (const auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+      if (auto *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand())) {
+        unsigned NumStores = 0, NumLoads = 0;
+        countNumMemAccesses(GEP, NumStores, NumLoads, Callee);
+        Ptr2NumUses[GV] += NumLoads + NumStores;
       }
-  for (auto *GV : CalleeGlobals)
-    if (CallerGlobals.count(GV)) {
-      unsigned CalleeStores = 0, CalleeLoads = 0;
+    }
+  }
+
+  for (auto [Ptr, NumCalleeUses] : Ptr2NumUses)
+    if (NumCalleeUses > 10) {
       unsigned CallerStores = 0, CallerLoads = 0;
-      countNumMemAccesses(GV, CalleeStores, CalleeLoads, Callee);
-      countNumMemAccesses(GV, CallerStores, CallerLoads, Caller);
-      if ((CalleeStores + CalleeLoads) > 10 &&
-          (CallerStores + CallerLoads) > 10) {
+      countNumMemAccesses(Ptr, CallerStores, CallerLoads, Caller);
+      if (CallerStores + CallerLoads > 10) {
         Bonus = 1000;
         break;
       }

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to