[clang] [clang-tools-extra] [llvm] [coro][pgo] Do not insert counters in the `suspend` block (PR #71262)

2023-11-15 Thread Mircea Trofin via cfe-commits

https://github.com/mtrofin closed 
https://github.com/llvm/llvm-project/pull/71262
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang-tools-extra] [llvm] [coro][pgo] Do not insert counters in the `suspend` block (PR #71262)

2023-11-15 Thread Mircea Trofin via cfe-commits

https://github.com/mtrofin edited 
https://github.com/llvm/llvm-project/pull/71262
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang-tools-extra] [llvm] [coro][pgo] Do not insert counters in the `suspend` block (PR #71262)

2023-11-15 Thread Mircea Trofin via cfe-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/71262

>From 184936c339ea73ccfc4349e023ff165aa9f8392e Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Fri, 3 Nov 2023 18:19:15 -0700
Subject: [PATCH 1/4] [coro][pgp] Do not insert counters in the `suspend` block

If we do, we can't lower the suspend call to a tail call. If this
happened in a loop, it can lead to stack overflow (this was encountered
in a benchmark, as an extreme case)

We can instrument the other 2 edges instead, as long as they also don't
point to the same basic block.
---
 .../llvm/Transforms/Instrumentation/CFGMST.h  | 65 +++
 .../Coroutines/coro-split-musttail.ll |  7 +-
 .../Coroutines/coro-split-musttail1.ll| 12 ++--
 .../Coroutines/coro-split-musttail10.ll   |  1 +
 .../Coroutines/coro-split-musttail11.ll   |  1 +
 .../Coroutines/coro-split-musttail12.ll   |  1 +
 .../Coroutines/coro-split-musttail13.ll   |  1 +
 .../Coroutines/coro-split-musttail2.ll|  1 +
 .../Coroutines/coro-split-musttail3.ll| 12 ++--
 .../Coroutines/coro-split-musttail4.ll|  1 +
 .../Coroutines/coro-split-musttail5.ll|  1 +
 .../Coroutines/coro-split-musttail6.ll|  1 +
 .../Coroutines/coro-split-musttail7.ll|  1 +
 13 files changed, 83 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h 
b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index 6ed8a6c6eaf0197..1c5b7ba6d0ed364 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -19,6 +19,8 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -121,31 +123,70 @@ template  class CFGMST {
 
 static const uint32_t CriticalEdgeMultiplier = 1000;
 
+auto GetCoroSuspendSwitch =
+[&](const Instruction *TI) -> const SwitchInst * {
+  if (!F.isPresplitCoroutine())
+return nullptr;
+  if (auto *SWInst = dyn_cast(TI))
+if (auto *Intrinsic = dyn_cast(SWInst->getCondition()))
+  if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend)
+return SWInst;
+  return nullptr;
+};
+
 for (BasicBlock  : F) {
   Instruction *TI = BB.getTerminator();
+  const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI);
   uint64_t BBWeight =
   (BFI != nullptr ? BFI->getBlockFreq().getFrequency() : 2);
   uint64_t Weight = 2;
   if (int successors = TI->getNumSuccessors()) {
 for (int i = 0; i != successors; ++i) {
   BasicBlock *TargetBB = TI->getSuccessor(i);
-  bool Critical = isCriticalEdge(TI, i);
-  uint64_t scaleFactor = BBWeight;
-  if (Critical) {
-if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
-  scaleFactor *= CriticalEdgeMultiplier;
-else
-  scaleFactor = UINT64_MAX;
+  const bool Critical = isCriticalEdge(TI, i);
+  const bool IsCoroSuspendTarget =
+  CoroSuspendSwitch &&
+  CoroSuspendSwitch->getDefaultDest() == TargetBB;
+  // We must not add instrumentation to the BB representing the
+  // "suspend" path, else CoroSplit won't be able to lower
+  // llvm.coro.suspend to a tail call. We do want profiling info for
+  // the other branches (resume/destroy). So we do 2 things:
+  // 1. we prefer instrumenting those other edges by setting the weight
+  //of the "suspend" edge to max, and
+  // 2. we mark the edge as "Removed" to guarantee it is not considered
+  //for instrumentation. That could technically happen:
+  //(from test/Transforms/Coroutines/coro-split-musttail.ll)
+  //
+  // %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+  // switch i8 %suspend, label %exit [
+  //   i8 0, label %await.ready
+  //   i8 1, label %exit
+  // ]
+  if (IsCoroSuspendTarget) {
+Weight = UINT64_MAX;
+  } else {
+bool Critical = isCriticalEdge(TI, i);
+uint64_t scaleFactor = BBWeight;
+if (Critical) {
+  if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+scaleFactor *= CriticalEdgeMultiplier;
+  else
+scaleFactor = UINT64_MAX;
+}
+if (BPI != nullptr)
+  Weight =
+  BPI->getEdgeProbability(, TargetBB).scale(scaleFactor);
+if (Weight == 0)
+  Weight++;
   }
-  if (BPI != nullptr)
-Weight = BPI->getEdgeProbability(,