https://github.com/melver updated 
https://github.com/llvm/llvm-project/pull/169358

>From d596966645e76f444cea4f6e4377848209b6cbc5 Mon Sep 17 00:00:00 2001
From: Marco Elver <[email protected]>
Date: Mon, 24 Nov 2025 17:36:32 +0100
Subject: [PATCH 1/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.8-beta.1

[skip ci]
---
 clang/test/CodeGen/memprof-pgho.cpp           | 61 ++++++++++++++
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 80 ++++++++++---------
 .../InstCombine/simplify-libcalls-new.ll      | 13 +++
 3 files changed, 117 insertions(+), 37 deletions(-)
 create mode 100644 clang/test/CodeGen/memprof-pgho.cpp

diff --git a/clang/test/CodeGen/memprof-pgho.cpp b/clang/test/CodeGen/memprof-pgho.cpp
new file mode 100644
index 0000000000000..73bec2b303f13
--- /dev/null
+++ b/clang/test/CodeGen/memprof-pgho.cpp
@@ -0,0 +1,61 @@
+// Test end-to-end optimization pipeline with PGHO, that it does not interfere
+// with other allocation instrumentation features.
+//
+// RUN: split-file %s %t
+// RUN: llvm-profdata merge %t/memprof.yaml -o %t/use.profdata
+// RUN: %clang_cc1 -O2 -debug-info-kind=limited -fmemory-profile-use=%t/use.profdata -mllvm -optimize-hot-cold-new \
+// RUN:            %t/src.cpp -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,DEFAULT
+// RUN: %clang_cc1 -O2 -fsanitize=alloc-token -debug-info-kind=limited -fmemory-profile-use=%t/use.profdata -mllvm -optimize-hot-cold-new \
+// RUN:             %t/src.cpp -triple x86_64-linux-gnu -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,ALLOCTOKEN
+
+//--- memprof.yaml
+---
+HeapProfileRecords:
+  - GUID: 0x7f8d88fcc70a347b
+    AllocSites:
+    - Callstack:
+      - { Function: 0x7f8d88fcc70a347b, LineOffset: 1, Column: 10, IsInlineFrame: false }
+      - { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 13, IsInlineFrame: false }
+      MemInfoBlock:
+        AllocCount: 1
+        TotalAccessCount: 0
+        MinAccessCount: 0
+        MaxAccessCount: 0
+        TotalSize: 10
+        MinSize: 10
+        MaxSize: 10
+        AllocTimestamp: 100
+        DeallocTimestamp: 100
+        TotalLifetime: 100000
+        MinLifetime: 100000
+        MaxLifetime: 100000
+        AllocCpuId: 0
+        DeallocCpuId: 0
+        NumMigratedCpu: 0
+        NumLifetimeOverlaps: 0
+        NumSameAllocCpu: 0
+        NumSameDeallocCpu: 0
+        DataTypeId: 0
+        TotalAccessDensity: 0
+        MinAccessDensity: 0
+        MaxAccessDensity: 0
+        TotalLifetimeAccessDensity: 0
+        MinLifetimeAccessDensity: 0
+        MaxLifetimeAccessDensity: 0
+        AccessHistogramSize: 0
+        AccessHistogram: 0
+...
+
+//--- src.cpp
+// CHECK-LABEL: define{{.*}} ptr @_Z3foov()
+// DEFAULT:    call {{.*}} ptr @_Znam12__hot_cold_t(i64 10, i8 -128)
+// ALLOCTOKEN: call {{.*}} ptr @__alloc_token__Znam12__hot_cold_t(i64 10, i8 -128, i64 1538840549748785101){{.*}} !alloc_token
+char *foo() {
+  return new char[10];
+}
+
+int main() {
+  char *a = foo();
+  delete[] a;
+  return 0;
+}
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 4f4e64b1c7b70..4ba4e484fb77d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1806,119 +1806,125 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
   // better to replace the hinted call with a non hinted call, to avoid the
   // extra parameter and the if condition check of the hint value in the
   // allocator. This can be considered in the future.
+  Value *NewCall = nullptr;
   switch (Func) {
   case LibFunc_Znwm12__hot_cold_t:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdNew(CI->getArgOperand(0), B, TLI,
-                            LibFunc_Znwm12__hot_cold_t, HotCold);
+      NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI,
+                               LibFunc_Znwm12__hot_cold_t, HotCold);
     break;
   case LibFunc_Znwm:
-    return emitHotColdNew(CI->getArgOperand(0), B, TLI,
-                          LibFunc_Znwm12__hot_cold_t, HotCold);
+    NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI,
+                             LibFunc_Znwm12__hot_cold_t, HotCold);
     break;
   case LibFunc_Znam12__hot_cold_t:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdNew(CI->getArgOperand(0), B, TLI,
-                            LibFunc_Znam12__hot_cold_t, HotCold);
+      NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI,
+                               LibFunc_Znam12__hot_cold_t, HotCold);
     break;
   case LibFunc_Znam:
-    return emitHotColdNew(CI->getArgOperand(0), B, TLI,
-                          LibFunc_Znam12__hot_cold_t, HotCold);
+    NewCall = emitHotColdNew(CI->getArgOperand(0), B, TLI,
+                             LibFunc_Znam12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdNewNoThrow(
+      NewCall = emitHotColdNewNoThrow(
           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
           LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnwmRKSt9nothrow_t:
-    return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,
-                                 TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t,
-                                 HotCold);
+    NewCall = emitHotColdNewNoThrow(
+        CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
+        LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdNewNoThrow(
+      NewCall = emitHotColdNewNoThrow(
           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
           LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnamRKSt9nothrow_t:
-    return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,
-                                 TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t,
-                                 HotCold);
+    NewCall = emitHotColdNewNoThrow(
+        CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
+        LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdNewAligned(
+      NewCall = emitHotColdNewAligned(
           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
           LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnwmSt11align_val_t:
-    return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,
-                                 TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t,
-                                 HotCold);
+    NewCall = emitHotColdNewAligned(
+        CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
+        LibFunc_ZnwmSt11align_val_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdNewAligned(
+      NewCall = emitHotColdNewAligned(
           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
           LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnamSt11align_val_t:
-    return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,
-                                 TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t,
-                                 HotCold);
+    NewCall = emitHotColdNewAligned(
+        CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
+        LibFunc_ZnamSt11align_val_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdNewAlignedNoThrow(
+      NewCall = emitHotColdNewAlignedNoThrow(
           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
           TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
           HotCold);
     break;
   case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
-    return emitHotColdNewAlignedNoThrow(
+    NewCall = emitHotColdNewAlignedNoThrow(
         CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
         TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdNewAlignedNoThrow(
+      NewCall = emitHotColdNewAlignedNoThrow(
           CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
           TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t,
           HotCold);
     break;
   case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
-    return emitHotColdNewAlignedNoThrow(
+    NewCall = emitHotColdNewAlignedNoThrow(
         CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
         TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
     break;
   case LibFunc_size_returning_new:
-    return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
-                                       LibFunc_size_returning_new_hot_cold,
-                                       HotCold);
+    NewCall = emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
+                                          LibFunc_size_returning_new_hot_cold,
+                                          HotCold);
     break;
   case LibFunc_size_returning_new_hot_cold:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
-                                         LibFunc_size_returning_new_hot_cold,
-                                         HotCold);
+      NewCall = emitHotColdSizeReturningNew(CI->getArgOperand(0), B, TLI,
+                                            LibFunc_size_returning_new_hot_cold,
+                                            HotCold);
     break;
   case LibFunc_size_returning_new_aligned:
-    return emitHotColdSizeReturningNewAligned(
+    NewCall = emitHotColdSizeReturningNewAligned(
         CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
         LibFunc_size_returning_new_aligned_hot_cold, HotCold);
     break;
   case LibFunc_size_returning_new_aligned_hot_cold:
     if (OptimizeExistingHotColdNew)
-      return emitHotColdSizeReturningNewAligned(
+      NewCall = emitHotColdSizeReturningNewAligned(
           CI->getArgOperand(0), CI->getArgOperand(1), B, TLI,
           LibFunc_size_returning_new_aligned_hot_cold, HotCold);
     break;
   default:
     return nullptr;
   }
-  return nullptr;
+
+  if (auto *NewCI = dyn_cast_or_null<Instruction>(NewCall))
+    if (MDNode *MD = CI->getMetadata(LLVMContext::MD_alloc_token))
+      NewCI->setMetadata(LLVMContext::MD_alloc_token, MD);
+
+  return NewCall;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll
index 5a4fb04f5f2c0..2765c75f4d5e0 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls-new.ll
@@ -610,6 +610,16 @@ define void @size_returning_aligned_update_test() {
   ret void
 }
 
+;; Check that !alloc_token is preserved.
+; HOTCOLD-LABEL: @new_alloc_token()
+define void @new_alloc_token() {
+  ;; Attribute cold converted to __hot_cold_t cold value.
+  ; HOTCOLD: @_Znwm12__hot_cold_t(i64 10, i8 [[COLD]]), !alloc_token ![[ALLOC_TOKEN:[0-9]+]]
+  %call = call ptr @_Znwm(i64 10) #0, !alloc_token !0
+  call void @dummy(ptr %call)
+  ret void
+}
+
 ;; So that instcombine doesn't optimize out the call.
 declare void @dummy(ptr)
 
@@ -649,3 +659,6 @@ attributes #5 = { "memprof" = "hot" }
 attributes #8 = { "memprof" = "ambiguous" }
 
 attributes #6 = { nobuiltin allocsize(0) "memprof"="cold" }
+
+; CHECK: [[ALLOC_TOKEN]] = !{!"MyType", i1 false}
+!0 = !{!"MyType", i1 false}

>From 451e91c58d92250121da33136a6db9597644f50d Mon Sep 17 00:00:00 2001
From: Marco Elver <[email protected]>
Date: Tue, 25 Nov 2025 15:31:35 +0100
Subject: [PATCH 2/3] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
 =?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.8-beta.1

[skip ci]
---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 4ba4e484fb77d..d1548694baa27 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1921,8 +1921,7 @@ Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
   }
 
   if (auto *NewCI = dyn_cast_or_null<Instruction>(NewCall))
-    if (MDNode *MD = CI->getMetadata(LLVMContext::MD_alloc_token))
-      NewCI->setMetadata(LLVMContext::MD_alloc_token, MD);
+    NewCI->copyMetadata(*CI);
 
   return NewCall;
 }

>From a161378a10a3e51a7ddaf78b84dba4d5d4fb14c7 Mon Sep 17 00:00:00 2001
From: Marco Elver <[email protected]>
Date: Thu, 27 Nov 2025 15:59:14 +0100
Subject: [PATCH 3/3] address review comments

Created using spr 1.3.8-beta.1
---
 clang/lib/CodeGen/BackendUtil.cpp             |  3 +-
 clang/test/CodeGen/lto-newpm-pipeline.c       |  6 +-
 .../Transforms/Instrumentation/AllocToken.h   |  2 +-
 llvm/lib/Passes/PassBuilderPipelines.cpp      | 22 ++++++-
 .../Transforms/Instrumentation/AllocToken.cpp | 66 ++++++++++---------
 .../CodeGen/AArch64/print-pipeline-passes.ll  |  2 +-
 .../AllocToken/module-flags.ll                | 15 +++--
 llvm/test/Other/new-pm-O0-defaults.ll         | 12 ++--
 llvm/test/Other/new-pm-defaults.ll            |  1 +
 llvm/test/Other/new-pm-lto-defaults.ll        |  1 -
 10 files changed, 76 insertions(+), 54 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 5590d217e96ff..1462c686f4053 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -239,7 +239,8 @@ static AllocTokenOptions getAllocTokenOptions(const LangOptions &LangOpts,
   AllocTokenOptions Opts;
   if (LangOpts.AllocTokenMode)
     Opts.Mode = *LangOpts.AllocTokenMode;
-  Opts.MaxTokens = LangOpts.AllocTokenMax;
+  if (LangOpts.AllocTokenMax)
+    Opts.MaxTokens = *LangOpts.AllocTokenMax;
   Opts.Extended = CGOpts.SanitizeAllocTokenExtended;
   Opts.FastABI = CGOpts.SanitizeAllocTokenFastABI;
   return Opts;
diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c
index dceaaf136ebfc..5673c72b49eff 100644
--- a/clang/test/CodeGen/lto-newpm-pipeline.c
+++ b/clang/test/CodeGen/lto-newpm-pipeline.c
@@ -33,11 +33,10 @@
 // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis
 // CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper
 // CHECK-FULL-O0-NEXT: Running pass: AllocTokenPass
-// CHECK-FULL-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
-// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis
 // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass
 // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass
 // CHECK-FULL-O0-NEXT: Running pass: AnnotationRemarksPass
+// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis
 // CHECK-FULL-O0-NEXT: Running pass: VerifierPass
 // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass
 
@@ -49,11 +48,10 @@
 // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis
 // CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper
 // CHECK-THIN-O0-NEXT: Running pass: AllocTokenPass
-// CHECK-THIN-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
-// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis
 // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass
 // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass
 // CHECK-THIN-O0-NEXT: Running pass: AnnotationRemarksPass
+// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis
 // CHECK-THIN-O0-NEXT: Running pass: VerifierPass
 // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass
 
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AllocToken.h b/llvm/include/llvm/Transforms/Instrumentation/AllocToken.h
index 077703c214745..299fc03c5d96b 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AllocToken.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AllocToken.h
@@ -25,7 +25,7 @@ class Module;
 
 struct AllocTokenOptions {
   AllocTokenMode Mode = DefaultAllocTokenMode;
-  std::optional<uint64_t> MaxTokens;
+  uint64_t MaxTokens = 0;
   bool FastABI = false;
   bool Extended = false;
   AllocTokenOptions() = default;
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 4e68344136c38..c6beb3fdf09bd 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1452,7 +1452,6 @@ ModulePassManager
 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
                                              ThinOrFullLTOPhase LTOPhase) {
   const bool LTOPreLink = isLTOPreLink(LTOPhase);
-  const bool LTOPostLink = isLTOPostLink(LTOPhase);
   ModulePassManager MPM;
 
   // Run partial inlining pass to partially inline functions that have
@@ -1617,7 +1616,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
                                                 PTO.EagerlyInvalidateAnalyses));
 
-  if (LTOPostLink)
+  // AllocToken transforms heap allocation calls; this needs to run late after
+  // other allocation call transformations (such as those in InstCombine).
+  if (!LTOPreLink)
     MPM.addPass(AllocTokenPass());
 
   invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
@@ -1858,7 +1859,11 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
                                    lowertypetests::DropTestKind::Assume));
     MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::ThinLTOPostLink));
+
+    // AllocToken transforms heap allocation calls; this needs to run late after
+    // other allocation call transformations (such as those in InstCombine).
     MPM.addPass(AllocTokenPass());
+
     // Drop available_externally and unreferenced globals. This is necessary
     // with ThinLTO in order to avoid leaving undefined references to dead
     // globals in the object file.
@@ -1919,6 +1924,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                                    lowertypetests::DropTestKind::Assume));
 
     MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::FullLTOPostLink));
+
+    // AllocToken transforms heap allocation calls; this needs to run late after
+    // other allocation call transformations (such as those in InstCombine).
     MPM.addPass(AllocTokenPass());
 
     invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
@@ -2007,6 +2015,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                                    lowertypetests::DropTestKind::Assume));
 
     MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::FullLTOPostLink));
+
+    // AllocToken transforms heap allocation calls; this needs to run late after
+    // other allocation call transformations (such as those in InstCombine).
     MPM.addPass(AllocTokenPass());
 
     invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
@@ -2242,6 +2253,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
     MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
 
   MPM.addPass(CoroCleanupPass());
+
+  // AllocToken transforms heap allocation calls; this needs to run late after
+  // other allocation call transformations (such as those in InstCombine).
   MPM.addPass(AllocTokenPass());
 
   invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
@@ -2360,7 +2374,9 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
 
   MPM.addPass(buildCoroWrapper(Phase));
 
-  if (isLTOPostLink(Phase))
+  // AllocToken transforms heap allocation calls; this needs to run late after
+  // other allocation call transformations (such as those in InstCombine).
+  if (!isLTOPreLink(Phase))
     MPM.addPass(AllocTokenPass());
 
   invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
index 1be0ab802e177..13f7a46af5481 100644
--- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp
@@ -67,24 +67,24 @@ cl::opt<std::string> ClFuncPrefix("alloc-token-prefix",
                                   cl::desc("The allocation function prefix"),
                                   cl::Hidden, cl::init("__alloc_token_"));
 
-cl::opt<uint64_t>
+cl::opt<std::optional<uint64_t>, false, cl::parser<uint64_t>>
     ClMaxTokens("alloc-token-max",
                 cl::desc("Maximum number of tokens (0 = target SIZE_MAX)"),
-                cl::Hidden, cl::init(0));
+                cl::Hidden, cl::init(std::nullopt));
 
-cl::opt<bool>
+cl::opt<std::optional<bool>, false, cl::parser<bool>>
     ClFastABI("alloc-token-fast-abi",
               cl::desc("The token ID is encoded in the function name"),
-              cl::Hidden, cl::init(false));
+              cl::Hidden, cl::init(std::nullopt));
 
 // Instrument libcalls only by default - compatible allocators only need to take
 // care of providing standard allocation functions. With extended coverage, also
 // instrument non-libcall allocation function calls with !alloc_token
 // metadata.
-cl::opt<bool>
+cl::opt<std::optional<bool>, false, cl::parser<bool>>
     ClExtended("alloc-token-extended",
                cl::desc("Extend coverage to custom allocation functions"),
-               cl::Hidden, cl::init(false));
+               cl::Hidden, cl::init(std::nullopt));
 
 // C++ defines ::operator new (and variants) as replaceable (vs. standard
 // library versions), which are nobuiltin, and are therefore not covered by
@@ -237,11 +237,6 @@ class TypeHashPointerSplitMode : public TypeHashMode {
 // Apply opt overrides and module flags.
 static AllocTokenOptions resolveOptions(AllocTokenOptions Opts,
                                         const Module &M) {
-  if (!Opts.MaxTokens.has_value())
-    Opts.MaxTokens = ClMaxTokens;
-  Opts.FastABI |= ClFastABI;
-  Opts.Extended |= ClExtended;
-
   auto IntModuleFlagOrNull = [&](StringRef Key) {
     return mdconst::extract_or_null<ConstantInt>(M.getModuleFlag(Key));
   };
@@ -249,16 +244,21 @@ static AllocTokenOptions resolveOptions(AllocTokenOptions Opts,
   if (auto *S = dyn_cast_or_null<MDString>(M.getModuleFlag("alloc-token-mode")))
     if (auto Mode = getAllocTokenModeFromString(S->getString()))
       Opts.Mode = *Mode;
-
   if (auto *Val = IntModuleFlagOrNull("alloc-token-max"))
     Opts.MaxTokens = Val->getZExtValue();
-
   if (auto *Val = IntModuleFlagOrNull("alloc-token-fast-abi"))
     Opts.FastABI |= Val->isOne();
-
   if (auto *Val = IntModuleFlagOrNull("alloc-token-extended"))
     Opts.Extended |= Val->isOne();
 
+  // Allow overriding options from command line options.
+  if (ClMaxTokens.has_value())
+    Opts.MaxTokens = *ClMaxTokens;
+  if (ClFastABI.has_value())
+    Opts.FastABI = *ClFastABI;
+  if (ClExtended.has_value())
+    Opts.Extended = *ClExtended;
+
   return Opts;
 }
 
@@ -268,19 +268,19 @@ class AllocToken {
                       ModuleAnalysisManager &MAM)
       : Options(resolveOptions(std::move(Opts), M)), Mod(M),
         FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
-        Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) {
+        Mode(IncrementMode(*IntPtrTy, Options.MaxTokens)) {
     switch (Options.Mode) {
     case TokenMode::Increment:
       break;
     case TokenMode::Random:
-      Mode.emplace<RandomMode>(*IntPtrTy, *Options.MaxTokens,
+      Mode.emplace<RandomMode>(*IntPtrTy, Options.MaxTokens,
                                M.createRNG(DEBUG_TYPE));
       break;
     case TokenMode::TypeHash:
-      Mode.emplace<TypeHashMode>(*IntPtrTy, *Options.MaxTokens);
+      Mode.emplace<TypeHashMode>(*IntPtrTy, Options.MaxTokens);
       break;
     case TokenMode::TypeHashPointerSplit:
-      Mode.emplace<TypeHashPointerSplitMode>(*IntPtrTy, *Options.MaxTokens);
+      Mode.emplace<TypeHashPointerSplitMode>(*IntPtrTy, Options.MaxTokens);
       break;
     }
   }
@@ -337,8 +337,6 @@ bool AllocToken::instrumentFunction(Function &F) {
   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
     return false;
 
-  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
   SmallVector<std::pair<CallBase *, LibFunc>, 4> AllocCalls;
   SmallVector<IntrinsicInst *, 4> IntrinsicInsts;
 
@@ -347,6 +345,10 @@ bool AllocToken::instrumentFunction(Function &F) {
       F.hasFnAttribute(Attribute::SanitizeAllocToken) &&
       !F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation);
 
+  // Get TLI only when required.
+  const TargetLibraryInfo *TLI =
+      InstrumentFunction ? &FAM.getResult<TargetLibraryAnalysis>(F) : nullptr;
+
   // Collect all allocation calls to avoid iterator invalidation.
   for (Instruction &I : instructions(F)) {
     // Collect all alloc_token_* intrinsics.
@@ -362,26 +364,28 @@ bool AllocToken::instrumentFunction(Function &F) {
     auto *CB = dyn_cast<CallBase>(&I);
     if (!CB)
       continue;
-    if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, TLI))
+    if (std::optional<LibFunc> Func = shouldInstrumentCall(*CB, *TLI))
       AllocCalls.emplace_back(CB, Func.value());
   }
 
+  // Return early to avoid unnecessarily instantiating the ORE.
+  if (AllocCalls.empty() && IntrinsicInsts.empty())
+    return false;
+
+  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   bool Modified = false;
 
-  if (!AllocCalls.empty()) {
-    for (auto &[CB, Func] : AllocCalls)
-      Modified |= replaceAllocationCall(CB, Func, ORE, TLI);
-    if (Modified)
-      NumFunctionsModified++;
-  }
+  for (auto &[CB, Func] : AllocCalls)
+    Modified |= replaceAllocationCall(CB, Func, ORE, *TLI);
 
-  if (!IntrinsicInsts.empty()) {
-    for (auto *II : IntrinsicInsts)
-      replaceIntrinsicInst(II, ORE);
+  for (auto *II : IntrinsicInsts) {
+    replaceIntrinsicInst(II, ORE);
     Modified = true;
-    NumFunctionsModified++;
   }
 
+  if (Modified)
+    NumFunctionsModified++;
+
   return Modified;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll b/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll
index 5852f97a63798..86090324c770c 100644
--- a/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll
+++ b/llvm/test/CodeGen/AArch64/print-pipeline-passes.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -mtriple=aarch64 -S -passes='default<O2>' -print-pipeline-passes < %s | FileCheck %s
 
 ; CHECK: loop-idiom-vectorize
-; O0: {{^}}function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),function(annotation-remarks),verify,print{{$}}
+; O0: {{^}}function(ee-instrument<>),always-inline,coro-cond(coro-early,cgscc(coro-split),coro-cleanup,globaldce),alloc-token,function(annotation-remarks),verify,print{{$}}
 
 define void @foo() {
 entry:
diff --git a/llvm/test/Instrumentation/AllocToken/module-flags.ll b/llvm/test/Instrumentation/AllocToken/module-flags.ll
index d92c22ba35fb2..7b86510fe6eaf 100644
--- a/llvm/test/Instrumentation/AllocToken/module-flags.ll
+++ b/llvm/test/Instrumentation/AllocToken/module-flags.ll
@@ -1,6 +1,7 @@
 ; Test that all supported module flags are retrieved correctly.
 ;
-; RUN: opt < %s -passes='inferattrs,alloc-token' -S | FileCheck %s
+; RUN: opt < %s -passes='inferattrs,alloc-token' -S | FileCheck %s --check-prefixes=CHECK,DEFAULT
+; RUN: opt < %s -passes='inferattrs,alloc-token' -alloc-token-max=2 -alloc-token-fast-abi=0 -alloc-token-extended=0 -S | FileCheck %s --check-prefixes=CHECK,OVERRIDE
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -10,10 +11,14 @@ declare ptr @my_malloc(i64)
 
 define void @test() sanitize_alloc_token {
 ; CHECK-LABEL: define void @test(
-; CHECK: call ptr @__alloc_token_0_malloc(i64 8)
-; CHECK: call ptr @__alloc_token_1__Znwm(i64 8)
-; CHECK: call ptr @__alloc_token_2_malloc(i64 8)
-; CHECK: call ptr @__alloc_token_0_my_malloc(i64 8)
+; DEFAULT: call ptr @__alloc_token_0_malloc(i64 8)
+; DEFAULT: call ptr @__alloc_token_1__Znwm(i64 8)
+; DEFAULT: call ptr @__alloc_token_2_malloc(i64 8)
+; DEFAULT: call ptr @__alloc_token_0_my_malloc(i64 8)
+; OVERRIDE: call ptr @__alloc_token_malloc(i64 8, i64 0)
+; OVERRIDE: call ptr @__alloc_token__Znwm(i64 8, i64 1)
+; OVERRIDE: call ptr @__alloc_token_malloc(i64 8, i64 0)
+; OVERRIDE: call ptr @my_malloc(i64 8)
   %1 = call ptr @malloc(i64 8)
   %2 = call ptr @_Znwm(i64 8)
   %3 = call ptr @malloc(i64 8)
diff --git a/llvm/test/Other/new-pm-O0-defaults.ll b/llvm/test/Other/new-pm-O0-defaults.ll
index 8087f745680b9..a7f43d1fc4591 100644
--- a/llvm/test/Other/new-pm-O0-defaults.ll
+++ b/llvm/test/Other/new-pm-O0-defaults.ll
@@ -9,13 +9,13 @@
 
 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager \
 ; RUN:     -passes='default<O0>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-CORO
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-CORO,CHECK-ALLOCTOKEN
 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager -enable-matrix \
 ; RUN:     -passes='default<O0>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-MATRIX,CHECK-CORO
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-MATRIX,CHECK-CORO,CHECK-ALLOCTOKEN
 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager -debug-info-for-profiling \
 ; RUN:     -passes='default<O0>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-DIS,CHECK-CORO
+; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-DIS,CHECK-CORO,CHECK-ALLOCTOKEN
 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -debug-pass-manager \
 ; RUN:     -passes='thinlto-pre-link<O0>' -S %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT,CHECK-PRE-LINK,CHECK-CORO
@@ -41,14 +41,13 @@
 ; CHECK-MATRIX: Running pass: LowerMatrixIntrinsicsPass
 ; CHECK-MATRIX-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-CORO-NEXT: Running pass: CoroConditionalWrapper
+; CHECK-ALLOCTOKEN-NEXT: Running pass: AllocTokenPass
 ; CHECK-PRE-LINK: Running pass: CanonicalizeAliasesPass
 ; CHECK-PRE-LINK-NEXT: Running pass: NameAnonGlobalPass
 ; CHECK-THINLTO: Running pass: LowerTypeTestsPass
 ; CHECK-THINLTO-NEXT: Running pass: CoroConditionalWrapper
 ; CHECK-THINLTO-NEXT: Running pass: AllocTokenPass
 ; CHECK-THINLTO-NEXT: Running analysis: InnerAnalysisManagerProxy
-; CHECK-THINLTO-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
-; CHECK-THINLTO-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-THINLTO-NEXT: Running pass: EliminateAvailableExternallyPass
 ; CHECK-THINLTO-NEXT: Running pass: GlobalDCEPass
 ; CHECK-LTO: Running pass: CrossDSOCFIPass on [module]
@@ -58,11 +57,10 @@
 ; CHECK-LTO-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-LTO-NEXT: CoroConditionalWrapper
 ; CHECK-LTO-NEXT: Running pass: AllocTokenPass
-; CHECK-LTO-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
-; CHECK-LTO-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-CORO-NEXT: Running pass: AnnotationRemarksPass
 ; CHECK-CORO-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-LTO-NEXT: Running pass: AnnotationRemarksPass
+; CHECK-LTO-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-NEXT: Running pass: PrintModulePass
 
 ; Make sure we get the IR back out without changes when we print the module.
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 1f437a662cc96..f074b2fdd3ab8 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -285,6 +285,7 @@
 ; CHECK-O-NEXT: Running pass: DivRemPairsPass
 ; CHECK-O-NEXT: Running pass: TailCallElimPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
+; CHECK-DEFAULT-NEXT: Running pass: AllocToken
 ; CHECK-EP-OPTIMIZER-LAST: Running pass: NoOpModulePass
 ; CHECK-HOT-COLD-SPLIT-NEXT: Running pass: HotColdSplittingPass
 ; CHECK-IR-OUTLINER-NEXT: Running pass: IROutlinerPass
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 0dea345a8125a..de0feca55e5b2 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -164,7 +164,6 @@
 ; CHECK-O1-NEXT: Running pass: CoroConditionalWrapper
 ; CHECK-O23SZ-NEXT: Running pass: CoroCleanupPass
 ; CHECK-O-NEXT: Running pass: AllocTokenPass
-; CHECK-O1-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
 ; CHECK-EP-NEXT: Running pass: NoOpModulePass
 ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
 ; CHECK-O-NEXT: Running pass: PrintModulePass

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to