[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From ef1f347a20205255e14735e0809c2168f4124556 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- .../Analysis/CostModel/AMDGPU/arith-ssat.ll | 32 +- .../Analysis/CostModel/AMDGPU/arith-usat.ll | 32 +- 5 files changed, 242 insertions(+), 231 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 8a14c8a37577ad..c2bc1353ee8838 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2120,20 +2120,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2402,6 +2391,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 8d4ff64ac5adcf..c6aadf0b503012 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -696,6 +696,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -724,7 +725,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -752,11 +753,17 @@ 
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; })) NElts = 1; break; } + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; default: break; } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0f..b86e99558377bb 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call
[llvm-branch-commits] [clang] [llvm] release/19.x: Revert "demangle function names in trace files (#87626)" (PR #102552)
https://github.com/nikic approved this pull request. https://github.com/llvm/llvm-project/pull/102552 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
arsenm wrote: ### Merge activity * **Aug 9, 4:27 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100523). https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Correct costs of saturating add/sub intrinsics (PR #100808)
arsenm wrote: ### Merge activity * **Aug 9, 4:27 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100808). https://github.com/llvm/llvm-project/pull/100808 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LAA] Refine stride checks for SCEVs during dependence analysis. (#99… (PR #102201)
https://github.com/Meinersbur approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/102201 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102599 This will be needed to continue generating the raw instruction in the flat case. >From be3f530768b923491b5747bac5b005779bd46a7e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 14:51:41 +0400 Subject: [PATCH] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics This will be needed to continue generating the raw instruction in the flat case. --- llvm/lib/IR/AutoUpgrade.cpp| 13 +- llvm/test/Bitcode/amdgcn-atomic.ll | 39 -- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ec719754183d5d..3f6ccbebd35ef2 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -34,9 +34,11 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -4096,11 +4098,20 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); - if (PtrTy->getAddressSpace() != 3) { + unsigned AddrSpace = PtrTy->getAddressSpace(); + if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { RMW->setMetadata("amdgpu.no.fine.grained.memory", MDNode::get(F->getContext(), {})); } + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { +MDBuilder MDB(F->getContext()); +MDNode *RangeNotPrivate = +MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), +APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); +RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); + } + if (IsVolatile) RMW->setVolatile(true); diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll 
index a114c27bafd4a2..5feba38e635f32 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr } define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: 
atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr ; Test some invalid ordering handling define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true) - ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1,
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102599?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102599** https://app.graphite.dev/github/pr/llvm/llvm-project/102599?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102461** https://app.graphite.dev/github/pr/llvm/llvm-project/102461?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/>: 1 other dependent PR ([#102462](https://github.com/llvm/llvm-project/pull/102462) https://app.graphite.dev/github/pr/llvm/llvm-project/102462?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/>) * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102599 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This will be needed to continue generating the raw instruction in the flat case. --- Full diff: https://github.com/llvm/llvm-project/pull/102599.diff 2 Files Affected: - (modified) llvm/lib/IR/AutoUpgrade.cpp (+12-1) - (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+21-18) ``diff diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ec719754183d5d..3f6ccbebd35ef2 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -34,9 +34,11 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -4096,11 +4098,20 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); - if (PtrTy->getAddressSpace() != 3) { + unsigned AddrSpace = PtrTy->getAddressSpace(); + if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { RMW->setMetadata("amdgpu.no.fine.grained.memory", MDNode::get(F->getContext(), {})); } + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { +MDBuilder MDB(F->getContext()); +MDNode *RangeNotPrivate = +MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), +APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); +RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); + } + if (IsVolatile) RMW->setVolatile(true); diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index a114c27bafd4a2..5feba38e635f32 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { 
- ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr } define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr ; Test some invalid ordering handling 
define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true) - ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn
[llvm-branch-commits] [clang] [llvm] clang/AMDGPU: Set noalias.addrspace metadata on atomicrmw (PR #102462)
@@ -647,6 +647,14 @@ class LangOptions : public LangOptionsBase { return ConvergentFunctions; } + /// Return true if atomicrmw operations targeting allocations in private + /// memory are undefined. + bool threadPrivateMemoryAtomicsAreUndefined() const { +// Should be false for OpenMP. +// TODO: Should this be true for SYCL? +return OpenCL || CUDA; gonzalobg wrote: > @gonzalobg -- Does NVIDIA define what happens if atomics are used on local > address space? I agree with @arsenm that this is a language property. In CUDA C++, just like in C++, the behavior of atomics to automatic variables is well-defined, e.g., this is ok: ``` __device__ void foo() { cuda::atomic<...> x(0); x.fetch_add(1); // OK } ``` When compiling to PTX, however, `atom` requires global or shared statespaces (or generic to those). That is, for local memory, LLVM must generate code that does not use `atom`. But that's a problem for the LLVM NVPTX backend to solve. https://github.com/llvm/llvm-project/pull/102462 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102599 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102599 >From 697de6b204b911ba4e34160328c174d9775c237c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 14:51:41 +0400 Subject: [PATCH] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics This will be needed to continue generating the raw instruction in the flat case. --- llvm/lib/IR/AutoUpgrade.cpp| 13 +- llvm/test/Bitcode/amdgcn-atomic.ll | 39 -- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ec719754183d5d..3f6ccbebd35ef2 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -34,9 +34,11 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -4096,11 +4098,20 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); - if (PtrTy->getAddressSpace() != 3) { + unsigned AddrSpace = PtrTy->getAddressSpace(); + if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { RMW->setMetadata("amdgpu.no.fine.grained.memory", MDNode::get(F->getContext(), {})); } + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { +MDBuilder MDB(F->getContext()); +MDNode *RangeNotPrivate = +MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), +APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); +RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); + } + if (IsVolatile) RMW->setVolatile(true); diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index a114c27bafd4a2..5feba38e635f32 100644 --- 
a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr } define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 
syncscope("agent") seq_cst, align 4{{$}} @@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr ; Test some invalid ordering handling define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true) - ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 +
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/102613 After decomposition of OpenMP compound constructs and assignment of applicable clauses to each leaf construct, composite constructs are then combined again into a single element in the construct queue. This helped later lowering stages easily identify composite constructs. However, as a result of the re-composition stage, the same list of clauses is used to produce all MLIR operations corresponding to each leaf of the original composite construct. This undoes existing logic introducing implicit clauses and deciding to which leaf construct(s) each clause applies. This patch removes construct re-composition logic and updates Flang lowering to be able to identify composite constructs from a list of leaf constructs. As a result, the right set of clauses is produced for each operation representing a leaf of a composite construct. >From aa0403a8a137295345b066cebaab9635e4b9886f Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Fri, 9 Aug 2024 12:58:27 +0100 Subject: [PATCH] [Flang][OpenMP] Prevent re-composition of composite constructs After decomposition of OpenMP compound constructs and assignment of applicable clauses to each leaf construct, composite constructs are then combined again into a single element in the construct queue. This helped later lowering stages easily identify composite constructs. However, as a result of the re-composition stage, the same list of clauses is used to produce all MLIR operations corresponding to each leaf of the original composite construct. This undoes existing logic introducing implicit clauses and deciding to which leaf construct(s) each clause applies. This patch removes construct re-composition logic and updates Flang lowering to be able to identify composite constructs from a list of leaf constructs. As a result, the right set of clauses is produced for each operation representing a leaf of a composite construct. 
--- flang/lib/Lower/OpenMP/Decomposer.cpp | 56 +-- flang/lib/Lower/OpenMP/Decomposer.h | 7 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 93 +++- .../Lower/OpenMP/Todo/omp-do-simd-linear.f90 | 2 +- .../Lower/OpenMP/default-clause-byref.f90 | 4 +- flang/test/Lower/OpenMP/default-clause.f90| 4 +- .../Frontend/OpenMP/ConstructCompositionT.h | 425 -- 7 files changed, 103 insertions(+), 488 deletions(-) delete mode 100644 llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index dfd85897469e28..5a7b1078bdd414 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/ClauseT.h" -#include "llvm/Frontend/OpenMP/ConstructCompositionT.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" #include "llvm/Frontend/OpenMP/OMP.h" #include "llvm/Support/raw_ostream.h" @@ -68,12 +67,6 @@ struct ConstructDecomposition { }; } // namespace -static UnitConstruct mergeConstructs(uint32_t version, - llvm::ArrayRef units) { - tomp::ConstructCompositionT compose(version, units); - return compose.merged; -} - namespace Fortran::lower::omp { LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const UnitConstruct &uc) { @@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef 
leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use t
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Sergio Afonso (skatrak) Changes After decomposition of OpenMP compound constructs and assignment of applicable clauses to each leaf construct, composite constructs are then combined again into a single element in the construct queue. This helped later lowering stages easily identify composite constructs. However, as a result of the re-composition stage, the same list of clauses is used to produce all MLIR operations corresponding to each leaf of the original composite construct. This undoes existing logic introducing implicit clauses and deciding to which leaf construct(s) each clause applies. This patch removes construct re-composition logic and updates Flang lowering to be able to identify composite constructs from a list of leaf constructs. As a result, the right set of clauses is produced for each operation representing a leaf of a composite construct. --- Patch is 34.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102613.diff 7 Files Affected: - (modified) flang/lib/Lower/OpenMP/Decomposer.cpp (+22-34) - (modified) flang/lib/Lower/OpenMP/Decomposer.h (+6-1) - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+70-23) - (modified) flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 (+1-1) - (modified) flang/test/Lower/OpenMP/default-clause-byref.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/default-clause.f90 (+2-2) - (removed) llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h (-425) ``diff diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index dfd85897469e28..5a7b1078bdd414 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/ClauseT.h" -#include "llvm/Frontend/OpenMP/ConstructCompositionT.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" #include 
"llvm/Frontend/OpenMP/OMP.h" #include "llvm/Support/raw_ostream.h" @@ -68,12 +67,6 @@ struct ConstructDecomposition { }; } // namespace -static UnitConstruct mergeConstructs(uint32_t version, - llvm::ArrayRef units) { - tomp::ConstructCompositionT compose(version, units); - return compose.merged; -} - namespace Fortran::lower::omp { LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const UnitConstruct &uc) { @@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use the directive's +// source. 
+for (auto &clause : uc.clauses) + if (clause.source.empty()) +clause.source = source; } - return constructs; + return decompose.output; +} + +bool matchLeafSequence(ConstructQueue::const_iterator item, + const ConstructQueue &queue, + llvm::ArrayRef directives) { + for (auto [dir, leaf] : + llvm::zip_longest(directives, llvm::make_range(item, queue.end( { +if (!dir || !leaf) + return false; + +if (dir.value() != leaf.value().id) + return false; + } + return true; } bool isLastItemInQueue(ConstructQueue::const_iterator item, diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h index e85956ffe1a231..6c90e8540d459b 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++ b/flang/lib/Lower/OpenMP/Decomposer.h @@ -10,7 +10,6 @@ #include "Clauses.h" #include "mlir/IR/BuiltinOps.h" -#includ
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -2263,24 +2321,13 @@ static void genOMPDispatch(lower::AbstractConverter &converter, // Composite constructs case llvm::omp::Directive::OMPD_distribute_parallel_do: -genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, - queue, item, *loopDsp); -break; case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: -genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, - loc, queue, item, *loopDsp); -break; case llvm::omp::Directive::OMPD_distribute_simd: -genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); -break; case llvm::omp::Directive::OMPD_do_simd: -genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); -break; case llvm::omp::Directive::OMPD_taskloop_simd: -genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); +// Composite constructs should have been split into a sequence of leaf +// constructs and lowered by genOMPCompositeDispatch(). +llvm_unreachable("Unexpected composite construct."); break; default: kparzysz wrote: Maybe you could delete the specific cases with composite constructs, and add `assert(!isCompositeConstruct(dir))` to the default label. https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -2141,13 +2154,50 @@ static void genCompositeTaskloopSimd( semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); TODO(loc, "Composite TASKLOOP SIMD"); } //===--===// // Dispatch //===--===// +static bool genOMPCompositeDispatch( +lower::AbstractConverter &converter, lower::SymMap &symTable, +semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, +mlir::Location loc, const ConstructQueue &queue, +ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + using llvm::omp::Directive; + using llvm::omp::getLeafConstructs, lower::omp::matchLeafSequence; + + if (matchLeafSequence( + item, queue, + getLeafConstructs(Directive::OMPD_distribute_parallel_do))) kparzysz wrote: Maybe `matchLeafSequence` could take the directive, and call `getLeafConstructs` itself. It would make these calls a bit tidier... https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use the directive's +// source. +for (auto &clause : uc.clauses) + if (clause.source.empty()) +clause.source = source; } - return constructs; + return decompose.output; +} + +bool matchLeafSequence(ConstructQueue::const_iterator item, + const ConstructQueue &queue, + llvm::ArrayRef directives) { + for (auto [dir, leaf] : + llvm::zip_longest(directives, llvm::make_range(item, queue.end( { +if (!dir || !leaf) kparzysz wrote: `0` is a legitimate directive id (`OMPD_allocate`). https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/102613 >From aa0403a8a137295345b066cebaab9635e4b9886f Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Fri, 9 Aug 2024 12:58:27 +0100 Subject: [PATCH 1/2] [Flang][OpenMP] Prevent re-composition of composite constructs After decomposition of OpenMP compound constructs and assignment of applicable clauses to each leaf construct, composite constructs are then combined again into a single element in the construct queue. This helped later lowering stages easily identify composite constructs. However, as a result of the re-composition stage, the same list of clauses is used to produce all MLIR operations corresponding to each leaf of the original composite construct. This undoes existing logic introducing implicit clauses and deciding to which leaf construct(s) each clause applies. This patch removes construct re-composition logic and updates Flang lowering to be able to identify composite constructs from a list of leaf constructs. As a result, the right set of clauses is produced for each operation representing a leaf of a composite construct. 
--- flang/lib/Lower/OpenMP/Decomposer.cpp | 56 +-- flang/lib/Lower/OpenMP/Decomposer.h | 7 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 93 +++- .../Lower/OpenMP/Todo/omp-do-simd-linear.f90 | 2 +- .../Lower/OpenMP/default-clause-byref.f90 | 4 +- flang/test/Lower/OpenMP/default-clause.f90| 4 +- .../Frontend/OpenMP/ConstructCompositionT.h | 425 -- 7 files changed, 103 insertions(+), 488 deletions(-) delete mode 100644 llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index dfd85897469e28..5a7b1078bdd414 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/ClauseT.h" -#include "llvm/Frontend/OpenMP/ConstructCompositionT.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" #include "llvm/Frontend/OpenMP/OMP.h" #include "llvm/Support/raw_ostream.h" @@ -68,12 +67,6 @@ struct ConstructDecomposition { }; } // namespace -static UnitConstruct mergeConstructs(uint32_t version, - llvm::ArrayRef units) { - tomp::ConstructCompositionT compose(version, units); - return compose.merged; -} - namespace Fortran::lower::omp { LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const UnitConstruct &uc) { @@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef 
leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use the directive's +// source. +for (auto &clause : uc.clauses) + if (clause.source.empty()) +clause.source = source; } - return constructs; + return decompose.output; +} + +bool matchLeafSequence(ConstructQueue::const_iterator item, + const ConstructQueue &queue, + llvm::ArrayRef directives) { + for (auto [dir, leaf] : + llvm::zip_longest(directives, llvm::make_range(item, queue.end( { +if (!dir || !leaf) + return false; + +if (dir.value() != leaf.value().id) + return false; + } + return true; } bool isLastItemInQueue(ConstructQueue::const_iterator item, diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h index e85956ffe1a231..6c90e8540d459b 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -2141,13 +2154,50 @@ static void genCompositeTaskloopSimd( semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); TODO(loc, "Composite TASKLOOP SIMD"); } //===--===// // Dispatch //===--===// +static bool genOMPCompositeDispatch( +lower::AbstractConverter &converter, lower::SymMap &symTable, +semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, +mlir::Location loc, const ConstructQueue &queue, +ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + using llvm::omp::Directive; + using llvm::omp::getLeafConstructs, lower::omp::matchLeafSequence; + + if (matchLeafSequence( + item, queue, + getLeafConstructs(Directive::OMPD_distribute_parallel_do))) skatrak wrote: Done. https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use the directive's +// source. +for (auto &clause : uc.clauses) + if (clause.source.empty()) +clause.source = source; } - return constructs; + return decompose.output; +} + +bool matchLeafSequence(ConstructQueue::const_iterator item, + const ConstructQueue &queue, + llvm::ArrayRef directives) { + for (auto [dir, leaf] : + llvm::zip_longest(directives, llvm::make_range(item, queue.end( { +if (!dir || !leaf) skatrak wrote: `llvm::zip_longest` returns two `std::optional` (for when ranges are not the same size), which is what this is checking, not the actual integer value of the directive. I made some changes to avoid confusion. 
https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak commented: Thank you Krzysztof for your comments, they should be addressed now. https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -2263,24 +2321,13 @@ static void genOMPDispatch(lower::AbstractConverter &converter, // Composite constructs case llvm::omp::Directive::OMPD_distribute_parallel_do: -genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, - queue, item, *loopDsp); -break; case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: -genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, - loc, queue, item, *loopDsp); -break; case llvm::omp::Directive::OMPD_distribute_simd: -genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); -break; case llvm::omp::Directive::OMPD_do_simd: -genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); -break; case llvm::omp::Directive::OMPD_taskloop_simd: -genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); +// Composite constructs should have been split into a sequence of leaf +// constructs and lowered by genOMPCompositeDispatch(). +llvm_unreachable("Unexpected composite construct."); break; default: skatrak wrote: Done. I added a bit more generic assert, since we don't expect combined constructs there either. https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102641 Backport 0b745a10843fc85e579bbf459f78b3f43e7ab309 Requested by: @davemgreen >From f83753954d8f94dc7573e210e7fb2300803c0012 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 9 Aug 2024 14:25:07 +0100 Subject: [PATCH] [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) The code-generator is currently not able to handle scalable vectors of . The usual "fix" for this until it is supported is to mark the costs of loads/stores with an invalid cost, preventing the vectorizer from vectorizing at those factors. But on rare occasions loops do not contain load/stores, only reductions. So whilst this is still unsupported return an invalid cost to avoid selecting vscale x 1 VFs. The cost of a reduction is not currently used by the vectorizer so this adds the cost to the add/mul/and/or/xor or min/max that should feed the reduction. It includes reduction costs too, for completeness. This change will be removed when code-generation for these types is sufficiently reliable. 
Fixes #99760 (cherry picked from commit 0b745a10843fc85e579bbf459f78b3f43e7ab309) --- .../AArch64/AArch64TargetTransformInfo.cpp| 32 + .../CostModel/AArch64/arith-fp-sve.ll | 4 +++ .../Analysis/CostModel/AArch64/cttz_elts.ll | 2 ++ .../Analysis/CostModel/AArch64/sve-arith.ll | 21 +++ .../CostModel/AArch64/sve-intrinsics.ll | 36 +++ .../Analysis/CostModel/AArch64/sve-min-max.ll | 12 +++ 6 files changed, 107 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 45148449dfb821..6a3efd587ac3f3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -540,7 +540,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { InstructionCost AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. auto *RetTy = ICA.getReturnType(); + if (auto *VTy = dyn_cast(RetTy)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + switch (ICA.getID()) { case Intrinsic::experimental_vector_histogram_add: if (!ST->hasSVE2()) @@ -3018,6 +3026,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ArrayRef Args, const Instruction *CxtI) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + // TODO: Handle more cost kinds. 
if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -3792,6 +3808,14 @@ InstructionCost AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + std::pair LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) @@ -3836,6 +3860,14 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(ValTy)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast(ValTy)) { InstructionCost BaseCost = diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/102641 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
llvmbot wrote: @pawosm-arm What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/102641 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-analysis Author: None (llvmbot) Changes Backport 0b745a10843fc85e579bbf459f78b3f43e7ab309 Requested by: @davemgreen --- Patch is 36.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102641.diff 6 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+32) - (modified) llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll (+4) - (modified) llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll (+2) - (modified) llvm/test/Analysis/CostModel/AArch64/sve-arith.ll (+21) - (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+36) - (modified) llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll (+12) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 45148449dfb821..6a3efd587ac3f3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -540,7 +540,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { InstructionCost AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. 
auto *RetTy = ICA.getReturnType(); + if (auto *VTy = dyn_cast(RetTy)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + switch (ICA.getID()) { case Intrinsic::experimental_vector_histogram_add: if (!ST->hasSVE2()) @@ -3018,6 +3026,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ArrayRef Args, const Instruction *CxtI) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + // TODO: Handle more cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -3792,6 +3808,14 @@ InstructionCost AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + std::pair LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) @@ -3836,6 +3860,14 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. 
+ if (auto *VTy = dyn_cast(ValTy)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast(ValTy)) { InstructionCost BaseCost = diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll index 18a1c31c03f748..770d3087b07522 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll @@ -8,6 +8,7 @@ define void @fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd undef, un
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
https://github.com/pawosm-arm approved this pull request. https://github.com/llvm/llvm-project/pull/102641 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102645 This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. >From 353885ddcab1182b25915766b9235ba411386359 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 17:27:53 +0400 Subject: [PATCH] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. 
--- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 113 ++ .../Target/AMDGPU/AMDGPUPerfHintAnalysis.h| 62 ++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- llvm/test/CodeGen/AMDGPU/perfhint.ll | 1 + 7 files changed, 136 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 195e2a19214e80..5b8d37a8ae7944 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; -void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); -extern char &AMDGPUPerfHintAnalysisID; +void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &); +extern char &AMDGPUPerfHintAnalysisLegacyID; void initializeGCNRegPressurePrinterPass(PassRegistry &); extern char &GCNRegPressurePrinterID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 8579774f522309..bbb4573655ab79 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) -INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) +INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) #ifdef EXPENSIVE_CHECKS INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index b6a6c33d85f83c..23fb1dba7b084c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -22,6 +22,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass(*static_cast(this))) MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass()) MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass()) #undef MODULE_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 1213d5e0b41db1..5797f02cb374e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -12,12 +12,15 @@ /// //===--===// -#include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -54,12 +57,6 @@ static
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. --- Full diff: https://github.com/llvm/llvm-project/pull/102645.diff 7 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+2-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp (+89-24) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h (+40-22) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-1) - (modified) llvm/test/CodeGen/AMDGPU/perfhint.ll (+1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 195e2a19214e8..5b8d37a8ae794 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; -void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); -extern char &AMDGPUPerfHintAnalysisID; +void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &); +extern char &AMDGPUPerfHintAnalysisLegacyID; void initializeGCNRegPressurePrinterPass(PassRegistry &); extern char &GCNRegPressurePrinterID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 
8579774f52230..bbb4573655ab7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) -INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) +INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) #ifdef EXPENSIVE_CHECKS INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index b6a6c33d85f83..23fb1dba7b084 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -22,6 +22,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass(*static_cast(this))) MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass()) MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass()) #undef MODULE_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 1213d5e0b41db..5797f02cb374e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -12,12 +12,15 @@ /// //===--===// -#include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" #include 
"llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -54,12 +57,6 @@ static cl::opt STATISTIC(NumMemBound, "Number of functions marked as memory bound"); STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave"); -char llvm::AMDGPUPerfHintAnalysis::ID = 0; -char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID; - -INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE, -"Analysis if a function is memory bound", true, true) - namespace { struct AMDGPUPerfHint { @@ -67,7 +64,7 @@ struct AMDGPUPerfHint { public: AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_, - const TargetLowering *TLI_) + const SITargetLowering *TLI_) : FIM(FIM_), TLI(TLI_) {} bool runOnFu
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes (#101978) (PR #102646)
https://github.com/RSilicon created https://github.com/llvm/llvm-project/pull/102646 Default attributes assigned to all functions according to the command line parameters. Some functions might have their own attributes and we need to set or remove attributes accordingly. Tests are updated to test this scenarios too. (cherry picked from commit 9e9fa00dcb9522db3f78d921eda6a18b9ee568bb) >From 62e4bbbaef6d003a058681cde53e909b365c21db Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Fri, 9 Aug 2024 17:51:38 +0200 Subject: [PATCH] [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) Default attributes assigned to all functions according to the command line parameters. Some functions might have their own attributes and we need to set or remove attributes accordingly. Tests are updated to test this scenarios too. (cherry picked from commit 9e9fa00dcb9522db3f78d921eda6a18b9ee568bb) --- clang/lib/CodeGen/CGCall.cpp | 2 +- clang/lib/CodeGen/TargetInfo.cpp | 32 --- clang/lib/CodeGen/TargetInfo.h| 7 ++-- .../CodeGen/aarch64-branch-protection-attr.c | 13 +++- .../CodeGen/arm-branch-protection-attr-1.c| 6 5 files changed, 52 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 234a9c16e39dfd..6e69e84a2344c1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2032,7 +2032,7 @@ static void getTrivialDefaultFunctionAttributes( } TargetInfo::BranchProtectionInfo BPI(LangOpts); - TargetCodeGenInfo::setBranchProtectionFnAttributes(BPI, FuncAttrs); + TargetCodeGenInfo::initBranchProtectionFnAttributes(BPI, FuncAttrs); } /// Merges `target-features` from \TargetOpts and \F, and sets the result in diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 38faa50cf19cf2..64a9a5554caf72 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -209,13 +209,37 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( 
void TargetCodeGenInfo::setBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F) { - llvm::AttrBuilder FuncAttrs(F.getContext()); - setBranchProtectionFnAttributes(BPI, FuncAttrs); - F.addFnAttrs(FuncAttrs); + // Called on already created and initialized function where attributes already + // set from command line attributes but some might need to be removed as the + // actual BPI is different. + if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { +F.addFnAttr("sign-return-address", BPI.getSignReturnAddrStr()); +F.addFnAttr("sign-return-address-key", BPI.getSignKeyStr()); + } else { +if (F.hasFnAttribute("sign-return-address")) + F.removeFnAttr("sign-return-address"); +if (F.hasFnAttribute("sign-return-address-key")) + F.removeFnAttr("sign-return-address-key"); + } + + auto AddRemoveAttributeAsSet = [&](bool Set, const StringRef &ModAttr) { +if (Set) + F.addFnAttr(ModAttr); +else if (F.hasFnAttribute(ModAttr)) + F.removeFnAttr(ModAttr); + }; + + AddRemoveAttributeAsSet(BPI.BranchTargetEnforcement, + "branch-target-enforcement"); + AddRemoveAttributeAsSet(BPI.BranchProtectionPAuthLR, + "branch-protection-pauth-lr"); + AddRemoveAttributeAsSet(BPI.GuardedControlStack, "guarded-control-stack"); } -void TargetCodeGenInfo::setBranchProtectionFnAttributes( +void TargetCodeGenInfo::initBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::AttrBuilder &FuncAttrs) { + // Only used for initializing attributes in the AttrBuilder, which will not + // contain any of these attributes so no need to remove anything. 
if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { FuncAttrs.addAttribute("sign-return-address", BPI.getSignReturnAddrStr()); FuncAttrs.addAttribute("sign-return-address-key", BPI.getSignKeyStr()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 2f2138582ba1e3..156b4ff4353bee 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -414,13 +414,16 @@ class TargetCodeGenInfo { return nullptr; } + // Set the Branch Protection Attributes of the Function accordingly to the + // BPI. Remove attributes that contradict with current BPI. static void setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F); + // Add the Branch Protection Attributes of the FuncAttrs. static void - setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, - llvm::AttrBuilder &FuncAttrs); + initBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, + llvm::Att
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) (PR #102646)
https://github.com/RSilicon edited https://github.com/llvm/llvm-project/pull/102646 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) (PR #102646)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Rose Silicon (RSilicon) Changes Default attributes assigned to all functions according to the command line parameters. Some functions might have their own attributes and we need to set or remove attributes accordingly. Tests are updated to test this scenarios too. (cherry picked from commit 9e9fa00dcb9522db3f78d921eda6a18b9ee568bb) --- Full diff: https://github.com/llvm/llvm-project/pull/102646.diff 5 Files Affected: - (modified) clang/lib/CodeGen/CGCall.cpp (+1-1) - (modified) clang/lib/CodeGen/TargetInfo.cpp (+28-4) - (modified) clang/lib/CodeGen/TargetInfo.h (+5-2) - (modified) clang/test/CodeGen/aarch64-branch-protection-attr.c (+12-1) - (modified) clang/test/CodeGen/arm-branch-protection-attr-1.c (+6) ``diff diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 234a9c16e39dfd..6e69e84a2344c1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2032,7 +2032,7 @@ static void getTrivialDefaultFunctionAttributes( } TargetInfo::BranchProtectionInfo BPI(LangOpts); - TargetCodeGenInfo::setBranchProtectionFnAttributes(BPI, FuncAttrs); + TargetCodeGenInfo::initBranchProtectionFnAttributes(BPI, FuncAttrs); } /// Merges `target-features` from \TargetOpts and \F, and sets the result in diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 38faa50cf19cf2..64a9a5554caf72 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -209,13 +209,37 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( void TargetCodeGenInfo::setBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F) { - llvm::AttrBuilder FuncAttrs(F.getContext()); - setBranchProtectionFnAttributes(BPI, FuncAttrs); - F.addFnAttrs(FuncAttrs); + // Called on already created and initialized function where attributes already + // set from command line attributes but some 
might need to be removed as the + // actual BPI is different. + if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { +F.addFnAttr("sign-return-address", BPI.getSignReturnAddrStr()); +F.addFnAttr("sign-return-address-key", BPI.getSignKeyStr()); + } else { +if (F.hasFnAttribute("sign-return-address")) + F.removeFnAttr("sign-return-address"); +if (F.hasFnAttribute("sign-return-address-key")) + F.removeFnAttr("sign-return-address-key"); + } + + auto AddRemoveAttributeAsSet = [&](bool Set, const StringRef &ModAttr) { +if (Set) + F.addFnAttr(ModAttr); +else if (F.hasFnAttribute(ModAttr)) + F.removeFnAttr(ModAttr); + }; + + AddRemoveAttributeAsSet(BPI.BranchTargetEnforcement, + "branch-target-enforcement"); + AddRemoveAttributeAsSet(BPI.BranchProtectionPAuthLR, + "branch-protection-pauth-lr"); + AddRemoveAttributeAsSet(BPI.GuardedControlStack, "guarded-control-stack"); } -void TargetCodeGenInfo::setBranchProtectionFnAttributes( +void TargetCodeGenInfo::initBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::AttrBuilder &FuncAttrs) { + // Only used for initializing attributes in the AttrBuilder, which will not + // contain any of these attributes so no need to remove anything. if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { FuncAttrs.addAttribute("sign-return-address", BPI.getSignReturnAddrStr()); FuncAttrs.addAttribute("sign-return-address-key", BPI.getSignKeyStr()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 2f2138582ba1e3..156b4ff4353bee 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -414,13 +414,16 @@ class TargetCodeGenInfo { return nullptr; } + // Set the Branch Protection Attributes of the Function accordingly to the + // BPI. Remove attributes that contradict with current BPI. 
static void setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F); + // Add the Branch Protection Attributes of the FuncAttrs. static void - setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, - llvm::AttrBuilder &FuncAttrs); + initBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, + llvm::AttrBuilder &FuncAttrs); protected: static std::string qualifyWindowsLibrary(StringRef Lib); diff --git a/clang/test/CodeGen/aarch64-branch-protection-attr.c b/clang/test/CodeGen/aarch64-branch-protection-attr.c index e7ae7fb1570c95..c66bce1bee6d36 100644 --- a/clang/test/CodeGen/aarch64-branch-protection-attr.c +++ b/clang/test/CodeGen/aarch64-branch-protection-attr.c @@ -1,6 +1,18 @@ // REQUIRES: aarch64-registered-target // RUN: %c
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) (PR #102646)
https://github.com/DanielKristofKiss approved this pull request. @RSilicon Thanks for the cherry-pick, LGTM. Did it locally too and check-clang passes. https://github.com/llvm/llvm-project/pull/102646 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102654 None >From 25b920ae924a5bb9bbd5900296359f2df08becd3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 21:33:55 +0400 Subject: [PATCH] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager --- llvm/lib/Target/AMDGPU/AMDGPU.h | 13 ++- .../AMDGPU/AMDGPUAnnotateUniformValues.cpp| 105 +++--- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +- .../test/CodeGen/AMDGPU/annotate-noclobber.ll | 3 +- 5 files changed, 81 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 6b5754dad1770f..99b04d6e4cfc3f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -293,7 +293,14 @@ class AMDGPUAttributorPass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -FunctionPass *createAMDGPUAnnotateUniformValues(); +class AMDGPUAnnotateUniformValuesPass +: public PassInfoMixin { +public: + AMDGPUAnnotateUniformValuesPass() {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); @@ -321,8 +328,8 @@ extern char &SIOptimizeExecMaskingPreRAID; void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); extern char &SIOptimizeVGPRLiveRangeID; -void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); -extern char &AMDGPUAnnotateUniformValuesPassID; +void initializeAMDGPUAnnotateUniformValuesLegacyPass(PassRegistry &); +extern char &AMDGPUAnnotateUniformValuesLegacyPassID; void initializeAMDGPUCodeGenPreparePass(PassRegistry&); extern char &AMDGPUCodeGenPrepareID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 
6a409f0dcbe774..fa66d53bd7c64b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -27,8 +27,8 @@ using namespace llvm; namespace { -class AMDGPUAnnotateUniformValues : public FunctionPass, - public InstVisitor { +class AMDGPUAnnotateUniformValues +: public InstVisitor { UniformityInfo *UA; MemorySSA *MSSA; AliasAnalysis *AA; @@ -46,37 +46,19 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, } public: - static char ID; - AMDGPUAnnotateUniformValues() : -FunctionPass(ID) { } - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - StringRef getPassName() const override { -return "AMDGPU Annotate Uniform Values"; - } - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } + AMDGPUAnnotateUniformValues(UniformityInfo &UA, MemorySSA &MSSA, + AliasAnalysis &AA, const Function &F) + : UA(&UA), MSSA(&MSSA), AA(&AA), +isEntryFunc(AMDGPU::isEntryFunctionCC(F.getCallingConv())) {} void visitBranchInst(BranchInst &I); void visitLoadInst(LoadInst &I); + + bool changed() const { return Changed; } }; } // End anonymous namespace -INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, - "Add AMDGPU uniform metadata", false, false) -INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, -"Add AMDGPU uniform metadata", false, false) - -char AMDGPUAnnotateUniformValues::ID = 0; - void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { if (UA->isUniform(&I)) setUniformMetadata(&I); @@ -100,25 +82,70 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { setNoClobberMetadata(&I); } -bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { - return false; +PreservedAnalyses 
+AMDGPUAnnotateUniformValuesPass::run(Function &F, + FunctionAnalysisManager &FAM) { + UniformityInfo &UI = FAM.getResult(F); + MemorySSA &MSSA = FAM.getResult(F).getMSSA(); + AAResults &AA = FAM.getResult(F); + + AMDGPUAnnotateUniformValues Impl(UI, MSSA, AA, F); + Impl.visit(F); + + PreservedAnalyses PA = PreservedAnalyses::none(); + if (!Impl.changed()) +return PA; + + // TODO: Should preserve nearly everything + PA.preserveSet(); + return PA; } -bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { +class AMDGPUAnnotateUniformValuesLegacy : public FunctionPass { +public: + static char ID; + + AMDGPUAnnotateUniformValuesLegacy() : FunctionPass(ID) {} + + bool doInitialization(Modul
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102654?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102654** https://app.graphite.dev/github/pr/llvm/llvm-project/102654?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102653** https://app.graphite.dev/github/pr/llvm/llvm-project/102653?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102654 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102654 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102654.diff 5 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+10-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp (+66-39) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/annotate-noclobber.ll (+2-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 6b5754dad1770..99b04d6e4cfc3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -293,7 +293,14 @@ class AMDGPUAttributorPass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -FunctionPass *createAMDGPUAnnotateUniformValues(); +class AMDGPUAnnotateUniformValuesPass +: public PassInfoMixin { +public: + AMDGPUAnnotateUniformValuesPass() {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); @@ -321,8 +328,8 @@ extern char &SIOptimizeExecMaskingPreRAID; void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); extern char &SIOptimizeVGPRLiveRangeID; -void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); -extern char &AMDGPUAnnotateUniformValuesPassID; +void initializeAMDGPUAnnotateUniformValuesLegacyPass(PassRegistry &); +extern char &AMDGPUAnnotateUniformValuesLegacyPassID; void initializeAMDGPUCodeGenPreparePass(PassRegistry&); extern char &AMDGPUCodeGenPrepareID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 6a409f0dcbe77..fa66d53bd7c64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -27,8 +27,8 @@ using namespace llvm; namespace { -class AMDGPUAnnotateUniformValues : public FunctionPass, - public InstVisitor { +class AMDGPUAnnotateUniformValues +: public InstVisitor { UniformityInfo *UA; MemorySSA *MSSA; AliasAnalysis *AA; @@ -46,37 +46,19 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, } public: - static char ID; - AMDGPUAnnotateUniformValues() : -FunctionPass(ID) { } - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - StringRef getPassName() const override { -return "AMDGPU Annotate Uniform Values"; - } - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } + AMDGPUAnnotateUniformValues(UniformityInfo &UA, MemorySSA &MSSA, + AliasAnalysis &AA, const Function &F) + : UA(&UA), MSSA(&MSSA), AA(&AA), +isEntryFunc(AMDGPU::isEntryFunctionCC(F.getCallingConv())) {} void visitBranchInst(BranchInst &I); void visitLoadInst(LoadInst &I); + + bool changed() const { return Changed; } }; } // End anonymous namespace -INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, - "Add AMDGPU uniform metadata", false, false) -INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, -"Add AMDGPU uniform metadata", false, false) - -char AMDGPUAnnotateUniformValues::ID = 0; - void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { if (UA->isUniform(&I)) setUniformMetadata(&I); @@ -100,25 +82,70 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { setNoClobberMetadata(&I); } -bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { - return false; +PreservedAnalyses +AMDGPUAnnotateUniformValuesPass::run(Function &F, + FunctionAnalysisManager &FAM) { + UniformityInfo &UI = 
FAM.getResult(F); + MemorySSA &MSSA = FAM.getResult(F).getMSSA(); + AAResults &AA = FAM.getResult(F); + + AMDGPUAnnotateUniformValues Impl(UI, MSSA, AA, F); + Impl.visit(F); + + PreservedAnalyses PA = PreservedAnalyses::none(); + if (!Impl.changed()) +return PA; + + // TODO: Should preserve nearly everything + PA.preserveSet(); + return PA; } -bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { +class AMDGPUAnnotateUniformValuesLegacy : public FunctionPass { +public: + static char ID; + + AMDGPUAnnotateUniformValuesLegacy() : FunctionPass(ID) {} + + bool doInitialization(Module &M) override { return false; } + + bool runOnFunction(Function &F) override; + StringRef getPassName() const override {
[llvm-branch-commits] [BOLT] Set RawBranchCount in DataAggregator (PR #101093)
https://github.com/aaupov ready_for_review https://github.com/llvm/llvm-project/pull/101093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [MC][NFC] Store MCPseudoProbeFuncDesc::FuncName as StringRef (PR #100655)
https://github.com/aaupov closed https://github.com/llvm/llvm-project/pull/100655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/101094 >From f598510001859a29f6f1ff6362fb9950ab6340cd Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 16:14:08 -0700 Subject: [PATCH 1/2] Update test to check the option with llvm-bolt with fdata, YAML, and pre-aggregated profile Created using spr 1.3.4 --- bolt/test/X86/pre-aggregated-perf.test | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index fc6f332d53dfb8..0f5137309e85d1 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -15,9 +15,15 @@ RUN: --show-density --profile-density-threshold=9 \ RUN: --profile-density-cutoff-hot=97 \ RUN: --profile-use-dfs | FileCheck %s -RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts. 
>From e91907e57b39c8c79eb58b4d28d78fa253b130cb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 20:09:08 -0700 Subject: [PATCH 2/2] show-density init(true) Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 23009bf74e0773..83fd6b2562eca8 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(false), +static cl::opt ShowDensity("show-density", cl::init(true), cl::desc("show profile density details"), cl::Optional); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/101094 >From f598510001859a29f6f1ff6362fb9950ab6340cd Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 16:14:08 -0700 Subject: [PATCH 1/2] Update test to check the option with llvm-bolt with fdata, YAML, and pre-aggregated profile Created using spr 1.3.4 --- bolt/test/X86/pre-aggregated-perf.test | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index fc6f332d53dfb8..0f5137309e85d1 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -15,9 +15,15 @@ RUN: --show-density --profile-density-threshold=9 \ RUN: --profile-density-cutoff-hot=97 \ RUN: --profile-use-dfs | FileCheck %s -RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts. 
>From e91907e57b39c8c79eb58b4d28d78fa253b130cb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 20:09:08 -0700 Subject: [PATCH 2/2] show-density init(true) Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 23009bf74e0773..83fd6b2562eca8 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(false), +static cl::opt ShowDensity("show-density", cl::init(true), cl::desc("show profile density details"), cl::Optional); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/101094 >From f598510001859a29f6f1ff6362fb9950ab6340cd Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 16:14:08 -0700 Subject: [PATCH 1/3] Update test to check the option with llvm-bolt with fdata, YAML, and pre-aggregated profile Created using spr 1.3.4 --- bolt/test/X86/pre-aggregated-perf.test | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index fc6f332d53dfb8..0f5137309e85d1 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -15,9 +15,15 @@ RUN: --show-density --profile-density-threshold=9 \ RUN: --profile-density-cutoff-hot=97 \ RUN: --profile-use-dfs | FileCheck %s -RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts. 
>From e91907e57b39c8c79eb58b4d28d78fa253b130cb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 20:09:08 -0700 Subject: [PATCH 2/3] show-density init(true) Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 23009bf74e0773..83fd6b2562eca8 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(false), +static cl::opt ShowDensity("show-density", cl::init(true), cl::desc("show profile density details"), cl::Optional); >From 0d5291b01264a5387f8afd9fb69baf55fdc409a7 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 Aug 2024 11:17:57 -0700 Subject: [PATCH 3/3] show-density off by default Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index e0ad2af63a384a..0dc4a37e0ba946 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(true), +static cl::opt ShowDensity("show-density", cl::init(false), cl::desc("show profile density details"), cl::Optional); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Auto-generate element type verification for VectorType (PR #102449)
https://github.com/River707 approved this pull request. https://github.com/llvm/llvm-project/pull/102449 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) (PR #102646)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/102646 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][ODS] Verify type constraints in Types and Attributes (PR #102326)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/102326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][ODS] Verify type constraints in Types and Attributes (PR #102326)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/102326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][ODS] Verify type constraints in Types and Attributes (PR #102326)
@@ -153,6 +153,9 @@ class TypeConstraint { // The name of the C++ Type class if known, or Type if not. string cppClassName = cppClassNameParam; + // TODO: This field is sometimes called `cppClassName` and sometimes matthias-springer wrote: Fixed in #102657. https://github.com/llvm/llvm-project/pull/102326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Auto-generate element type verification for VectorType (PR #102449)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/102449 >From c399a1c6e82afcfcc2ad531cb49d53683f294f91 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 8 Aug 2024 12:28:23 +0200 Subject: [PATCH] [mlir][IR] Auto-generate element type verification for VectorType --- mlir/include/mlir/IR/BuiltinTypes.td| 4 +++- mlir/lib/AsmParser/TypeParser.cpp | 13 +++-- mlir/test/IR/invalid-builtin-types.mlir | 6 +++--- mlir/test/python/ir/builtin_types.py| 2 +- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td index 365edcf68d8b94..4b3add2035263c 100644 --- a/mlir/include/mlir/IR/BuiltinTypes.td +++ b/mlir/include/mlir/IR/BuiltinTypes.td @@ -17,6 +17,7 @@ include "mlir/IR/AttrTypeBase.td" include "mlir/IR/BuiltinDialect.td" include "mlir/IR/BuiltinTypeInterfaces.td" +include "mlir/IR/CommonTypeConstraints.td" // TODO: Currently the types defined in this file are prefixed with `Builtin_`. // This is to differentiate the types here with the ones in OpBase.td. We should @@ -1146,7 +1147,7 @@ def Builtin_Vector : Builtin_Type<"Vector", "vector", }]; let parameters = (ins ArrayRefParameter<"int64_t">:$shape, -"Type":$elementType, +AnyTypeOf<[AnyInteger, Index, AnyFloat]>:$elementType, ArrayRefParameter<"bool">:$scalableDims ); let builders = [ @@ -1173,6 +1174,7 @@ def Builtin_Vector : Builtin_Type<"Vector", "vector", /// type. In particular, vectors can consist of integer, index, or float /// primitives. static bool isValidElementType(Type t) { + // TODO: Auto-generate this function from $elementType. 
return ::llvm::isa(t); } diff --git a/mlir/lib/AsmParser/TypeParser.cpp b/mlir/lib/AsmParser/TypeParser.cpp index 542eaeefe57f12..f070c072c43296 100644 --- a/mlir/lib/AsmParser/TypeParser.cpp +++ b/mlir/lib/AsmParser/TypeParser.cpp @@ -458,31 +458,24 @@ Type Parser::parseTupleType() { /// static-dim-list ::= decimal-literal (`x` decimal-literal)* /// VectorType Parser::parseVectorType() { + SMLoc loc = getToken().getLoc(); consumeToken(Token::kw_vector); if (parseToken(Token::less, "expected '<' in vector type")) return nullptr; + // Parse the dimensions. SmallVector dimensions; SmallVector scalableDims; if (parseVectorDimensionList(dimensions, scalableDims)) return nullptr; - if (any_of(dimensions, [](int64_t i) { return i <= 0; })) -return emitError(getToken().getLoc(), - "vector types must have positive constant sizes"), - nullptr; // Parse the element type. - auto typeLoc = getToken().getLoc(); auto elementType = parseType(); if (!elementType || parseToken(Token::greater, "expected '>' in vector type")) return nullptr; - if (!VectorType::isValidElementType(elementType)) -return emitError(typeLoc, "vector elements must be int/index/float type"), - nullptr; - - return VectorType::get(dimensions, elementType, scalableDims); + return getChecked(loc, dimensions, elementType, scalableDims); } /// Parse a dimension list in a vector type. This populates the dimension list. diff --git a/mlir/test/IR/invalid-builtin-types.mlir b/mlir/test/IR/invalid-builtin-types.mlir index 9884212e916c1f..07854a25000feb 100644 --- a/mlir/test/IR/invalid-builtin-types.mlir +++ b/mlir/test/IR/invalid-builtin-types.mlir @@ -120,17 +120,17 @@ func.func @illegaltype(i21312312323120) // expected-error {{invalid integer widt // - // Test no nested vector. 
-// expected-error@+1 {{vector elements must be int/index/float type}} +// expected-error@+1 {{failed to verify 'elementType': integer or index or floating-point}} func.func @vectors(vector<1 x vector<1xi32>>, vector<2x4xf32>) // - -// expected-error @+1 {{vector types must have positive constant sizes}} +// expected-error @+1 {{vector types must have positive constant sizes but got 0}} func.func @zero_vector_type() -> vector<0xi32> // - -// expected-error @+1 {{vector types must have positive constant sizes}} +// expected-error @+1 {{vector types must have positive constant sizes but got 1, 0}} func.func @zero_in_vector_type() -> vector<1x0xi32> // - diff --git a/mlir/test/python/ir/builtin_types.py b/mlir/test/python/ir/builtin_types.py index 2161f110ac31e2..f9554105ed 100644 --- a/mlir/test/python/ir/builtin_types.py +++ b/mlir/test/python/ir/builtin_types.py @@ -345,7 +345,7 @@ def testVectorType(): VectorType.get(shape, none) except MLIRError as e: # CHECK: Invalid type: -# CHECK: error: unknown: vector elements must be int/index/float type but got 'none' +# CHECK: error: unknown: failed to verify 'elementType': integer or index or floating-point print(e) else: print("Exception not produced")
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102663 None >From b6224ab90e73e07da50c612a7dc93da719208dbd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 22:54:33 +0400 Subject: [PATCH] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager --- llvm/lib/Target/AMDGPU/AMDGPU.h | 14 +- .../AMDGPU/AMDGPUCodeGenPassBuilder.cpp | 2 + llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +- llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp| 144 ++ .../CodeGen/AMDGPU/si-lower-i1-copies.mir | 1 + 6 files changed, 100 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 99b04d6e4cfc3f..17a130e82ae8fb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/AMDGPUAddrSpace.h" @@ -36,7 +37,7 @@ FunctionPass *createGCNDPPCombinePass(); FunctionPass *createSIAnnotateControlFlowLegacyPass(); FunctionPass *createSIFoldOperandsPass(); FunctionPass *createSIPeepholeSDWAPass(); -FunctionPass *createSILowerI1CopiesPass(); +FunctionPass *createSILowerI1CopiesLegacyPass(); FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); FunctionPass *createSIShrinkInstructionsPass(); FunctionPass *createSILoadStoreOptimizerPass(); @@ -82,6 +83,13 @@ struct AMDGPUUseNativeCallsPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +class SILowerI1CopiesPass : public PassInfoMixin { +public: + SILowerI1CopiesPass() = default; + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &); void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); @@ -174,8 +182,8 @@ 
extern char &SIFixVGPRCopiesID; void initializeSILowerWWMCopiesPass(PassRegistry &); extern char &SILowerWWMCopiesID; -void initializeSILowerI1CopiesPass(PassRegistry &); -extern char &SILowerI1CopiesID; +void initializeSILowerI1CopiesLegacyPass(PassRegistry &); +extern char &SILowerI1CopiesLegacyID; void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); extern char &AMDGPUGlobalISelDivergenceLoweringID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index cc4285f130fc82..50491247f0eddb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -7,6 +7,7 @@ //===--===// #include "AMDGPUCodeGenPassBuilder.h" +#include "AMDGPU.h" #include "AMDGPUISelDAGToDAG.h" #include "AMDGPUTargetMachine.h" #include "SIFixSGPRCopies.h" @@ -40,5 +41,6 @@ void AMDGPUCodeGenPassBuilder::addAsmPrinter(AddMachinePass &addPass, Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const { addPass(AMDGPUISelDAGToDAGPass(TM)); addPass(SIFixSGPRCopiesPass()); + addPass(SILowerI1CopiesPass()); return Error::success(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index cbb19c003a264a..af68eea665571f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -79,4 +79,5 @@ FUNCTION_PASS_WITH_PARAMS( #endif MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this)) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) +MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) #undef MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 3cc0d314a7c5d8..ad816ec29b02dc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -400,7 +400,7 @@ 
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeGlobalISel(*PR); initializeAMDGPUDAGToDAGISelLegacyPass(*PR); initializeGCNDPPCombinePass(*PR); - initializeSILowerI1CopiesPass(*PR); + initializeSILowerI1CopiesLegacyPass(*PR); initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR); initializeSILowerWWMCopiesPass(*PR); initializeAMDGPUMarkLastScratchLoadPass(*PR); @@ -1269,7 +1269,7 @@ bool GCNPassConfig::addILPOpts() { bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); addPass(&SIFixSGPRCopiesLegacyID); - addPass(createSILowerI1CopiesPass()); + addPass(createSILowerI1CopiesLegacyPass()); return false; } diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGP
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102663 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/102663?utm_source=stack-comment-downstack-mergeability-warning). Learn more: https://graphite.dev/docs/merge-pull-requests * **#102663** (https://app.graphite.dev/github/pr/llvm/llvm-project/102663) 👈 * **#102654** (https://app.graphite.dev/github/pr/llvm/llvm-project/102654) * **#102653** (https://app.graphite.dev/github/pr/llvm/llvm-project/102653) * `main` This stack of pull requests is managed by Graphite. Learn more about stacking: https://stacking.dev/ Join @arsenm and the rest of your teammates on Graphite: https://graphite.dev https://github.com/llvm/llvm-project/pull/102663 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102663.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+11-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-2) - (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp (+83-61) - (modified) llvm/test/CodeGen/AMDGPU/si-lower-i1-copies.mir (+1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 99b04d6e4cfc3..17a130e82ae8f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/AMDGPUAddrSpace.h" @@ -36,7 +37,7 @@ FunctionPass *createGCNDPPCombinePass(); FunctionPass *createSIAnnotateControlFlowLegacyPass(); FunctionPass *createSIFoldOperandsPass(); FunctionPass *createSIPeepholeSDWAPass(); -FunctionPass *createSILowerI1CopiesPass(); +FunctionPass *createSILowerI1CopiesLegacyPass(); FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); FunctionPass *createSIShrinkInstructionsPass(); FunctionPass *createSILoadStoreOptimizerPass(); @@ -82,6 +83,13 @@ struct AMDGPUUseNativeCallsPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +class SILowerI1CopiesPass : public PassInfoMixin { +public: + SILowerI1CopiesPass() = default; + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &); void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); @@ -174,8 +182,8 @@ extern char &SIFixVGPRCopiesID; void 
initializeSILowerWWMCopiesPass(PassRegistry &); extern char &SILowerWWMCopiesID; -void initializeSILowerI1CopiesPass(PassRegistry &); -extern char &SILowerI1CopiesID; +void initializeSILowerI1CopiesLegacyPass(PassRegistry &); +extern char &SILowerI1CopiesLegacyID; void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); extern char &AMDGPUGlobalISelDivergenceLoweringID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index cc4285f130fc8..50491247f0edd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -7,6 +7,7 @@ //===--===// #include "AMDGPUCodeGenPassBuilder.h" +#include "AMDGPU.h" #include "AMDGPUISelDAGToDAG.h" #include "AMDGPUTargetMachine.h" #include "SIFixSGPRCopies.h" @@ -40,5 +41,6 @@ void AMDGPUCodeGenPassBuilder::addAsmPrinter(AddMachinePass &addPass, Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const { addPass(AMDGPUISelDAGToDAGPass(TM)); addPass(SIFixSGPRCopiesPass()); + addPass(SILowerI1CopiesPass()); return Error::success(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index cbb19c003a264..af68eea665571 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -79,4 +79,5 @@ FUNCTION_PASS_WITH_PARAMS( #endif MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this)) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) +MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) #undef MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 3cc0d314a7c5d..ad816ec29b02d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -400,7 +400,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() { initializeGlobalISel(*PR); initializeAMDGPUDAGToDAGISelLegacyPass(*PR); initializeGCNDPPCombinePass(*PR); - initializeSILowerI1CopiesPass(*PR); + initializeSILowerI1CopiesLegacyPass(*PR); initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR); initializeSILowerWWMCopiesPass(*PR); initializeAMDGPUMarkLastScratchLoadPass(*PR); @@ -1269,7 +1269,7 @@ bool GCNPassConfig::addILPOpts() { bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); addPass(&SIFixSGPRCopiesLegacyID); - addPass(createSILowerI1CopiesPass()); + addPass(createSILowerI1CopiesLegacyPass()); return false; } diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp index a9ee74dec1203..7d49358d44025 100644 --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cp
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102670 Backport 2eb6e30fe83ccce3cf01e596e73fa6385facd44b Requested by: @asl >From 89ac96cf1c2bcd057655fb4b8142e094f49031b5 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 9 Aug 2024 11:49:50 -0700 Subject: [PATCH] [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) We already ended up with -fptrauth-returns, the feature macro, the lang opt, and the actual backend lowering. The only part left is threading it all through PointerAuthOptions, to drive the addition of the "ptrauth-returns" attribute to generated functions. While there, do minor cleanup on ptrauth-function-attributes.c. This also adds ptrauth_key_return_address to ptrauth.h. (cherry picked from commit 2eb6e30fe83ccce3cf01e596e73fa6385facd44b) --- clang/include/clang/Basic/PointerAuthOptions.h | 3 +++ clang/lib/CodeGen/CodeGenFunction.cpp| 2 ++ clang/lib/Frontend/CompilerInvocation.cpp| 4 +++- clang/lib/Headers/ptrauth.h | 6 ++ clang/test/CodeGen/ptrauth-function-attributes.c | 9 +++-- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/PointerAuthOptions.h b/clang/include/clang/Basic/PointerAuthOptions.h index 417b4b00648c78..c0ab35bce5d84b 100644 --- a/clang/include/clang/Basic/PointerAuthOptions.h +++ b/clang/include/clang/Basic/PointerAuthOptions.h @@ -159,6 +159,9 @@ class PointerAuthSchema { }; struct PointerAuthOptions { + /// Should return addresses be authenticated? + bool ReturnAddresses = false; + /// Do indirect goto label addresses need to be authenticated? bool IndirectGotos = false; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index af201554898f31..4dc57d0ff5b269 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -880,6 +880,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Add pointer authentication attributes. 
const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts(); + if (CodeGenOpts.PointerAuth.ReturnAddresses) +Fn->addFnAttr("ptrauth-returns"); if (CodeGenOpts.PointerAuth.FunctionPointers) Fn->addFnAttr("ptrauth-calls"); if (CodeGenOpts.PointerAuth.IndirectGotos) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index f6b6c44a4cab6a..fa5d076c202a36 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1505,13 +1505,15 @@ void CompilerInvocation::setDefaultPointerAuthOptions( PointerAuthSchema(Key::ASIA, false, Discrimination::Type); } Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos; + Opts.ReturnAddresses = LangOpts.PointerAuthReturns; } static void parsePointerAuthOptions(PointerAuthOptions &Opts, const LangOptions &LangOpts, const llvm::Triple &Triple, DiagnosticsEngine &Diags) { - if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos) + if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos && + !LangOpts.PointerAuthReturns) return; CompilerInvocation::setDefaultPointerAuthOptions(Opts, LangOpts, Triple); diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index 4724155b0dc796..154b599862a8e2 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -28,6 +28,12 @@ typedef enum { /* A process-specific key which can be used to sign data pointers. */ ptrauth_key_process_dependent_data = ptrauth_key_asdb, + /* The key used to sign return addresses on the stack. + The extra data is based on the storage address of the return address. + On AArch64, that is always the storage address of the return address + 8 + (or, in other words, the value of the stack pointer on function entry) */ + ptrauth_key_return_address = ptrauth_key_process_dependent_code, + /* The key used to sign C function pointers. The extra data is always 0. 
*/ ptrauth_key_function_pointer = ptrauth_key_process_independent_code, diff --git a/clang/test/CodeGen/ptrauth-function-attributes.c b/clang/test/CodeGen/ptrauth-function-attributes.c index 6a09cd37bf4854..17ebf9d6e2e01c 100644 --- a/clang/test/CodeGen/ptrauth-function-attributes.c +++ b/clang/test/CodeGen/ptrauth-function-attributes.c @@ -1,11 +1,14 @@ +// RUN: %clang_cc1 -triple arm64-apple-ios-emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF // RUN: %clang_cc1 -triple arm64e-apple-ios -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF // RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF -// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-l
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/102670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
llvmbot wrote: @kovdan01 What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/102670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: None (llvmbot) Changes Backport 2eb6e30fe83ccce3cf01e596e73fa6385facd44b Requested by: @asl --- Full diff: https://github.com/llvm/llvm-project/pull/102670.diff 5 Files Affected: - (modified) clang/include/clang/Basic/PointerAuthOptions.h (+3) - (modified) clang/lib/CodeGen/CodeGenFunction.cpp (+2) - (modified) clang/lib/Frontend/CompilerInvocation.cpp (+3-1) - (modified) clang/lib/Headers/ptrauth.h (+6) - (modified) clang/test/CodeGen/ptrauth-function-attributes.c (+7-2) ``diff diff --git a/clang/include/clang/Basic/PointerAuthOptions.h b/clang/include/clang/Basic/PointerAuthOptions.h index 417b4b00648c78..c0ab35bce5d84b 100644 --- a/clang/include/clang/Basic/PointerAuthOptions.h +++ b/clang/include/clang/Basic/PointerAuthOptions.h @@ -159,6 +159,9 @@ class PointerAuthSchema { }; struct PointerAuthOptions { + /// Should return addresses be authenticated? + bool ReturnAddresses = false; + /// Do indirect goto label addresses need to be authenticated? bool IndirectGotos = false; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index af201554898f31..4dc57d0ff5b269 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -880,6 +880,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Add pointer authentication attributes. 
const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts(); + if (CodeGenOpts.PointerAuth.ReturnAddresses) +Fn->addFnAttr("ptrauth-returns"); if (CodeGenOpts.PointerAuth.FunctionPointers) Fn->addFnAttr("ptrauth-calls"); if (CodeGenOpts.PointerAuth.IndirectGotos) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index f6b6c44a4cab6a..fa5d076c202a36 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1505,13 +1505,15 @@ void CompilerInvocation::setDefaultPointerAuthOptions( PointerAuthSchema(Key::ASIA, false, Discrimination::Type); } Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos; + Opts.ReturnAddresses = LangOpts.PointerAuthReturns; } static void parsePointerAuthOptions(PointerAuthOptions &Opts, const LangOptions &LangOpts, const llvm::Triple &Triple, DiagnosticsEngine &Diags) { - if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos) + if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos && + !LangOpts.PointerAuthReturns) return; CompilerInvocation::setDefaultPointerAuthOptions(Opts, LangOpts, Triple); diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index 4724155b0dc796..154b599862a8e2 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -28,6 +28,12 @@ typedef enum { /* A process-specific key which can be used to sign data pointers. */ ptrauth_key_process_dependent_data = ptrauth_key_asdb, + /* The key used to sign return addresses on the stack. + The extra data is based on the storage address of the return address. + On AArch64, that is always the storage address of the return address + 8 + (or, in other words, the value of the stack pointer on function entry) */ + ptrauth_key_return_address = ptrauth_key_process_dependent_code, + /* The key used to sign C function pointers. The extra data is always 0. 
*/ ptrauth_key_function_pointer = ptrauth_key_process_independent_code, diff --git a/clang/test/CodeGen/ptrauth-function-attributes.c b/clang/test/CodeGen/ptrauth-function-attributes.c index 6a09cd37bf4854..17ebf9d6e2e01c 100644 --- a/clang/test/CodeGen/ptrauth-function-attributes.c +++ b/clang/test/CodeGen/ptrauth-function-attributes.c @@ -1,11 +1,14 @@ +// RUN: %clang_cc1 -triple arm64-apple-ios -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF // RUN: %clang_cc1 -triple arm64e-apple-ios -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF // RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF -// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CALLS +// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CALLS // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CALLS +// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-returns -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,RETS +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-returns -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,RETS + // RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-in
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/102599 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [hmaptool] Implement simple string deduplication (PR #102677)
https://github.com/smeenai created https://github.com/llvm/llvm-project/pull/102677 This reduces the size of the generated header maps significantly (35% measured internally). Further savings are possible through tail deduplication, but the additional complication isn't worth the gain IMO. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [hmaptool] Implement simple string deduplication (PR #102677)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Shoaib Meenai (smeenai) Changes This reduces the size of the generated header maps significantly (35% measured internally). Further savings are possible through tail deduplication, but the additional complication isn't worth the gain IMO. --- Full diff: https://github.com/llvm/llvm-project/pull/102677.diff 1 Files Affected: - (modified) clang/utils/hmaptool/hmaptool (+23-8) ``diff diff --git a/clang/utils/hmaptool/hmaptool b/clang/utils/hmaptool/hmaptool index aa400e3dd64e9..2ca769a549bed 100755 --- a/clang/utils/hmaptool/hmaptool +++ b/clang/utils/hmaptool/hmaptool @@ -110,6 +110,24 @@ class HeaderMap(object): yield (self.get_string(key_idx), self.get_string(prefix_idx) + self.get_string(suffix_idx)) +class StringTable: +def __init__(self): +# A string table offset of 0 is interpreted as an empty bucket, so it's +# important we don't assign an actual string to that offset. +self.table = "\0" +# For the same reason we don't want the empty string having a 0 offset. +self.offsets = {} + +def add(self, string): +offset = self.offsets.get(string) +if offset: +return offset + +offset = len(self.table) +self.table += string + "\0" +self.offsets[string] = offset +return offset + ### def action_dump(name, args): @@ -182,7 +200,7 @@ def action_write(name, args): table = [(0, 0, 0) for i in range(num_buckets)] max_value_len = 0 -strtable = "\0" +strtable = StringTable() for key,value in mappings.items(): if not isinstance(key, str): key = key.decode('utf-8') @@ -190,17 +208,14 @@ def action_write(name, args): value = value.decode('utf-8') max_value_len = max(max_value_len, len(value)) -key_idx = len(strtable) -strtable += key + '\0' +key_idx = strtable.add(key) prefix, suffix = os.path.split(value) # This guarantees that prefix + suffix == value in all cases, including when # prefix is empty or contains a trailing slash or suffix is empty (hence the use # of `len(value) - len(suffix)` instead of just `-len(suffix)`. 
prefix += value[len(prefix) : len(value) - len(suffix)] -prefix_idx = len(strtable) -strtable += prefix + '\0' -suffix_idx = len(strtable) -strtable += suffix + '\0' +prefix_idx = strtable.add(prefix) +suffix_idx = strtable.add(suffix) hash = hmap_hash(key) for i in range(num_buckets): @@ -228,7 +243,7 @@ def action_write(name, args): f.write(struct.pack(header_fmt, *header)) for bucket in table: f.write(struct.pack(bucket_fmt, *bucket)) -f.write(strtable.encode()) +f.write(strtable.table.encode()) def action_tovfs(name, args): "convert a headermap to a VFS layout" `` https://github.com/llvm/llvm-project/pull/102677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
https://github.com/kovdan01 approved this pull request. https://github.com/llvm/llvm-project/pull/102670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102680 Backport 101cf540e698529d3dd899d00111bcb654a3c12b Requested by: @bulbazord >From deb61f38595fbee906616175c7e5a9e323e30c8c Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Fri, 9 Aug 2024 12:50:42 -0700 Subject: [PATCH] [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) This class is technically not usable in its current state. When you use it in a simple C++ project, your compiler will complain about an incomplete definition of SaveCoreOptions. Normally this isn't a problem, other classes in the SBAPI do this. The difference is that SBSaveCoreOptions has a default destructor in the header, so the compiler will attempt to generate the code for the destructor with an incomplete definition of the impl type. All methods for every class, including constructors and destructors, must have a separate implementation not in a header. (cherry picked from commit 101cf540e698529d3dd899d00111bcb654a3c12b) --- lldb/include/lldb/API/SBSaveCoreOptions.h | 2 +- lldb/source/API/SBSaveCoreOptions.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lldb/include/lldb/API/SBSaveCoreOptions.h b/lldb/include/lldb/API/SBSaveCoreOptions.h index e77496bd3a4a0d..75506fd752e762 100644 --- a/lldb/include/lldb/API/SBSaveCoreOptions.h +++ b/lldb/include/lldb/API/SBSaveCoreOptions.h @@ -17,7 +17,7 @@ class LLDB_API SBSaveCoreOptions { public: SBSaveCoreOptions(); SBSaveCoreOptions(const lldb::SBSaveCoreOptions &rhs); - ~SBSaveCoreOptions() = default; + ~SBSaveCoreOptions(); const SBSaveCoreOptions &operator=(const lldb::SBSaveCoreOptions &rhs); diff --git a/lldb/source/API/SBSaveCoreOptions.cpp b/lldb/source/API/SBSaveCoreOptions.cpp index 6c3f74596203d6..19ca83f932bcf1 100644 --- a/lldb/source/API/SBSaveCoreOptions.cpp +++ b/lldb/source/API/SBSaveCoreOptions.cpp @@ -29,6 +29,8 @@ SBSaveCoreOptions::SBSaveCoreOptions(const SBSaveCoreOptions &rhs) { m_opaque_up = 
clone(rhs.m_opaque_up); } +SBSaveCoreOptions::~SBSaveCoreOptions() = default; + const SBSaveCoreOptions & SBSaveCoreOptions::operator=(const SBSaveCoreOptions &rhs) { LLDB_INSTRUMENT_VA(this, rhs); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/102680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
llvmbot wrote: @clayborg What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/102680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
llvmbot wrote: @llvm/pr-subscribers-lldb Author: None (llvmbot) Changes Backport 101cf540e698529d3dd899d00111bcb654a3c12b Requested by: @bulbazord --- Full diff: https://github.com/llvm/llvm-project/pull/102680.diff 2 Files Affected: - (modified) lldb/include/lldb/API/SBSaveCoreOptions.h (+1-1) - (modified) lldb/source/API/SBSaveCoreOptions.cpp (+2) ``diff diff --git a/lldb/include/lldb/API/SBSaveCoreOptions.h b/lldb/include/lldb/API/SBSaveCoreOptions.h index e77496bd3a4a0d..75506fd752e762 100644 --- a/lldb/include/lldb/API/SBSaveCoreOptions.h +++ b/lldb/include/lldb/API/SBSaveCoreOptions.h @@ -17,7 +17,7 @@ class LLDB_API SBSaveCoreOptions { public: SBSaveCoreOptions(); SBSaveCoreOptions(const lldb::SBSaveCoreOptions &rhs); - ~SBSaveCoreOptions() = default; + ~SBSaveCoreOptions(); const SBSaveCoreOptions &operator=(const lldb::SBSaveCoreOptions &rhs); diff --git a/lldb/source/API/SBSaveCoreOptions.cpp b/lldb/source/API/SBSaveCoreOptions.cpp index 6c3f74596203d6..19ca83f932bcf1 100644 --- a/lldb/source/API/SBSaveCoreOptions.cpp +++ b/lldb/source/API/SBSaveCoreOptions.cpp @@ -29,6 +29,8 @@ SBSaveCoreOptions::SBSaveCoreOptions(const SBSaveCoreOptions &rhs) { m_opaque_up = clone(rhs.m_opaque_up); } +SBSaveCoreOptions::~SBSaveCoreOptions() = default; + const SBSaveCoreOptions & SBSaveCoreOptions::operator=(const SBSaveCoreOptions &rhs) { LLDB_INSTRUMENT_VA(this, rhs); `` https://github.com/llvm/llvm-project/pull/102680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
@@ -22,6 +22,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass(*static_cast(this))) rampitec wrote: Exceeds 80 chars per line. https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) { return Changed; } -bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const { - auto FI = FIM.find(F); - if (FI == FIM.end()) -return false; +bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM, + LazyCallGraph &CG) { - return AMDGPUPerfHint::isMemBound(FI->second); + SmallVector Worklist; + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { +for (LazyCallGraph::SCC &SCC : RC) { + if (SCC.size() != 1) +continue; + Function &F = SCC.begin()->getFunction(); + if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) rampitec wrote: Why is it limited to internal linkage? https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/102654 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/102663 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
https://github.com/asl approved this pull request. This is one of two small frontend changes require to close the chain of changes required for end-to-end support of pointer authentication in LLVM 19. The change does not affect any other target and essentially just propagate command line options down to attributes. https://github.com/llvm/llvm-project/pull/102670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
https://github.com/clayborg approved this pull request. https://github.com/llvm/llvm-project/pull/102680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); wlei-llvm wrote: If there is no special reason to use `SampleCount` here, how about using `ExecutedBytes` to be consistent? We use the same value for FDO(https://github.com/llvm/llvm-project/blob/main/llvm/tools/llvm-profgen/ProfileGenerator.cpp#L807-L809). https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); aaupov wrote: I'm neutral about using ExecutedBytes but we use SampleCount for other profile-wide stats, e.g. stale samples percent. https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); wlei-llvm wrote: > I'm neutral about using ExecutedBytes but we use SampleCount for other > profile-wide stats, e.g. stale samples percent. Sounds good. No strong option, it shouldn't affect to the density value. https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); WenleiHe wrote: Don't have a good idea, but `ExecutedBytes` confused me as byte implied static size. It gives the impression of byte-wise coverage. `TotalSamplesInBytes`/`SampleCountInBytes`? https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) { return Changed; } -bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const { - auto FI = FIM.find(F); - if (FI == FIM.end()) -return false; +bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM, + LazyCallGraph &CG) { - return AMDGPUPerfHint::isMemBound(FI->second); + SmallVector Worklist; + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { +for (LazyCallGraph::SCC &SCC : RC) { + if (SCC.size() != 1) +continue; + Function &F = SCC.begin()->getFunction(); + if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) arsenm wrote: Copied from FunctionAttrs, but I think this meant to really be checking for interposable linkage (and I thought I dropped this before posting?) https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
arsenm wrote: ### Merge activity * **Aug 9, 10:57 PM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/102663). https://github.com/llvm/llvm-project/pull/102663 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
arsenm wrote: ### Merge activity * **Aug 9, 10:57 PM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/102654). https://github.com/llvm/llvm-project/pull/102654 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102645 >From a24983c4e848a0d1520c0fa25483bed76a7acbd0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 17:27:53 +0400 Subject: [PATCH] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. --- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 3 + .../Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 112 ++ .../Target/AMDGPU/AMDGPUPerfHintAnalysis.h| 62 ++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- llvm/test/CodeGen/AMDGPU/perfhint.ll | 1 + 7 files changed, 137 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 195e2a19214e80..5b8d37a8ae7944 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; -void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); -extern char &AMDGPUPerfHintAnalysisID; +void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &); +extern char &AMDGPUPerfHintAnalysisLegacyID; void initializeGCNRegPressurePrinterPass(PassRegistry &); extern char &GCNRegPressurePrinterID; diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 8579774f522309..bbb4573655ab79 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) -INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) +INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) #ifdef EXPENSIVE_CHECKS INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index b6a6c33d85f83c..7188c8953254c0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -22,6 +22,9 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", +AMDGPUPerfHintAnalysisPass( + *static_cast(this))) MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass()) MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass()) #undef MODULE_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 1213d5e0b41db1..f8943962cca069 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -12,12 +12,15 @@ /// //===--===// -#include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include 
"llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -54,12 +57,6 @@ static cl::opt STATISTIC(NumMemBound, "Number of functions marked as memory bound"); STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave"); -char llvm::AMDGPUPerfHintAnalysis::ID = 0; -char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID; - -INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE, -"Analysis if a function is memory bound", true, true) - namespace { struct AMDGPUPerfHint { @@ -67,7 +64,7 @@ struct AMDGPUPerfHint { public: AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/shiltian approved this pull request. https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); aaupov wrote: Comments above the variable and the method say it's dynamically executed bytes, but will rename them to avoid confusion. > TotalSamplesInBytes/SampleCountInBytes? What do you mean by that? https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/102717 None >From 3ec01daaa2d43350b2c835d4173ede441ca004a1 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 9 Aug 2024 23:25:21 -0400 Subject: [PATCH] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 29 ++--- clang/test/OpenMP/target_teams_codegen.cpp| 12 +++ .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 26 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 31 +++ 4 files changed, 54 insertions(+), 44 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8c5e4aa9c037e2..6c0c8646898cc6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9588,15 +9588,17 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } -static void emitNumTeamsForBareTargetDirective( +template +static void emitClauseForBareTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D, -llvm::SmallVectorImpl &NumTeams) { - const auto *C = D.getSingleClause(); - assert(!C->varlist_empty() && "ompx_bare requires explicit num_teams"); - CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - for (auto *E : C->getNumTeams()) { +llvm::SmallVectorImpl &Valuess) { + const auto *C = D.getSingleClause(); + assert(!C->varlist_empty() && + "ompx_bare requires explicit num_teams and thread_limit"); + CodeGenFunction::RunCleanupsScope Scope(CGF); + for (auto *E : C->varlist()) { llvm::Value *V = CGF.EmitScalarExpr(E); -NumTeams.push_back( +Valuess.push_back( CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); } } @@ -9672,14 +9674,17 @@ static void emitTargetCallKernelLaunch( bool IsBare = D.hasClausesOfKind(); SmallVector NumTeams; -if (IsBare) - emitNumTeamsForBareTargetDirective(CGF, D, NumTeams); -else +SmallVector NumThreads; +if (IsBare) { + 
emitClauseForBareTargetDirective(CGF, D, NumTeams); + emitClauseForBareTargetDirective(CGF, D, + NumThreads); +} else { NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D)); + NumThreads.push_back(OMPRuntime->emitNumThreadsForTargetDirective(CGF, D)); +} llvm::Value *DeviceID = emitDeviceID(Device, CGF); -llvm::Value *NumThreads = -OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *NumIterations = OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 9cab8eef148833..13d44e127201bd 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -127,13 +127,13 @@ int foo(int n) { aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1, 2) { a += 1; aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1, 2, 3) { a += 1; aa += 1; @@ -667,7 +667,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP144]], align 4 // CHECK1-NEXT:[[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP145]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP145]], align 4 // CHECK1-NEXT:[[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12 // CHECK1-NEXT:store i32 0, ptr [[TMP146]], align 4 // CHECK1-NEXT:[[TMP147:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, 
i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]]) @@ -720,7 +720,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP171:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP171]], align 4 // CHECK1-NEXT:[[TMP172:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP172]], align 4 +// CHECK1-NEXT:
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
https://github.com/shiltian ready_for_review https://github.com/llvm/llvm-project/pull/102717 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
shiltian wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102717?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102717** https://app.graphite.dev/github/pr/llvm/llvm-project/102717?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102715** https://app.graphite.dev/github/pr/llvm/llvm-project/102715?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @shiltian and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102717 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Shilei Tian (shiltian) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102717.diff 4 Files Affected: - (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+17-12) - (modified) clang/test/OpenMP/target_teams_codegen.cpp (+6-6) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+13-13) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+18-13) ``diff diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8c5e4aa9c037e2..6c0c8646898cc6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9588,15 +9588,17 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } -static void emitNumTeamsForBareTargetDirective( +template +static void emitClauseForBareTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D, -llvm::SmallVectorImpl &NumTeams) { - const auto *C = D.getSingleClause(); - assert(!C->varlist_empty() && "ompx_bare requires explicit num_teams"); - CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - for (auto *E : C->getNumTeams()) { +llvm::SmallVectorImpl &Valuess) { + const auto *C = D.getSingleClause(); + assert(!C->varlist_empty() && + "ompx_bare requires explicit num_teams and thread_limit"); + CodeGenFunction::RunCleanupsScope Scope(CGF); + for (auto *E : C->varlist()) { llvm::Value *V = CGF.EmitScalarExpr(E); -NumTeams.push_back( +Valuess.push_back( CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); } } @@ -9672,14 +9674,17 @@ static void emitTargetCallKernelLaunch( bool IsBare = D.hasClausesOfKind(); SmallVector NumTeams; -if (IsBare) - emitNumTeamsForBareTargetDirective(CGF, D, NumTeams); -else +SmallVector NumThreads; +if (IsBare) { + emitClauseForBareTargetDirective(CGF, D, NumTeams); + emitClauseForBareTargetDirective(CGF, D, + NumThreads); +} else { 
NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D)); + NumThreads.push_back(OMPRuntime->emitNumThreadsForTargetDirective(CGF, D)); +} llvm::Value *DeviceID = emitDeviceID(Device, CGF); -llvm::Value *NumThreads = -OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *NumIterations = OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 9cab8eef148833..13d44e127201bd 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -127,13 +127,13 @@ int foo(int n) { aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1, 2) { a += 1; aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1, 2, 3) { a += 1; aa += 1; @@ -667,7 +667,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP144]], align 4 // CHECK1-NEXT:[[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP145]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP145]], align 4 // CHECK1-NEXT:[[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12 // CHECK1-NEXT:store i32 0, ptr [[TMP146]], align 4 // CHECK1-NEXT:[[TMP147:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]]) @@ -720,7 
+720,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP171:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP171]], align 4 // CHECK1-NEXT:[[TMP172:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP172]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP172]], align 4 // CHECK1-NEXT:[[TMP173:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERN
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Shilei Tian (shiltian) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102717.diff 4 Files Affected: - (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+17-12) - (modified) clang/test/OpenMP/target_teams_codegen.cpp (+6-6) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+13-13) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+18-13) ``diff diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8c5e4aa9c037e2..6c0c8646898cc6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9588,15 +9588,17 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } -static void emitNumTeamsForBareTargetDirective( +template +static void emitClauseForBareTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D, -llvm::SmallVectorImpl &NumTeams) { - const auto *C = D.getSingleClause(); - assert(!C->varlist_empty() && "ompx_bare requires explicit num_teams"); - CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - for (auto *E : C->getNumTeams()) { +llvm::SmallVectorImpl &Valuess) { + const auto *C = D.getSingleClause(); + assert(!C->varlist_empty() && + "ompx_bare requires explicit num_teams and thread_limit"); + CodeGenFunction::RunCleanupsScope Scope(CGF); + for (auto *E : C->varlist()) { llvm::Value *V = CGF.EmitScalarExpr(E); -NumTeams.push_back( +Valuess.push_back( CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); } } @@ -9672,14 +9674,17 @@ static void emitTargetCallKernelLaunch( bool IsBare = D.hasClausesOfKind(); SmallVector NumTeams; -if (IsBare) - emitNumTeamsForBareTargetDirective(CGF, D, NumTeams); -else +SmallVector NumThreads; +if (IsBare) { + emitClauseForBareTargetDirective(CGF, D, NumTeams); + emitClauseForBareTargetDirective(CGF, D, + NumThreads); +} else { 
NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D)); + NumThreads.push_back(OMPRuntime->emitNumThreadsForTargetDirective(CGF, D)); +} llvm::Value *DeviceID = emitDeviceID(Device, CGF); -llvm::Value *NumThreads = -OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *NumIterations = OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 9cab8eef148833..13d44e127201bd 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -127,13 +127,13 @@ int foo(int n) { aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1, 2) { a += 1; aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1, 2, 3) { a += 1; aa += 1; @@ -667,7 +667,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP144]], align 4 // CHECK1-NEXT:[[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP145]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP145]], align 4 // CHECK1-NEXT:[[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12 // CHECK1-NEXT:store i32 0, ptr [[TMP146]], align 4 // CHECK1-NEXT:[[TMP147:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]]) @@ -720,7 
+720,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP171:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP171]], align 4 // CHECK1-NEXT:[[TMP172:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP172]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP172]], align 4 // CHECK1-NEXT:[[TMP173:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS
[llvm-branch-commits] [llvm] release/19.x: [AIX]export function descriptor symbols related to template functions. (#101920) (PR #102407)
https://github.com/hubert-reinterpretcast approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/102407 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits