[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/100523 >From ef1f347a20205255e14735e0809c2168f4124556 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jul 2024 10:38:11 +0400 Subject: [PATCH] TTI: Check legalization cost of abs nodes Also adjust the AMDGPU cost. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 32 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/AMDGPU/abs.ll| 368 +- .../Analysis/CostModel/AMDGPU/arith-ssat.ll | 32 +- .../Analysis/CostModel/AMDGPU/arith-usat.ll | 32 +- 5 files changed, 242 insertions(+), 231 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 8a14c8a37577ad..c2bc1353ee8838 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2120,20 +2120,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::vector_reduce_fminimum: return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID), VecOpTy, ICA.getFlags(), CostKind); -case Intrinsic::abs: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) - Type *CondTy = RetTy->getWithNewBitWidth(1); - CmpInst::Predicate Pred = CmpInst::ICMP_SGT; - InstructionCost Cost = 0; - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - Pred, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - Pred, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? 
- Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None}); - return Cost; -} +case Intrinsic::abs: + ISD = ISD::ABS; + break; case Intrinsic::smax: ISD = ISD::SMAX; break; @@ -2402,6 +2391,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); return Cost; } +case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, + {TTI::OK_UniformConstantValue, TTI::OP_None}); + return Cost; +} case Intrinsic::fptosi_sat: case Intrinsic::fptoui_sat: { if (Tys.empty()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 8d4ff64ac5adcf..c6aadf0b503012 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -696,6 +696,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) { case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: + case Intrinsic::abs: return true; default: return false; @@ -724,7 +725,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (SLT == MVT::f64) return LT.first * NElts * get64BitInstrCost(CostKind); - if ((ST->has16BitInsts() && SLT == MVT::f16) || + if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) || (ST->hasPackedFP32Ops() && SLT == MVT::f32)) NElts = (NElts + 1) / 2; @@ -752,11 +753,17 @@ 
GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::usub_sat: case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { +// TODO: Full rate for i32/i16 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16}; if (any_of(ValidSatTys, [&](MVT M) { return M == LT.second; })) NElts = 1; break; } + case Intrinsic::abs: +// Expansion takes 2 instructions for VALU +if (SLT == MVT::i16 || SLT == MVT::i32) + InstRate = 2 * getFullRateInstrCost(); +break; default: break; } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll index f65615b07abc0f..b86e99558377bb 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll @@ -14,116 +14,116 @@ define void @abs_nonpoison() { ; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call
[llvm-branch-commits] [clang] [llvm] release/19.x: Revert "demangle function names in trace files (#87626)" (PR #102552)
https://github.com/nikic approved this pull request. https://github.com/llvm/llvm-project/pull/102552 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TTI: Check legalization cost of abs nodes (PR #100523)
arsenm wrote: ### Merge activity * **Aug 9, 4:27 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100523). https://github.com/llvm/llvm-project/pull/100523 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Correct costs of saturating add/sub intrinsics (PR #100808)
arsenm wrote: ### Merge activity * **Aug 9, 4:27 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/100808). https://github.com/llvm/llvm-project/pull/100808 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LAA] Refine stride checks for SCEVs during dependence analysis. (#99… (PR #102201)
https://github.com/Meinersbur approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/102201 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102599 This will be needed to continue generating the raw instruction in the flat case. >From be3f530768b923491b5747bac5b005779bd46a7e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 14:51:41 +0400 Subject: [PATCH] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics This will be needed to continue generating the raw instruction in the flat case. --- llvm/lib/IR/AutoUpgrade.cpp| 13 +- llvm/test/Bitcode/amdgcn-atomic.ll | 39 -- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ec719754183d5d..3f6ccbebd35ef2 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -34,9 +34,11 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -4096,11 +4098,20 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); - if (PtrTy->getAddressSpace() != 3) { + unsigned AddrSpace = PtrTy->getAddressSpace(); + if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { RMW->setMetadata("amdgpu.no.fine.grained.memory", MDNode::get(F->getContext(), {})); } + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { +MDBuilder MDB(F->getContext()); +MDNode *RangeNotPrivate = +MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), +APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); +RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); + } + if (IsVolatile) RMW->setVolatile(true); diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll 
index a114c27bafd4a2..5feba38e635f32 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr } define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: 
atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr ; Test some invalid ordering handling define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true) - ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1,
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102599?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102599** https://app.graphite.dev/github/pr/llvm/llvm-project/102599?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102461** https://app.graphite.dev/github/pr/llvm/llvm-project/102461?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/>: 1 other dependent PR ([#102462](https://github.com/llvm/llvm-project/pull/102462) https://app.graphite.dev/github/pr/llvm/llvm-project/102462?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/>) * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102599 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This will be needed to continue generating the raw instruction in the flat case. --- Full diff: https://github.com/llvm/llvm-project/pull/102599.diff 2 Files Affected: - (modified) llvm/lib/IR/AutoUpgrade.cpp (+12-1) - (modified) llvm/test/Bitcode/amdgcn-atomic.ll (+21-18) ``diff diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ec719754183d5d..3f6ccbebd35ef2 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -34,9 +34,11 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -4096,11 +4098,20 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); - if (PtrTy->getAddressSpace() != 3) { + unsigned AddrSpace = PtrTy->getAddressSpace(); + if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { RMW->setMetadata("amdgpu.no.fine.grained.memory", MDNode::get(F->getContext(), {})); } + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { +MDBuilder MDB(F->getContext()); +MDNode *RangeNotPrivate = +MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), +APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); +RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); + } + if (IsVolatile) RMW->setVolatile(true); diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index a114c27bafd4a2..5feba38e635f32 100644 --- a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { 
- ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr } define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr ; Test some invalid ordering handling 
define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true) - ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn
[llvm-branch-commits] [clang] [llvm] clang/AMDGPU: Set noalias.addrspace metadata on atomicrmw (PR #102462)
@@ -647,6 +647,14 @@ class LangOptions : public LangOptionsBase { return ConvergentFunctions; } + /// Return true if atomicrmw operations targeting allocations in private + /// memory are undefined. + bool threadPrivateMemoryAtomicsAreUndefined() const { +// Should be false for OpenMP. +// TODO: Should this be true for SYCL? +return OpenCL || CUDA; gonzalobg wrote: > @gonzalobg -- Does NVIDIA define what happens if atomics are used on local > address space? I agree with @arsenm that this is a language property. In CUDA C++, just like in C++, the behavior of atomics to automatic variables is well-defined, e.g., this is ok: ``` __device__ void foo() { cuda::atomic<...> x(0); x.fetch_add(1); // OK } ``` When compiling to PTX, however, `atom` requires global or shared statespaces (or generic to those). That is, for local memory, LLVM must generate code that does not use `atom`. But that's a problem for the LLVM NVPTX backend to solve. https://github.com/llvm/llvm-project/pull/102462 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102599 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102599 >From 697de6b204b911ba4e34160328c174d9775c237c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 14:51:41 +0400 Subject: [PATCH] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics This will be needed to continue generating the raw instruction in the flat case. --- llvm/lib/IR/AutoUpgrade.cpp| 13 +- llvm/test/Bitcode/amdgcn-atomic.ll | 39 -- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ec719754183d5d..3f6ccbebd35ef2 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -34,9 +34,11 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" @@ -4096,11 +4098,20 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); - if (PtrTy->getAddressSpace() != 3) { + unsigned AddrSpace = PtrTy->getAddressSpace(); + if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { RMW->setMetadata("amdgpu.no.fine.grained.memory", MDNode::get(F->getContext(), {})); } + if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { +MDBuilder MDB(F->getContext()); +MDNode *RangeNotPrivate = +MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), +APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); +RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); + } + if (IsVolatile) RMW->setVolatile(true); diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll index a114c27bafd4a2..5feba38e635f32 100644 --- 
a/llvm/test/Bitcode/amdgcn-atomic.ll +++ b/llvm/test/Bitcode/amdgcn-atomic.ll @@ -2,10 +2,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4{{$}} @@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr } define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false) - ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1 %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false) ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 
syncscope("agent") seq_cst, align 4{{$}} @@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr ; Test some invalid ordering handling define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) { - ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 + ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}} %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true) - ; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 +
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak created https://github.com/llvm/llvm-project/pull/102613 After decomposition of OpenMP compound constructs and assignment of applicable clauses to each leaf construct, composite constructs are then combined again into a single element in the construct queue. This helped later lowering stages easily identify composite constructs. However, as a result of the re-composition stage, the same list of clauses is used to produce all MLIR operations corresponding to each leaf of the original composite construct. This undoes existing logic introducing implicit clauses and deciding to which leaf construct(s) each clause applies. This patch removes construct re-composition logic and updates Flang lowering to be able to identify composite constructs from a list of leaf constructs. As a result, the right set of clauses is produced for each operation representing a leaf of a composite construct. >From aa0403a8a137295345b066cebaab9635e4b9886f Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Fri, 9 Aug 2024 12:58:27 +0100 Subject: [PATCH] [Flang][OpenMP] Prevent re-composition of composite constructs After decomposition of OpenMP compound constructs and assignment of applicable clauses to each leaf construct, composite constructs are then combined again into a single element in the construct queue. This helped later lowering stages easily identify composite constructs. However, as a result of the re-composition stage, the same list of clauses is used to produce all MLIR operations corresponding to each leaf of the original composite construct. This undoes existing logic introducing implicit clauses and deciding to which leaf construct(s) each clause applies. This patch removes construct re-composition logic and updates Flang lowering to be able to identify composite constructs from a list of leaf constructs. As a result, the right set of clauses is produced for each operation representing a leaf of a composite construct. 
--- flang/lib/Lower/OpenMP/Decomposer.cpp | 56 +-- flang/lib/Lower/OpenMP/Decomposer.h | 7 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 93 +++- .../Lower/OpenMP/Todo/omp-do-simd-linear.f90 | 2 +- .../Lower/OpenMP/default-clause-byref.f90 | 4 +- flang/test/Lower/OpenMP/default-clause.f90| 4 +- .../Frontend/OpenMP/ConstructCompositionT.h | 425 -- 7 files changed, 103 insertions(+), 488 deletions(-) delete mode 100644 llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index dfd85897469e28..5a7b1078bdd414 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/ClauseT.h" -#include "llvm/Frontend/OpenMP/ConstructCompositionT.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" #include "llvm/Frontend/OpenMP/OMP.h" #include "llvm/Support/raw_ostream.h" @@ -68,12 +67,6 @@ struct ConstructDecomposition { }; } // namespace -static UnitConstruct mergeConstructs(uint32_t version, - llvm::ArrayRef units) { - tomp::ConstructCompositionT compose(version, units); - return compose.merged; -} - namespace Fortran::lower::omp { LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const UnitConstruct &uc) { @@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef 
leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use t
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Sergio Afonso (skatrak) Changes After decomposition of OpenMP compound constructs and assignment of applicable clauses to each leaf construct, composite constructs are then combined again into a single element in the construct queue. This helped later lowering stages easily identify composite constructs. However, as a result of the re-composition stage, the same list of clauses is used to produce all MLIR operations corresponding to each leaf of the original composite construct. This undoes existing logic introducing implicit clauses and deciding to which leaf construct(s) each clause applies. This patch removes construct re-composition logic and updates Flang lowering to be able to identify composite constructs from a list of leaf constructs. As a result, the right set of clauses is produced for each operation representing a leaf of a composite construct. --- Patch is 34.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102613.diff 7 Files Affected: - (modified) flang/lib/Lower/OpenMP/Decomposer.cpp (+22-34) - (modified) flang/lib/Lower/OpenMP/Decomposer.h (+6-1) - (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+70-23) - (modified) flang/test/Lower/OpenMP/Todo/omp-do-simd-linear.f90 (+1-1) - (modified) flang/test/Lower/OpenMP/default-clause-byref.f90 (+2-2) - (modified) flang/test/Lower/OpenMP/default-clause.f90 (+2-2) - (removed) llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h (-425) ``diff diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index dfd85897469e28..5a7b1078bdd414 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/ClauseT.h" -#include "llvm/Frontend/OpenMP/ConstructCompositionT.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" #include 
"llvm/Frontend/OpenMP/OMP.h" #include "llvm/Support/raw_ostream.h" @@ -68,12 +67,6 @@ struct ConstructDecomposition { }; } // namespace -static UnitConstruct mergeConstructs(uint32_t version, - llvm::ArrayRef units) { - tomp::ConstructCompositionT compose(version, units); - return compose.merged; -} - namespace Fortran::lower::omp { LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const UnitConstruct &uc) { @@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use the directive's +// source. 
+for (auto &clause : uc.clauses) + if (clause.source.empty()) +clause.source = source; } - return constructs; + return decompose.output; +} + +bool matchLeafSequence(ConstructQueue::const_iterator item, + const ConstructQueue &queue, + llvm::ArrayRef directives) { + for (auto [dir, leaf] : + llvm::zip_longest(directives, llvm::make_range(item, queue.end( { +if (!dir || !leaf) + return false; + +if (dir.value() != leaf.value().id) + return false; + } + return true; } bool isLastItemInQueue(ConstructQueue::const_iterator item, diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h index e85956ffe1a231..6c90e8540d459b 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++ b/flang/lib/Lower/OpenMP/Decomposer.h @@ -10,7 +10,6 @@ #include "Clauses.h" #include "mlir/IR/BuiltinOps.h" -#includ
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -2263,24 +2321,13 @@ static void genOMPDispatch(lower::AbstractConverter &converter, // Composite constructs case llvm::omp::Directive::OMPD_distribute_parallel_do: -genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, - queue, item, *loopDsp); -break; case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: -genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, - loc, queue, item, *loopDsp); -break; case llvm::omp::Directive::OMPD_distribute_simd: -genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); -break; case llvm::omp::Directive::OMPD_do_simd: -genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); -break; case llvm::omp::Directive::OMPD_taskloop_simd: -genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); +// Composite constructs should have been split into a sequence of leaf +// constructs and lowered by genOMPCompositeDispatch(). +llvm_unreachable("Unexpected composite construct."); break; default: kparzysz wrote: Maybe you could delete the specific cases with composite constructs, and add `assert(!isCompositeConstruct(dir))` to the default label. https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -2141,13 +2154,50 @@ static void genCompositeTaskloopSimd( semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); TODO(loc, "Composite TASKLOOP SIMD"); } //===--===// // Dispatch //===--===// +static bool genOMPCompositeDispatch( +lower::AbstractConverter &converter, lower::SymMap &symTable, +semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, +mlir::Location loc, const ConstructQueue &queue, +ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + using llvm::omp::Directive; + using llvm::omp::getLeafConstructs, lower::omp::matchLeafSequence; + + if (matchLeafSequence( + item, queue, + getLeafConstructs(Directive::OMPD_distribute_parallel_do))) kparzysz wrote: Maybe `matchLeafSequence` could take the directive, and call `getLeafConstructs` itself. It would make these calls a bit tidier... https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use the directive's +// source. +for (auto &clause : uc.clauses) + if (clause.source.empty()) +clause.source = source; } - return constructs; + return decompose.output; +} + +bool matchLeafSequence(ConstructQueue::const_iterator item, + const ConstructQueue &queue, + llvm::ArrayRef directives) { + for (auto [dir, leaf] : + llvm::zip_longest(directives, llvm::make_range(item, queue.end( { +if (!dir || !leaf) kparzysz wrote: `0` is a legitimate directive id (`OMPD_allocate`). https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/102613 >From aa0403a8a137295345b066cebaab9635e4b9886f Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Fri, 9 Aug 2024 12:58:27 +0100 Subject: [PATCH 1/2] [Flang][OpenMP] Prevent re-composition of composite constructs After decomposition of OpenMP compound constructs and assignment of applicable clauses to each leaf construct, composite constructs are then combined again into a single element in the construct queue. This helped later lowering stages easily identify composite constructs. However, as a result of the re-composition stage, the same list of clauses is used to produce all MLIR operations corresponding to each leaf of the original composite construct. This undoes existing logic introducing implicit clauses and deciding to which leaf construct(s) each clause applies. This patch removes construct re-composition logic and updates Flang lowering to be able to identify composite constructs from a list of leaf constructs. As a result, the right set of clauses is produced for each operation representing a leaf of a composite construct. 
--- flang/lib/Lower/OpenMP/Decomposer.cpp | 56 +-- flang/lib/Lower/OpenMP/Decomposer.h | 7 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 93 +++- .../Lower/OpenMP/Todo/omp-do-simd-linear.f90 | 2 +- .../Lower/OpenMP/default-clause-byref.f90 | 4 +- flang/test/Lower/OpenMP/default-clause.f90| 4 +- .../Frontend/OpenMP/ConstructCompositionT.h | 425 -- 7 files changed, 103 insertions(+), 488 deletions(-) delete mode 100644 llvm/include/llvm/Frontend/OpenMP/ConstructCompositionT.h diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index dfd85897469e28..5a7b1078bdd414 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/OpenMP/ClauseT.h" -#include "llvm/Frontend/OpenMP/ConstructCompositionT.h" #include "llvm/Frontend/OpenMP/ConstructDecompositionT.h" #include "llvm/Frontend/OpenMP/OMP.h" #include "llvm/Support/raw_ostream.h" @@ -68,12 +67,6 @@ struct ConstructDecomposition { }; } // namespace -static UnitConstruct mergeConstructs(uint32_t version, - llvm::ArrayRef units) { - tomp::ConstructCompositionT compose(version, units); - return compose.merged; -} - namespace Fortran::lower::omp { LLVM_DUMP_METHOD llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const UnitConstruct &uc) { @@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef 
leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use the directive's +// source. +for (auto &clause : uc.clauses) + if (clause.source.empty()) +clause.source = source; } - return constructs; + return decompose.output; +} + +bool matchLeafSequence(ConstructQueue::const_iterator item, + const ConstructQueue &queue, + llvm::ArrayRef directives) { + for (auto [dir, leaf] : + llvm::zip_longest(directives, llvm::make_range(item, queue.end( { +if (!dir || !leaf) + return false; + +if (dir.value() != leaf.value().id) + return false; + } + return true; } bool isLastItemInQueue(ConstructQueue::const_iterator item, diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h index e85956ffe1a231..6c90e8540d459b 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak edited https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -2141,13 +2154,50 @@ static void genCompositeTaskloopSimd( semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); TODO(loc, "Composite TASKLOOP SIMD"); } //===--===// // Dispatch //===--===// +static bool genOMPCompositeDispatch( +lower::AbstractConverter &converter, lower::SymMap &symTable, +semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, +mlir::Location loc, const ConstructQueue &queue, +ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { + using llvm::omp::Directive; + using llvm::omp::getLeafConstructs, lower::omp::matchLeafSequence; + + if (matchLeafSequence( + item, queue, + getLeafConstructs(Directive::OMPD_distribute_parallel_do))) skatrak wrote: Done. https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -90,38 +83,33 @@ ConstructQueue buildConstructQueue( Fortran::lower::pft::Evaluation &eval, const parser::CharBlock &source, llvm::omp::Directive compound, const List &clauses) { - List constructs; - ConstructDecomposition decompose(modOp, semaCtx, eval, compound, clauses); assert(!decompose.output.empty() && "Construct decomposition failed"); - llvm::SmallVector loweringUnits; - std::ignore = - llvm::omp::getLeafOrCompositeConstructs(compound, loweringUnits); - uint32_t version = getOpenMPVersionAttribute(modOp); - - int leafIndex = 0; - for (llvm::omp::Directive dir_id : loweringUnits) { -llvm::ArrayRef leafsOrSelf = -llvm::omp::getLeafConstructsOrSelf(dir_id); -size_t numLeafs = leafsOrSelf.size(); - -llvm::ArrayRef toMerge{&decompose.output[leafIndex], - numLeafs}; -auto &uc = constructs.emplace_back(mergeConstructs(version, toMerge)); - -if (!transferLocations(clauses, uc.clauses)) { - // If some clauses are left without source information, use the - // directive's source. - for (auto &clause : uc.clauses) { -if (clause.source.empty()) - clause.source = source; - } -} -leafIndex += numLeafs; + for (UnitConstruct &uc : decompose.output) { +assert(getLeafConstructs(uc.id).empty() && "unexpected compound directive"); +// If some clauses are left without source information, use the directive's +// source. +for (auto &clause : uc.clauses) + if (clause.source.empty()) +clause.source = source; } - return constructs; + return decompose.output; +} + +bool matchLeafSequence(ConstructQueue::const_iterator item, + const ConstructQueue &queue, + llvm::ArrayRef directives) { + for (auto [dir, leaf] : + llvm::zip_longest(directives, llvm::make_range(item, queue.end( { +if (!dir || !leaf) skatrak wrote: `llvm::zip_longest` returns two `std::optional` (for when ranges are not the same size), which is what this is checking, not the actual integer value of the directive. I made some changes to avoid confusion. 
https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
https://github.com/skatrak commented: Thank you Krzysztof for your comments, they should be addressed now. https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [llvm] [Flang][OpenMP] Prevent re-composition of composite constructs (PR #102613)
@@ -2263,24 +2321,13 @@ static void genOMPDispatch(lower::AbstractConverter &converter, // Composite constructs case llvm::omp::Directive::OMPD_distribute_parallel_do: -genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, - queue, item, *loopDsp); -break; case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: -genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, - loc, queue, item, *loopDsp); -break; case llvm::omp::Directive::OMPD_distribute_simd: -genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); -break; case llvm::omp::Directive::OMPD_do_simd: -genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, - *loopDsp); -break; case llvm::omp::Directive::OMPD_taskloop_simd: -genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, - item, *loopDsp); +// Composite constructs should have been split into a sequence of leaf +// constructs and lowered by genOMPCompositeDispatch(). +llvm_unreachable("Unexpected composite construct."); break; default: skatrak wrote: Done. I added a bit more generic assert, since we don't expect combined constructs there either. https://github.com/llvm/llvm-project/pull/102613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102641 Backport 0b745a10843fc85e579bbf459f78b3f43e7ab309 Requested by: @davemgreen >From f83753954d8f94dc7573e210e7fb2300803c0012 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 9 Aug 2024 14:25:07 +0100 Subject: [PATCH] [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) The code-generator is currently not able to handle scalable vectors of . The usual "fix" for this until it is supported is to mark the costs of loads/stores with an invalid cost, preventing the vectorizer from vectorizing at those factors. But on rare occasions loops do not contain load/stores, only reductions. So whilst this is still unsupported return an invalid cost to avoid selecting vscale x 1 VFs. The cost of a reduction is not currently used by the vectorizer so this adds the cost to the add/mul/and/or/xor or min/max that should feed the reduction. It includes reduction costs too, for completeness. This change will be removed when code-generation for these types is sufficiently reliable. 
Fixes #99760 (cherry picked from commit 0b745a10843fc85e579bbf459f78b3f43e7ab309) --- .../AArch64/AArch64TargetTransformInfo.cpp| 32 + .../CostModel/AArch64/arith-fp-sve.ll | 4 +++ .../Analysis/CostModel/AArch64/cttz_elts.ll | 2 ++ .../Analysis/CostModel/AArch64/sve-arith.ll | 21 +++ .../CostModel/AArch64/sve-intrinsics.ll | 36 +++ .../Analysis/CostModel/AArch64/sve-min-max.ll | 12 +++ 6 files changed, 107 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 45148449dfb821..6a3efd587ac3f3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -540,7 +540,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { InstructionCost AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. auto *RetTy = ICA.getReturnType(); + if (auto *VTy = dyn_cast(RetTy)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + switch (ICA.getID()) { case Intrinsic::experimental_vector_histogram_add: if (!ST->hasSVE2()) @@ -3018,6 +3026,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ArrayRef Args, const Instruction *CxtI) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + // TODO: Handle more cost kinds. 
if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -3792,6 +3808,14 @@ InstructionCost AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + std::pair LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) @@ -3836,6 +3860,14 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(ValTy)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast(ValTy)) { InstructionCost BaseCost = diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/102641 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
llvmbot wrote: @pawosm-arm What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/102641 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-analysis Author: None (llvmbot) Changes Backport 0b745a10843fc85e579bbf459f78b3f43e7ab309 Requested by: @davemgreen --- Patch is 36.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102641.diff 6 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+32) - (modified) llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll (+4) - (modified) llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll (+2) - (modified) llvm/test/Analysis/CostModel/AArch64/sve-arith.ll (+21) - (modified) llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll (+36) - (modified) llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll (+12) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 45148449dfb821..6a3efd587ac3f3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -540,7 +540,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { InstructionCost AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. 
auto *RetTy = ICA.getReturnType(); + if (auto *VTy = dyn_cast(RetTy)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + switch (ICA.getID()) { case Intrinsic::experimental_vector_histogram_add: if (!ST->hasSVE2()) @@ -3018,6 +3026,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ArrayRef Args, const Instruction *CxtI) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + // TODO: Handle more cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -3792,6 +3808,14 @@ InstructionCost AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast(Ty)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + std::pair LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) @@ -3836,6 +3860,14 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. 
+ if (auto *VTy = dyn_cast(ValTy)) +if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast(ValTy)) { InstructionCost BaseCost = diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll index 18a1c31c03f748..770d3087b07522 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll @@ -8,6 +8,7 @@ define void @fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd undef, un
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) (PR #102641)
https://github.com/pawosm-arm approved this pull request. https://github.com/llvm/llvm-project/pull/102641 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102645 This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. >From 353885ddcab1182b25915766b9235ba411386359 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 17:27:53 +0400 Subject: [PATCH] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. 
--- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 113 ++ .../Target/AMDGPU/AMDGPUPerfHintAnalysis.h| 62 ++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- llvm/test/CodeGen/AMDGPU/perfhint.ll | 1 + 7 files changed, 136 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 195e2a19214e80..5b8d37a8ae7944 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; -void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); -extern char &AMDGPUPerfHintAnalysisID; +void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &); +extern char &AMDGPUPerfHintAnalysisLegacyID; void initializeGCNRegPressurePrinterPass(PassRegistry &); extern char &GCNRegPressurePrinterID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 8579774f522309..bbb4573655ab79 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) -INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) +INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) #ifdef EXPENSIVE_CHECKS INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index b6a6c33d85f83c..23fb1dba7b084c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -22,6 +22,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass(*static_cast(this))) MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass()) MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass()) #undef MODULE_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 1213d5e0b41db1..5797f02cb374e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -12,12 +12,15 @@ /// //===--===// -#include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -54,12 +57,6 @@ static
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. --- Full diff: https://github.com/llvm/llvm-project/pull/102645.diff 7 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+2-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp (+89-24) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h (+40-22) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-1) - (modified) llvm/test/CodeGen/AMDGPU/perfhint.ll (+1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 195e2a19214e8..5b8d37a8ae794 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; -void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); -extern char &AMDGPUPerfHintAnalysisID; +void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &); +extern char &AMDGPUPerfHintAnalysisLegacyID; void initializeGCNRegPressurePrinterPass(PassRegistry &); extern char &GCNRegPressurePrinterID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 
8579774f52230..bbb4573655ab7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) -INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) +INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) #ifdef EXPENSIVE_CHECKS INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index b6a6c33d85f83..23fb1dba7b084 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -22,6 +22,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass(*static_cast(this))) MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass()) MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass()) #undef MODULE_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 1213d5e0b41db..5797f02cb374e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -12,12 +12,15 @@ /// //===--===// -#include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" #include 
"llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -54,12 +57,6 @@ static cl::opt STATISTIC(NumMemBound, "Number of functions marked as memory bound"); STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave"); -char llvm::AMDGPUPerfHintAnalysis::ID = 0; -char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID; - -INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE, -"Analysis if a function is memory bound", true, true) - namespace { struct AMDGPUPerfHint { @@ -67,7 +64,7 @@ struct AMDGPUPerfHint { public: AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_, - const TargetLowering *TLI_) + const SITargetLowering *TLI_) : FIM(FIM_), TLI(TLI_) {} bool runOnFu
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes (#101978) (PR #102646)
https://github.com/RSilicon created https://github.com/llvm/llvm-project/pull/102646 Default attributes assigned to all functions according to the command line parameters. Some functions might have their own attributes and we need to set or remove attributes accordingly. Tests are updated to test this scenarios too. (cherry picked from commit 9e9fa00dcb9522db3f78d921eda6a18b9ee568bb) >From 62e4bbbaef6d003a058681cde53e909b365c21db Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Fri, 9 Aug 2024 17:51:38 +0200 Subject: [PATCH] [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) Default attributes assigned to all functions according to the command line parameters. Some functions might have their own attributes and we need to set or remove attributes accordingly. Tests are updated to test this scenarios too. (cherry picked from commit 9e9fa00dcb9522db3f78d921eda6a18b9ee568bb) --- clang/lib/CodeGen/CGCall.cpp | 2 +- clang/lib/CodeGen/TargetInfo.cpp | 32 --- clang/lib/CodeGen/TargetInfo.h| 7 ++-- .../CodeGen/aarch64-branch-protection-attr.c | 13 +++- .../CodeGen/arm-branch-protection-attr-1.c| 6 5 files changed, 52 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 234a9c16e39dfd..6e69e84a2344c1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2032,7 +2032,7 @@ static void getTrivialDefaultFunctionAttributes( } TargetInfo::BranchProtectionInfo BPI(LangOpts); - TargetCodeGenInfo::setBranchProtectionFnAttributes(BPI, FuncAttrs); + TargetCodeGenInfo::initBranchProtectionFnAttributes(BPI, FuncAttrs); } /// Merges `target-features` from \TargetOpts and \F, and sets the result in diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 38faa50cf19cf2..64a9a5554caf72 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -209,13 +209,37 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( 
void TargetCodeGenInfo::setBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F) { - llvm::AttrBuilder FuncAttrs(F.getContext()); - setBranchProtectionFnAttributes(BPI, FuncAttrs); - F.addFnAttrs(FuncAttrs); + // Called on already created and initialized function where attributes already + // set from command line attributes but some might need to be removed as the + // actual BPI is different. + if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { +F.addFnAttr("sign-return-address", BPI.getSignReturnAddrStr()); +F.addFnAttr("sign-return-address-key", BPI.getSignKeyStr()); + } else { +if (F.hasFnAttribute("sign-return-address")) + F.removeFnAttr("sign-return-address"); +if (F.hasFnAttribute("sign-return-address-key")) + F.removeFnAttr("sign-return-address-key"); + } + + auto AddRemoveAttributeAsSet = [&](bool Set, const StringRef &ModAttr) { +if (Set) + F.addFnAttr(ModAttr); +else if (F.hasFnAttribute(ModAttr)) + F.removeFnAttr(ModAttr); + }; + + AddRemoveAttributeAsSet(BPI.BranchTargetEnforcement, + "branch-target-enforcement"); + AddRemoveAttributeAsSet(BPI.BranchProtectionPAuthLR, + "branch-protection-pauth-lr"); + AddRemoveAttributeAsSet(BPI.GuardedControlStack, "guarded-control-stack"); } -void TargetCodeGenInfo::setBranchProtectionFnAttributes( +void TargetCodeGenInfo::initBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::AttrBuilder &FuncAttrs) { + // Only used for initializing attributes in the AttrBuilder, which will not + // contain any of these attributes so no need to remove anything. 
if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { FuncAttrs.addAttribute("sign-return-address", BPI.getSignReturnAddrStr()); FuncAttrs.addAttribute("sign-return-address-key", BPI.getSignKeyStr()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 2f2138582ba1e3..156b4ff4353bee 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -414,13 +414,16 @@ class TargetCodeGenInfo { return nullptr; } + // Set the Branch Protection Attributes of the Function accordingly to the + // BPI. Remove attributes that contradict with current BPI. static void setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F); + // Add the Branch Protection Attributes of the FuncAttrs. static void - setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, - llvm::AttrBuilder &FuncAttrs); + initBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, + llvm::Att
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) (PR #102646)
https://github.com/RSilicon edited https://github.com/llvm/llvm-project/pull/102646 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) (PR #102646)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Rose Silicon (RSilicon) Changes Default attributes assigned to all functions according to the command line parameters. Some functions might have their own attributes and we need to set or remove attributes accordingly. Tests are updated to test this scenarios too. (cherry picked from commit 9e9fa00dcb9522db3f78d921eda6a18b9ee568bb) --- Full diff: https://github.com/llvm/llvm-project/pull/102646.diff 5 Files Affected: - (modified) clang/lib/CodeGen/CGCall.cpp (+1-1) - (modified) clang/lib/CodeGen/TargetInfo.cpp (+28-4) - (modified) clang/lib/CodeGen/TargetInfo.h (+5-2) - (modified) clang/test/CodeGen/aarch64-branch-protection-attr.c (+12-1) - (modified) clang/test/CodeGen/arm-branch-protection-attr-1.c (+6) ``diff diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 234a9c16e39dfd..6e69e84a2344c1 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2032,7 +2032,7 @@ static void getTrivialDefaultFunctionAttributes( } TargetInfo::BranchProtectionInfo BPI(LangOpts); - TargetCodeGenInfo::setBranchProtectionFnAttributes(BPI, FuncAttrs); + TargetCodeGenInfo::initBranchProtectionFnAttributes(BPI, FuncAttrs); } /// Merges `target-features` from \TargetOpts and \F, and sets the result in diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 38faa50cf19cf2..64a9a5554caf72 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -209,13 +209,37 @@ llvm::Value *TargetCodeGenInfo::createEnqueuedBlockKernel( void TargetCodeGenInfo::setBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F) { - llvm::AttrBuilder FuncAttrs(F.getContext()); - setBranchProtectionFnAttributes(BPI, FuncAttrs); - F.addFnAttrs(FuncAttrs); + // Called on already created and initialized function where attributes already + // set from command line attributes but some 
might need to be removed as the + // actual BPI is different. + if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { +F.addFnAttr("sign-return-address", BPI.getSignReturnAddrStr()); +F.addFnAttr("sign-return-address-key", BPI.getSignKeyStr()); + } else { +if (F.hasFnAttribute("sign-return-address")) + F.removeFnAttr("sign-return-address"); +if (F.hasFnAttribute("sign-return-address-key")) + F.removeFnAttr("sign-return-address-key"); + } + + auto AddRemoveAttributeAsSet = [&](bool Set, const StringRef &ModAttr) { +if (Set) + F.addFnAttr(ModAttr); +else if (F.hasFnAttribute(ModAttr)) + F.removeFnAttr(ModAttr); + }; + + AddRemoveAttributeAsSet(BPI.BranchTargetEnforcement, + "branch-target-enforcement"); + AddRemoveAttributeAsSet(BPI.BranchProtectionPAuthLR, + "branch-protection-pauth-lr"); + AddRemoveAttributeAsSet(BPI.GuardedControlStack, "guarded-control-stack"); } -void TargetCodeGenInfo::setBranchProtectionFnAttributes( +void TargetCodeGenInfo::initBranchProtectionFnAttributes( const TargetInfo::BranchProtectionInfo &BPI, llvm::AttrBuilder &FuncAttrs) { + // Only used for initializing attributes in the AttrBuilder, which will not + // contain any of these attributes so no need to remove anything. if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) { FuncAttrs.addAttribute("sign-return-address", BPI.getSignReturnAddrStr()); FuncAttrs.addAttribute("sign-return-address-key", BPI.getSignKeyStr()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 2f2138582ba1e3..156b4ff4353bee 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -414,13 +414,16 @@ class TargetCodeGenInfo { return nullptr; } + // Set the Branch Protection Attributes of the Function accordingly to the + // BPI. Remove attributes that contradict with current BPI. 
static void setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, llvm::Function &F); + // Add the Branch Protection Attributes of the FuncAttrs. static void - setBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, - llvm::AttrBuilder &FuncAttrs); + initBranchProtectionFnAttributes(const TargetInfo::BranchProtectionInfo &BPI, + llvm::AttrBuilder &FuncAttrs); protected: static std::string qualifyWindowsLibrary(StringRef Lib); diff --git a/clang/test/CodeGen/aarch64-branch-protection-attr.c b/clang/test/CodeGen/aarch64-branch-protection-attr.c index e7ae7fb1570c95..c66bce1bee6d36 100644 --- a/clang/test/CodeGen/aarch64-branch-protection-attr.c +++ b/clang/test/CodeGen/aarch64-branch-protection-attr.c @@ -1,6 +1,18 @@ // REQUIRES: aarch64-registered-target // RUN: %c
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) (PR #102646)
https://github.com/DanielKristofKiss approved this pull request. @RSilicon Thanks for the cherry-pick, LGTM. Did it locally too and check-clang passes. https://github.com/llvm/llvm-project/pull/102646 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102654 None >From 25b920ae924a5bb9bbd5900296359f2df08becd3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 21:33:55 +0400 Subject: [PATCH] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager --- llvm/lib/Target/AMDGPU/AMDGPU.h | 13 ++- .../AMDGPU/AMDGPUAnnotateUniformValues.cpp| 105 +++--- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +- .../test/CodeGen/AMDGPU/annotate-noclobber.ll | 3 +- 5 files changed, 81 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 6b5754dad1770f..99b04d6e4cfc3f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -293,7 +293,14 @@ class AMDGPUAttributorPass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -FunctionPass *createAMDGPUAnnotateUniformValues(); +class AMDGPUAnnotateUniformValuesPass +: public PassInfoMixin { +public: + AMDGPUAnnotateUniformValuesPass() {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); @@ -321,8 +328,8 @@ extern char &SIOptimizeExecMaskingPreRAID; void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); extern char &SIOptimizeVGPRLiveRangeID; -void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); -extern char &AMDGPUAnnotateUniformValuesPassID; +void initializeAMDGPUAnnotateUniformValuesLegacyPass(PassRegistry &); +extern char &AMDGPUAnnotateUniformValuesLegacyPassID; void initializeAMDGPUCodeGenPreparePass(PassRegistry&); extern char &AMDGPUCodeGenPrepareID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 
6a409f0dcbe774..fa66d53bd7c64b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -27,8 +27,8 @@ using namespace llvm; namespace { -class AMDGPUAnnotateUniformValues : public FunctionPass, - public InstVisitor { +class AMDGPUAnnotateUniformValues +: public InstVisitor { UniformityInfo *UA; MemorySSA *MSSA; AliasAnalysis *AA; @@ -46,37 +46,19 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, } public: - static char ID; - AMDGPUAnnotateUniformValues() : -FunctionPass(ID) { } - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - StringRef getPassName() const override { -return "AMDGPU Annotate Uniform Values"; - } - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } + AMDGPUAnnotateUniformValues(UniformityInfo &UA, MemorySSA &MSSA, + AliasAnalysis &AA, const Function &F) + : UA(&UA), MSSA(&MSSA), AA(&AA), +isEntryFunc(AMDGPU::isEntryFunctionCC(F.getCallingConv())) {} void visitBranchInst(BranchInst &I); void visitLoadInst(LoadInst &I); + + bool changed() const { return Changed; } }; } // End anonymous namespace -INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, - "Add AMDGPU uniform metadata", false, false) -INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, -"Add AMDGPU uniform metadata", false, false) - -char AMDGPUAnnotateUniformValues::ID = 0; - void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { if (UA->isUniform(&I)) setUniformMetadata(&I); @@ -100,25 +82,70 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { setNoClobberMetadata(&I); } -bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { - return false; +PreservedAnalyses 
+AMDGPUAnnotateUniformValuesPass::run(Function &F, + FunctionAnalysisManager &FAM) { + UniformityInfo &UI = FAM.getResult(F); + MemorySSA &MSSA = FAM.getResult(F).getMSSA(); + AAResults &AA = FAM.getResult(F); + + AMDGPUAnnotateUniformValues Impl(UI, MSSA, AA, F); + Impl.visit(F); + + PreservedAnalyses PA = PreservedAnalyses::none(); + if (!Impl.changed()) +return PA; + + // TODO: Should preserve nearly everything + PA.preserveSet(); + return PA; } -bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { +class AMDGPUAnnotateUniformValuesLegacy : public FunctionPass { +public: + static char ID; + + AMDGPUAnnotateUniformValuesLegacy() : FunctionPass(ID) {} + + bool doInitialization(Modul
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102654?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102654** https://app.graphite.dev/github/pr/llvm/llvm-project/102654?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102653** https://app.graphite.dev/github/pr/llvm/llvm-project/102653?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102654 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102654 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102654.diff 5 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+10-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp (+66-39) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/annotate-noclobber.ll (+2-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 6b5754dad1770..99b04d6e4cfc3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -293,7 +293,14 @@ class AMDGPUAttributorPass : public PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -FunctionPass *createAMDGPUAnnotateUniformValues(); +class AMDGPUAnnotateUniformValuesPass +: public PassInfoMixin { +public: + AMDGPUAnnotateUniformValuesPass() {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); @@ -321,8 +328,8 @@ extern char &SIOptimizeExecMaskingPreRAID; void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); extern char &SIOptimizeVGPRLiveRangeID; -void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); -extern char &AMDGPUAnnotateUniformValuesPassID; +void initializeAMDGPUAnnotateUniformValuesLegacyPass(PassRegistry &); +extern char &AMDGPUAnnotateUniformValuesLegacyPassID; void initializeAMDGPUCodeGenPreparePass(PassRegistry&); extern char &AMDGPUCodeGenPrepareID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 6a409f0dcbe77..fa66d53bd7c64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -27,8 +27,8 @@ using namespace llvm; namespace { -class AMDGPUAnnotateUniformValues : public FunctionPass, - public InstVisitor { +class AMDGPUAnnotateUniformValues +: public InstVisitor { UniformityInfo *UA; MemorySSA *MSSA; AliasAnalysis *AA; @@ -46,37 +46,19 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, } public: - static char ID; - AMDGPUAnnotateUniformValues() : -FunctionPass(ID) { } - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - StringRef getPassName() const override { -return "AMDGPU Annotate Uniform Values"; - } - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } + AMDGPUAnnotateUniformValues(UniformityInfo &UA, MemorySSA &MSSA, + AliasAnalysis &AA, const Function &F) + : UA(&UA), MSSA(&MSSA), AA(&AA), +isEntryFunc(AMDGPU::isEntryFunctionCC(F.getCallingConv())) {} void visitBranchInst(BranchInst &I); void visitLoadInst(LoadInst &I); + + bool changed() const { return Changed; } }; } // End anonymous namespace -INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, - "Add AMDGPU uniform metadata", false, false) -INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, -"Add AMDGPU uniform metadata", false, false) - -char AMDGPUAnnotateUniformValues::ID = 0; - void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { if (UA->isUniform(&I)) setUniformMetadata(&I); @@ -100,25 +82,70 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { setNoClobberMetadata(&I); } -bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { - return false; +PreservedAnalyses +AMDGPUAnnotateUniformValuesPass::run(Function &F, + FunctionAnalysisManager &FAM) { + UniformityInfo &UI = 
FAM.getResult(F); + MemorySSA &MSSA = FAM.getResult(F).getMSSA(); + AAResults &AA = FAM.getResult(F); + + AMDGPUAnnotateUniformValues Impl(UI, MSSA, AA, F); + Impl.visit(F); + + PreservedAnalyses PA = PreservedAnalyses::none(); + if (!Impl.changed()) +return PA; + + // TODO: Should preserve nearly everything + PA.preserveSet(); + return PA; } -bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { +class AMDGPUAnnotateUniformValuesLegacy : public FunctionPass { +public: + static char ID; + + AMDGPUAnnotateUniformValuesLegacy() : FunctionPass(ID) {} + + bool doInitialization(Module &M) override { return false; } + + bool runOnFunction(Function &F) override; + StringRef getPassName() const override {
[llvm-branch-commits] [BOLT] Set RawBranchCount in DataAggregator (PR #101093)
https://github.com/aaupov ready_for_review https://github.com/llvm/llvm-project/pull/101093 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [MC][NFC] Store MCPseudoProbeFuncDesc::FuncName as StringRef (PR #100655)
https://github.com/aaupov closed https://github.com/llvm/llvm-project/pull/100655 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/101094 >From f598510001859a29f6f1ff6362fb9950ab6340cd Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 16:14:08 -0700 Subject: [PATCH 1/2] Update test to check the option with llvm-bolt with fdata, YAML, and pre-aggregated profile Created using spr 1.3.4 --- bolt/test/X86/pre-aggregated-perf.test | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index fc6f332d53dfb8..0f5137309e85d1 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -15,9 +15,15 @@ RUN: --show-density --profile-density-threshold=9 \ RUN: --profile-density-cutoff-hot=97 \ RUN: --profile-use-dfs | FileCheck %s -RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts. 
>From e91907e57b39c8c79eb58b4d28d78fa253b130cb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 20:09:08 -0700 Subject: [PATCH 2/2] show-density init(true) Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 23009bf74e0773..83fd6b2562eca8 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(false), +static cl::opt ShowDensity("show-density", cl::init(true), cl::desc("show profile density details"), cl::Optional); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/101094 >From f598510001859a29f6f1ff6362fb9950ab6340cd Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 16:14:08 -0700 Subject: [PATCH 1/2] Update test to check the option with llvm-bolt with fdata, YAML, and pre-aggregated profile Created using spr 1.3.4 --- bolt/test/X86/pre-aggregated-perf.test | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index fc6f332d53dfb8..0f5137309e85d1 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -15,9 +15,15 @@ RUN: --show-density --profile-density-threshold=9 \ RUN: --profile-density-cutoff-hot=97 \ RUN: --profile-use-dfs | FileCheck %s -RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts. 
>From e91907e57b39c8c79eb58b4d28d78fa253b130cb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 20:09:08 -0700 Subject: [PATCH 2/2] show-density init(true) Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 23009bf74e0773..83fd6b2562eca8 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(false), +static cl::opt ShowDensity("show-density", cl::init(true), cl::desc("show profile density details"), cl::Optional); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/101094 >From f598510001859a29f6f1ff6362fb9950ab6340cd Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 16:14:08 -0700 Subject: [PATCH 1/3] Update test to check the option with llvm-bolt with fdata, YAML, and pre-aggregated profile Created using spr 1.3.4 --- bolt/test/X86/pre-aggregated-perf.test | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index fc6f332d53dfb8..0f5137309e85d1 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -15,9 +15,15 @@ RUN: --show-density --profile-density-threshold=9 \ RUN: --profile-density-cutoff-hot=97 \ RUN: --profile-use-dfs | FileCheck %s -RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s -RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null | FileCheck %s +RUN: llvm-bolt %t.exe -data %t -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -data %t.new -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s +RUN: llvm-bolt %t.exe -p %p/Inputs/pre-aggregated.txt --pa -o %t.null \ +RUN: --show-density --profile-density-threshold=9 \ +RUN: --profile-density-cutoff-hot=97 | FileCheck %s CHECK: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile CHECK: BOLT-INFO: Functions with density >= 9.4 account for 97.00% total sample counts. 
>From e91907e57b39c8c79eb58b4d28d78fa253b130cb Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 29 Jul 2024 20:09:08 -0700 Subject: [PATCH 2/3] show-density init(true) Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index 23009bf74e0773..83fd6b2562eca8 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(false), +static cl::opt ShowDensity("show-density", cl::init(true), cl::desc("show profile density details"), cl::Optional); >From 0d5291b01264a5387f8afd9fb69baf55fdc409a7 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 Aug 2024 11:17:57 -0700 Subject: [PATCH 3/3] show-density off by default Created using spr 1.3.4 --- bolt/lib/Passes/BinaryPasses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index e0ad2af63a384a..0dc4a37e0ba946 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -224,7 +224,7 @@ static cl::opt TopCalledLimit( cl::init(100), cl::Hidden, cl::cat(BoltCategory)); // Profile density options, synced with llvm-profgen/ProfileGenerator.cpp -static cl::opt ShowDensity("show-density", cl::init(true), +static cl::opt ShowDensity("show-density", cl::init(false), cl::desc("show profile density details"), cl::Optional); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Auto-generate element type verification for VectorType (PR #102449)
https://github.com/River707 approved this pull request. https://github.com/llvm/llvm-project/pull/102449 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [Arm][AArch64][Clang] Respect function's branch protection attributes. (#101978) (PR #102646)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/102646 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][ODS] Verify type constraints in Types and Attributes (PR #102326)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/102326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][ODS] Verify type constraints in Types and Attributes (PR #102326)
https://github.com/matthias-springer edited https://github.com/llvm/llvm-project/pull/102326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][ODS] Verify type constraints in Types and Attributes (PR #102326)
@@ -153,6 +153,9 @@ class TypeConstraint { // The name of the C++ Type class if known, or Type if not. string cppClassName = cppClassNameParam; + // TODO: This field is sometimes called `cppClassName` and sometimes matthias-springer wrote: Fixed in #102657. https://github.com/llvm/llvm-project/pull/102326 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][IR] Auto-generate element type verification for VectorType (PR #102449)
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/102449 >From c399a1c6e82afcfcc2ad531cb49d53683f294f91 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 8 Aug 2024 12:28:23 +0200 Subject: [PATCH] [mlir][IR] Auto-generate element type verification for VectorType --- mlir/include/mlir/IR/BuiltinTypes.td| 4 +++- mlir/lib/AsmParser/TypeParser.cpp | 13 +++-- mlir/test/IR/invalid-builtin-types.mlir | 6 +++--- mlir/test/python/ir/builtin_types.py| 2 +- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td index 365edcf68d8b94..4b3add2035263c 100644 --- a/mlir/include/mlir/IR/BuiltinTypes.td +++ b/mlir/include/mlir/IR/BuiltinTypes.td @@ -17,6 +17,7 @@ include "mlir/IR/AttrTypeBase.td" include "mlir/IR/BuiltinDialect.td" include "mlir/IR/BuiltinTypeInterfaces.td" +include "mlir/IR/CommonTypeConstraints.td" // TODO: Currently the types defined in this file are prefixed with `Builtin_`. // This is to differentiate the types here with the ones in OpBase.td. We should @@ -1146,7 +1147,7 @@ def Builtin_Vector : Builtin_Type<"Vector", "vector", }]; let parameters = (ins ArrayRefParameter<"int64_t">:$shape, -"Type":$elementType, +AnyTypeOf<[AnyInteger, Index, AnyFloat]>:$elementType, ArrayRefParameter<"bool">:$scalableDims ); let builders = [ @@ -1173,6 +1174,7 @@ def Builtin_Vector : Builtin_Type<"Vector", "vector", /// type. In particular, vectors can consist of integer, index, or float /// primitives. static bool isValidElementType(Type t) { + // TODO: Auto-generate this function from $elementType. 
return ::llvm::isa(t); } diff --git a/mlir/lib/AsmParser/TypeParser.cpp b/mlir/lib/AsmParser/TypeParser.cpp index 542eaeefe57f12..f070c072c43296 100644 --- a/mlir/lib/AsmParser/TypeParser.cpp +++ b/mlir/lib/AsmParser/TypeParser.cpp @@ -458,31 +458,24 @@ Type Parser::parseTupleType() { /// static-dim-list ::= decimal-literal (`x` decimal-literal)* /// VectorType Parser::parseVectorType() { + SMLoc loc = getToken().getLoc(); consumeToken(Token::kw_vector); if (parseToken(Token::less, "expected '<' in vector type")) return nullptr; + // Parse the dimensions. SmallVector dimensions; SmallVector scalableDims; if (parseVectorDimensionList(dimensions, scalableDims)) return nullptr; - if (any_of(dimensions, [](int64_t i) { return i <= 0; })) -return emitError(getToken().getLoc(), - "vector types must have positive constant sizes"), - nullptr; // Parse the element type. - auto typeLoc = getToken().getLoc(); auto elementType = parseType(); if (!elementType || parseToken(Token::greater, "expected '>' in vector type")) return nullptr; - if (!VectorType::isValidElementType(elementType)) -return emitError(typeLoc, "vector elements must be int/index/float type"), - nullptr; - - return VectorType::get(dimensions, elementType, scalableDims); + return getChecked(loc, dimensions, elementType, scalableDims); } /// Parse a dimension list in a vector type. This populates the dimension list. diff --git a/mlir/test/IR/invalid-builtin-types.mlir b/mlir/test/IR/invalid-builtin-types.mlir index 9884212e916c1f..07854a25000feb 100644 --- a/mlir/test/IR/invalid-builtin-types.mlir +++ b/mlir/test/IR/invalid-builtin-types.mlir @@ -120,17 +120,17 @@ func.func @illegaltype(i21312312323120) // expected-error {{invalid integer widt // - // Test no nested vector. 
-// expected-error@+1 {{vector elements must be int/index/float type}} +// expected-error@+1 {{failed to verify 'elementType': integer or index or floating-point}} func.func @vectors(vector<1 x vector<1xi32>>, vector<2x4xf32>) // - -// expected-error @+1 {{vector types must have positive constant sizes}} +// expected-error @+1 {{vector types must have positive constant sizes but got 0}} func.func @zero_vector_type() -> vector<0xi32> // - -// expected-error @+1 {{vector types must have positive constant sizes}} +// expected-error @+1 {{vector types must have positive constant sizes but got 1, 0}} func.func @zero_in_vector_type() -> vector<1x0xi32> // - diff --git a/mlir/test/python/ir/builtin_types.py b/mlir/test/python/ir/builtin_types.py index 2161f110ac31e2..f9554105ed 100644 --- a/mlir/test/python/ir/builtin_types.py +++ b/mlir/test/python/ir/builtin_types.py @@ -345,7 +345,7 @@ def testVectorType(): VectorType.get(shape, none) except MLIRError as e: # CHECK: Invalid type: -# CHECK: error: unknown: vector elements must be int/index/float type but got 'none' +# CHECK: error: unknown: failed to verify 'elementType': integer or index or floating-point print(e) else: print("Exception not produced")
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102663 None >From b6224ab90e73e07da50c612a7dc93da719208dbd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 22:54:33 +0400 Subject: [PATCH] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager --- llvm/lib/Target/AMDGPU/AMDGPU.h | 14 +- .../AMDGPU/AMDGPUCodeGenPassBuilder.cpp | 2 + llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +- llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp| 144 ++ .../CodeGen/AMDGPU/si-lower-i1-copies.mir | 1 + 6 files changed, 100 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 99b04d6e4cfc3f..17a130e82ae8fb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/AMDGPUAddrSpace.h" @@ -36,7 +37,7 @@ FunctionPass *createGCNDPPCombinePass(); FunctionPass *createSIAnnotateControlFlowLegacyPass(); FunctionPass *createSIFoldOperandsPass(); FunctionPass *createSIPeepholeSDWAPass(); -FunctionPass *createSILowerI1CopiesPass(); +FunctionPass *createSILowerI1CopiesLegacyPass(); FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); FunctionPass *createSIShrinkInstructionsPass(); FunctionPass *createSILoadStoreOptimizerPass(); @@ -82,6 +83,13 @@ struct AMDGPUUseNativeCallsPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +class SILowerI1CopiesPass : public PassInfoMixin { +public: + SILowerI1CopiesPass() = default; + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &); void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); @@ -174,8 +182,8 @@ 
extern char &SIFixVGPRCopiesID; void initializeSILowerWWMCopiesPass(PassRegistry &); extern char &SILowerWWMCopiesID; -void initializeSILowerI1CopiesPass(PassRegistry &); -extern char &SILowerI1CopiesID; +void initializeSILowerI1CopiesLegacyPass(PassRegistry &); +extern char &SILowerI1CopiesLegacyID; void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); extern char &AMDGPUGlobalISelDivergenceLoweringID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index cc4285f130fc82..50491247f0eddb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -7,6 +7,7 @@ //===--===// #include "AMDGPUCodeGenPassBuilder.h" +#include "AMDGPU.h" #include "AMDGPUISelDAGToDAG.h" #include "AMDGPUTargetMachine.h" #include "SIFixSGPRCopies.h" @@ -40,5 +41,6 @@ void AMDGPUCodeGenPassBuilder::addAsmPrinter(AddMachinePass &addPass, Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const { addPass(AMDGPUISelDAGToDAGPass(TM)); addPass(SIFixSGPRCopiesPass()); + addPass(SILowerI1CopiesPass()); return Error::success(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index cbb19c003a264a..af68eea665571f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -79,4 +79,5 @@ FUNCTION_PASS_WITH_PARAMS( #endif MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this)) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) +MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) #undef MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 3cc0d314a7c5d8..ad816ec29b02dc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -400,7 +400,7 @@ 
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeGlobalISel(*PR); initializeAMDGPUDAGToDAGISelLegacyPass(*PR); initializeGCNDPPCombinePass(*PR); - initializeSILowerI1CopiesPass(*PR); + initializeSILowerI1CopiesLegacyPass(*PR); initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR); initializeSILowerWWMCopiesPass(*PR); initializeAMDGPUMarkLastScratchLoadPass(*PR); @@ -1269,7 +1269,7 @@ bool GCNPassConfig::addILPOpts() { bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); addPass(&SIFixSGPRCopiesLegacyID); - addPass(createSILowerI1CopiesPass()); + addPass(createSILowerI1CopiesLegacyPass()); return false; } diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGP
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102663 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/102663?utm_source=stack-comment-downstack-mergeability-warning). Learn more: https://graphite.dev/docs/merge-pull-requests * **#102663** (https://app.graphite.dev/github/pr/llvm/llvm-project/102663) 👈 * **#102654** (https://app.graphite.dev/github/pr/llvm/llvm-project/102654) * **#102653** (https://app.graphite.dev/github/pr/llvm/llvm-project/102653) * `main` This stack of pull requests is managed by Graphite. Learn more about stacking: https://stacking.dev/ Join @arsenm and the rest of your teammates on Graphite: https://graphite.dev https://github.com/llvm/llvm-project/pull/102663 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102663.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+11-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-2) - (modified) llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp (+83-61) - (modified) llvm/test/CodeGen/AMDGPU/si-lower-i1-copies.mir (+1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 99b04d6e4cfc3..17a130e82ae8f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/AMDGPUAddrSpace.h" @@ -36,7 +37,7 @@ FunctionPass *createGCNDPPCombinePass(); FunctionPass *createSIAnnotateControlFlowLegacyPass(); FunctionPass *createSIFoldOperandsPass(); FunctionPass *createSIPeepholeSDWAPass(); -FunctionPass *createSILowerI1CopiesPass(); +FunctionPass *createSILowerI1CopiesLegacyPass(); FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); FunctionPass *createSIShrinkInstructionsPass(); FunctionPass *createSILoadStoreOptimizerPass(); @@ -82,6 +83,13 @@ struct AMDGPUUseNativeCallsPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +class SILowerI1CopiesPass : public PassInfoMixin { +public: + SILowerI1CopiesPass() = default; + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); +}; + void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &); void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); @@ -174,8 +182,8 @@ extern char &SIFixVGPRCopiesID; void 
initializeSILowerWWMCopiesPass(PassRegistry &); extern char &SILowerWWMCopiesID; -void initializeSILowerI1CopiesPass(PassRegistry &); -extern char &SILowerI1CopiesID; +void initializeSILowerI1CopiesLegacyPass(PassRegistry &); +extern char &SILowerI1CopiesLegacyID; void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); extern char &AMDGPUGlobalISelDivergenceLoweringID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index cc4285f130fc8..50491247f0edd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -7,6 +7,7 @@ //===--===// #include "AMDGPUCodeGenPassBuilder.h" +#include "AMDGPU.h" #include "AMDGPUISelDAGToDAG.h" #include "AMDGPUTargetMachine.h" #include "SIFixSGPRCopies.h" @@ -40,5 +41,6 @@ void AMDGPUCodeGenPassBuilder::addAsmPrinter(AddMachinePass &addPass, Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const { addPass(AMDGPUISelDAGToDAGPass(TM)); addPass(SIFixSGPRCopiesPass()); + addPass(SILowerI1CopiesPass()); return Error::success(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index cbb19c003a264..af68eea665571 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -79,4 +79,5 @@ FUNCTION_PASS_WITH_PARAMS( #endif MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this)) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) +MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) #undef MACHINE_FUNCTION_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 3cc0d314a7c5d..ad816ec29b02d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -400,7 +400,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() { initializeGlobalISel(*PR); initializeAMDGPUDAGToDAGISelLegacyPass(*PR); initializeGCNDPPCombinePass(*PR); - initializeSILowerI1CopiesPass(*PR); + initializeSILowerI1CopiesLegacyPass(*PR); initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR); initializeSILowerWWMCopiesPass(*PR); initializeAMDGPUMarkLastScratchLoadPass(*PR); @@ -1269,7 +1269,7 @@ bool GCNPassConfig::addILPOpts() { bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); addPass(&SIFixSGPRCopiesLegacyID); - addPass(createSILowerI1CopiesPass()); + addPass(createSILowerI1CopiesLegacyPass()); return false; } diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp index a9ee74dec1203..7d49358d44025 100644 --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cp
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102670 Backport 2eb6e30fe83ccce3cf01e596e73fa6385facd44b Requested by: @asl >From 89ac96cf1c2bcd057655fb4b8142e094f49031b5 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Fri, 9 Aug 2024 11:49:50 -0700 Subject: [PATCH] [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) We already ended up with -fptrauth-returns, the feature macro, the lang opt, and the actual backend lowering. The only part left is threading it all through PointerAuthOptions, to drive the addition of the "ptrauth-returns" attribute to generated functions. While there, do minor cleanup on ptrauth-function-attributes.c. This also adds ptrauth_key_return_address to ptrauth.h. (cherry picked from commit 2eb6e30fe83ccce3cf01e596e73fa6385facd44b) --- clang/include/clang/Basic/PointerAuthOptions.h | 3 +++ clang/lib/CodeGen/CodeGenFunction.cpp| 2 ++ clang/lib/Frontend/CompilerInvocation.cpp| 4 +++- clang/lib/Headers/ptrauth.h | 6 ++ clang/test/CodeGen/ptrauth-function-attributes.c | 9 +++-- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/PointerAuthOptions.h b/clang/include/clang/Basic/PointerAuthOptions.h index 417b4b00648c78..c0ab35bce5d84b 100644 --- a/clang/include/clang/Basic/PointerAuthOptions.h +++ b/clang/include/clang/Basic/PointerAuthOptions.h @@ -159,6 +159,9 @@ class PointerAuthSchema { }; struct PointerAuthOptions { + /// Should return addresses be authenticated? + bool ReturnAddresses = false; + /// Do indirect goto label addresses need to be authenticated? bool IndirectGotos = false; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index af201554898f31..4dc57d0ff5b269 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -880,6 +880,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Add pointer authentication attributes. 
const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts(); + if (CodeGenOpts.PointerAuth.ReturnAddresses) +Fn->addFnAttr("ptrauth-returns"); if (CodeGenOpts.PointerAuth.FunctionPointers) Fn->addFnAttr("ptrauth-calls"); if (CodeGenOpts.PointerAuth.IndirectGotos) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index f6b6c44a4cab6a..fa5d076c202a36 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1505,13 +1505,15 @@ void CompilerInvocation::setDefaultPointerAuthOptions( PointerAuthSchema(Key::ASIA, false, Discrimination::Type); } Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos; + Opts.ReturnAddresses = LangOpts.PointerAuthReturns; } static void parsePointerAuthOptions(PointerAuthOptions &Opts, const LangOptions &LangOpts, const llvm::Triple &Triple, DiagnosticsEngine &Diags) { - if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos) + if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos && + !LangOpts.PointerAuthReturns) return; CompilerInvocation::setDefaultPointerAuthOptions(Opts, LangOpts, Triple); diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index 4724155b0dc796..154b599862a8e2 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -28,6 +28,12 @@ typedef enum { /* A process-specific key which can be used to sign data pointers. */ ptrauth_key_process_dependent_data = ptrauth_key_asdb, + /* The key used to sign return addresses on the stack. + The extra data is based on the storage address of the return address. + On AArch64, that is always the storage address of the return address + 8 + (or, in other words, the value of the stack pointer on function entry) */ + ptrauth_key_return_address = ptrauth_key_process_dependent_code, + /* The key used to sign C function pointers. The extra data is always 0. 
*/ ptrauth_key_function_pointer = ptrauth_key_process_independent_code, diff --git a/clang/test/CodeGen/ptrauth-function-attributes.c b/clang/test/CodeGen/ptrauth-function-attributes.c index 6a09cd37bf4854..17ebf9d6e2e01c 100644 --- a/clang/test/CodeGen/ptrauth-function-attributes.c +++ b/clang/test/CodeGen/ptrauth-function-attributes.c @@ -1,11 +1,14 @@ +// RUN: %clang_cc1 -triple arm64-apple-ios-emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF // RUN: %clang_cc1 -triple arm64e-apple-ios -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF // RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF -// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-l
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/102670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
llvmbot wrote: @kovdan01 What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/102670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: None (llvmbot) Changes Backport 2eb6e30fe83ccce3cf01e596e73fa6385facd44b Requested by: @asl --- Full diff: https://github.com/llvm/llvm-project/pull/102670.diff 5 Files Affected: - (modified) clang/include/clang/Basic/PointerAuthOptions.h (+3) - (modified) clang/lib/CodeGen/CodeGenFunction.cpp (+2) - (modified) clang/lib/Frontend/CompilerInvocation.cpp (+3-1) - (modified) clang/lib/Headers/ptrauth.h (+6) - (modified) clang/test/CodeGen/ptrauth-function-attributes.c (+7-2) ``diff diff --git a/clang/include/clang/Basic/PointerAuthOptions.h b/clang/include/clang/Basic/PointerAuthOptions.h index 417b4b00648c78..c0ab35bce5d84b 100644 --- a/clang/include/clang/Basic/PointerAuthOptions.h +++ b/clang/include/clang/Basic/PointerAuthOptions.h @@ -159,6 +159,9 @@ class PointerAuthSchema { }; struct PointerAuthOptions { + /// Should return addresses be authenticated? + bool ReturnAddresses = false; + /// Do indirect goto label addresses need to be authenticated? bool IndirectGotos = false; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index af201554898f31..4dc57d0ff5b269 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -880,6 +880,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Add pointer authentication attributes. 
const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts(); + if (CodeGenOpts.PointerAuth.ReturnAddresses) +Fn->addFnAttr("ptrauth-returns"); if (CodeGenOpts.PointerAuth.FunctionPointers) Fn->addFnAttr("ptrauth-calls"); if (CodeGenOpts.PointerAuth.IndirectGotos) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index f6b6c44a4cab6a..fa5d076c202a36 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1505,13 +1505,15 @@ void CompilerInvocation::setDefaultPointerAuthOptions( PointerAuthSchema(Key::ASIA, false, Discrimination::Type); } Opts.IndirectGotos = LangOpts.PointerAuthIndirectGotos; + Opts.ReturnAddresses = LangOpts.PointerAuthReturns; } static void parsePointerAuthOptions(PointerAuthOptions &Opts, const LangOptions &LangOpts, const llvm::Triple &Triple, DiagnosticsEngine &Diags) { - if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos) + if (!LangOpts.PointerAuthCalls && !LangOpts.PointerAuthIndirectGotos && + !LangOpts.PointerAuthReturns) return; CompilerInvocation::setDefaultPointerAuthOptions(Opts, LangOpts, Triple); diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index 4724155b0dc796..154b599862a8e2 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -28,6 +28,12 @@ typedef enum { /* A process-specific key which can be used to sign data pointers. */ ptrauth_key_process_dependent_data = ptrauth_key_asdb, + /* The key used to sign return addresses on the stack. + The extra data is based on the storage address of the return address. + On AArch64, that is always the storage address of the return address + 8 + (or, in other words, the value of the stack pointer on function entry) */ + ptrauth_key_return_address = ptrauth_key_process_dependent_code, + /* The key used to sign C function pointers. The extra data is always 0. 
*/ ptrauth_key_function_pointer = ptrauth_key_process_independent_code, diff --git a/clang/test/CodeGen/ptrauth-function-attributes.c b/clang/test/CodeGen/ptrauth-function-attributes.c index 6a09cd37bf4854..17ebf9d6e2e01c 100644 --- a/clang/test/CodeGen/ptrauth-function-attributes.c +++ b/clang/test/CodeGen/ptrauth-function-attributes.c @@ -1,11 +1,14 @@ +// RUN: %clang_cc1 -triple arm64-apple-ios -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF // RUN: %clang_cc1 -triple arm64e-apple-ios -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF // RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,OFF -// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CALLS +// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CALLS // RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-calls -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,CALLS +// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-returns -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,RETS +// RUN: %clang_cc1 -triple aarch64-linux-gnu -fptrauth-returns -emit-llvm %s -o - | FileCheck %s --check-prefixes=ALL,RETS + // RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-in
[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/102599 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [hmaptool] Implement simple string deduplication (PR #102677)
https://github.com/smeenai created https://github.com/llvm/llvm-project/pull/102677 This reduces the size of the generated header maps significantly (35% measured internally). Further savings are possible through tail deduplication, but the additional complication isn't worth the gain IMO. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [hmaptool] Implement simple string deduplication (PR #102677)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Shoaib Meenai (smeenai) Changes This reduces the size of the generated header maps significantly (35% measured internally). Further savings are possible through tail deduplication, but the additional complication isn't worth the gain IMO. --- Full diff: https://github.com/llvm/llvm-project/pull/102677.diff 1 Files Affected: - (modified) clang/utils/hmaptool/hmaptool (+23-8) ``diff diff --git a/clang/utils/hmaptool/hmaptool b/clang/utils/hmaptool/hmaptool index aa400e3dd64e9..2ca769a549bed 100755 --- a/clang/utils/hmaptool/hmaptool +++ b/clang/utils/hmaptool/hmaptool @@ -110,6 +110,24 @@ class HeaderMap(object): yield (self.get_string(key_idx), self.get_string(prefix_idx) + self.get_string(suffix_idx)) +class StringTable: +def __init__(self): +# A string table offset of 0 is interpreted as an empty bucket, so it's +# important we don't assign an actual string to that offset. +self.table = "\0" +# For the same reason we don't want the empty string having a 0 offset. +self.offsets = {} + +def add(self, string): +offset = self.offsets.get(string) +if offset: +return offset + +offset = len(self.table) +self.table += string + "\0" +self.offsets[string] = offset +return offset + ### def action_dump(name, args): @@ -182,7 +200,7 @@ def action_write(name, args): table = [(0, 0, 0) for i in range(num_buckets)] max_value_len = 0 -strtable = "\0" +strtable = StringTable() for key,value in mappings.items(): if not isinstance(key, str): key = key.decode('utf-8') @@ -190,17 +208,14 @@ def action_write(name, args): value = value.decode('utf-8') max_value_len = max(max_value_len, len(value)) -key_idx = len(strtable) -strtable += key + '\0' +key_idx = strtable.add(key) prefix, suffix = os.path.split(value) # This guarantees that prefix + suffix == value in all cases, including when # prefix is empty or contains a trailing slash or suffix is empty (hence the use # of `len(value) - len(suffix)` instead of just `-len(suffix)`. 
prefix += value[len(prefix) : len(value) - len(suffix)] -prefix_idx = len(strtable) -strtable += prefix + '\0' -suffix_idx = len(strtable) -strtable += suffix + '\0' +prefix_idx = strtable.add(prefix) +suffix_idx = strtable.add(suffix) hash = hmap_hash(key) for i in range(num_buckets): @@ -228,7 +243,7 @@ def action_write(name, args): f.write(struct.pack(header_fmt, *header)) for bucket in table: f.write(struct.pack(bucket_fmt, *bucket)) -f.write(strtable.encode()) +f.write(strtable.table.encode()) def action_tovfs(name, args): "convert a headermap to a VFS layout" `` https://github.com/llvm/llvm-project/pull/102677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
https://github.com/kovdan01 approved this pull request. https://github.com/llvm/llvm-project/pull/102670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102680 Backport 101cf540e698529d3dd899d00111bcb654a3c12b Requested by: @bulbazord >From deb61f38595fbee906616175c7e5a9e323e30c8c Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Fri, 9 Aug 2024 12:50:42 -0700 Subject: [PATCH] [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) This class is technically not usable in its current state. When you use it in a simple C++ project, your compiler will complain about an incomplete definition of SaveCoreOptions. Normally this isn't a problem, other classes in the SBAPI do this. The difference is that SBSaveCoreOptions has a default destructor in the header, so the compiler will attempt to generate the code for the destructor with an incomplete definition of the impl type. All methods for every class, including constructors and destructors, must have a separate implementation not in a header. (cherry picked from commit 101cf540e698529d3dd899d00111bcb654a3c12b) --- lldb/include/lldb/API/SBSaveCoreOptions.h | 2 +- lldb/source/API/SBSaveCoreOptions.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lldb/include/lldb/API/SBSaveCoreOptions.h b/lldb/include/lldb/API/SBSaveCoreOptions.h index e77496bd3a4a0d..75506fd752e762 100644 --- a/lldb/include/lldb/API/SBSaveCoreOptions.h +++ b/lldb/include/lldb/API/SBSaveCoreOptions.h @@ -17,7 +17,7 @@ class LLDB_API SBSaveCoreOptions { public: SBSaveCoreOptions(); SBSaveCoreOptions(const lldb::SBSaveCoreOptions &rhs); - ~SBSaveCoreOptions() = default; + ~SBSaveCoreOptions(); const SBSaveCoreOptions &operator=(const lldb::SBSaveCoreOptions &rhs); diff --git a/lldb/source/API/SBSaveCoreOptions.cpp b/lldb/source/API/SBSaveCoreOptions.cpp index 6c3f74596203d6..19ca83f932bcf1 100644 --- a/lldb/source/API/SBSaveCoreOptions.cpp +++ b/lldb/source/API/SBSaveCoreOptions.cpp @@ -29,6 +29,8 @@ SBSaveCoreOptions::SBSaveCoreOptions(const SBSaveCoreOptions &rhs) { m_opaque_up = 
clone(rhs.m_opaque_up); } +SBSaveCoreOptions::~SBSaveCoreOptions() = default; + const SBSaveCoreOptions & SBSaveCoreOptions::operator=(const SBSaveCoreOptions &rhs) { LLDB_INSTRUMENT_VA(this, rhs); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/102680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
llvmbot wrote: @clayborg What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/102680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
llvmbot wrote: @llvm/pr-subscribers-lldb Author: None (llvmbot) Changes Backport 101cf540e698529d3dd899d00111bcb654a3c12b Requested by: @bulbazord --- Full diff: https://github.com/llvm/llvm-project/pull/102680.diff 2 Files Affected: - (modified) lldb/include/lldb/API/SBSaveCoreOptions.h (+1-1) - (modified) lldb/source/API/SBSaveCoreOptions.cpp (+2) ``diff diff --git a/lldb/include/lldb/API/SBSaveCoreOptions.h b/lldb/include/lldb/API/SBSaveCoreOptions.h index e77496bd3a4a0d..75506fd752e762 100644 --- a/lldb/include/lldb/API/SBSaveCoreOptions.h +++ b/lldb/include/lldb/API/SBSaveCoreOptions.h @@ -17,7 +17,7 @@ class LLDB_API SBSaveCoreOptions { public: SBSaveCoreOptions(); SBSaveCoreOptions(const lldb::SBSaveCoreOptions &rhs); - ~SBSaveCoreOptions() = default; + ~SBSaveCoreOptions(); const SBSaveCoreOptions &operator=(const lldb::SBSaveCoreOptions &rhs); diff --git a/lldb/source/API/SBSaveCoreOptions.cpp b/lldb/source/API/SBSaveCoreOptions.cpp index 6c3f74596203d6..19ca83f932bcf1 100644 --- a/lldb/source/API/SBSaveCoreOptions.cpp +++ b/lldb/source/API/SBSaveCoreOptions.cpp @@ -29,6 +29,8 @@ SBSaveCoreOptions::SBSaveCoreOptions(const SBSaveCoreOptions &rhs) { m_opaque_up = clone(rhs.m_opaque_up); } +SBSaveCoreOptions::~SBSaveCoreOptions() = default; + const SBSaveCoreOptions & SBSaveCoreOptions::operator=(const SBSaveCoreOptions &rhs) { LLDB_INSTRUMENT_VA(this, rhs); `` https://github.com/llvm/llvm-project/pull/102680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
@@ -22,6 +22,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass(*static_cast(this))) rampitec wrote: Exceeds 80 chars per line. https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) { return Changed; } -bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const { - auto FI = FIM.find(F); - if (FI == FIM.end()) -return false; +bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM, + LazyCallGraph &CG) { - return AMDGPUPerfHint::isMemBound(FI->second); + SmallVector Worklist; + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { +for (LazyCallGraph::SCC &SCC : RC) { + if (SCC.size() != 1) +continue; + Function &F = SCC.begin()->getFunction(); + if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) rampitec wrote: Why is it limited to internal linkage? https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/102654 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
https://github.com/rampitec approved this pull request. https://github.com/llvm/llvm-project/pull/102663 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/19.x: [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (#102416) (PR #102670)
https://github.com/asl approved this pull request. This is one of two small frontend changes require to close the chain of changes required for end-to-end support of pointer authentication in LLVM 19. The change does not affect any other target and essentially just propagate command line options down to attributes. https://github.com/llvm/llvm-project/pull/102670 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] release/19.x: [lldb] Move definition of SBSaveCoreOptions dtor out of header (#102539) (PR #102680)
https://github.com/clayborg approved this pull request. https://github.com/llvm/llvm-project/pull/102680 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); wlei-llvm wrote: If there is no special reason to use `SampleCount` here, how about using `ExecutedBytes` to be consistent? We use the same value for FDO(https://github.com/llvm/llvm-project/blob/main/llvm/tools/llvm-profgen/ProfileGenerator.cpp#L807-L809). https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); aaupov wrote: I'm neutral about using ExecutedBytes but we use SampleCount for other profile-wide stats, e.g. stale samples percent. https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); wlei-llvm wrote: > I'm neutral about using ExecutedBytes but we use SampleCount for other > profile-wide stats, e.g. stale samples percent. Sounds good. No strong option, it shouldn't affect to the density value. https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); WenleiHe wrote: Don't have a good idea, but `ExecutedBytes` confused me as byte implied static size. It gives the impression of byte-wise coverage. `TotalSamplesInBytes`/`SampleCountInBytes`? https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) { return Changed; } -bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const { - auto FI = FIM.find(F); - if (FI == FIM.end()) -return false; +bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM, + LazyCallGraph &CG) { - return AMDGPUPerfHint::isMemBound(FI->second); + SmallVector Worklist; + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { +for (LazyCallGraph::SCC &SCC : RC) { + if (SCC.size() != 1) +continue; + Function &F = SCC.begin()->getFunction(); + if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) arsenm wrote: Copied from FunctionAttrs, but I think this meant to really be checking for interposable linkage (and I thought I dropped this before posting?) https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port SILowerI1Copies to new pass manager (PR #102663)
arsenm wrote: ### Merge activity * **Aug 9, 10:57 PM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/102663). https://github.com/llvm/llvm-project/pull/102663 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPUAnnotateUniformValues to new pass manager (PR #102654)
arsenm wrote: ### Merge activity * **Aug 9, 10:57 PM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/102654). https://github.com/llvm/llvm-project/pull/102654 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102645 >From a24983c4e848a0d1520c0fa25483bed76a7acbd0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 17:27:53 +0400 Subject: [PATCH] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. --- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 3 + .../Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 112 ++ .../Target/AMDGPU/AMDGPUPerfHintAnalysis.h| 62 ++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- llvm/test/CodeGen/AMDGPU/perfhint.ll | 1 + 7 files changed, 137 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 195e2a19214e80..5b8d37a8ae7944 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; -void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); -extern char &AMDGPUPerfHintAnalysisID; +void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &); +extern char &AMDGPUPerfHintAnalysisLegacyID; void initializeGCNRegPressurePrinterPass(PassRegistry &); extern char &GCNRegPressurePrinterID; diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 8579774f522309..bbb4573655ab79 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) -INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) +INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) #ifdef EXPENSIVE_CHECKS INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index b6a6c33d85f83c..7188c8953254c0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -22,6 +22,9 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", +AMDGPUPerfHintAnalysisPass( + *static_cast(this))) MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass()) MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass()) #undef MODULE_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 1213d5e0b41db1..f8943962cca069 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -12,12 +12,15 @@ /// //===--===// -#include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include 
"llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -54,12 +57,6 @@ static cl::opt STATISTIC(NumMemBound, "Number of functions marked as memory bound"); STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave"); -char llvm::AMDGPUPerfHintAnalysis::ID = 0; -char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID; - -INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE, -"Analysis if a function is memory bound", true, true) - namespace { struct AMDGPUPerfHint { @@ -67,7 +64,7 @@ struct AMDGPUPerfHint { public: AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/shiltian approved this pull request. https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Add profile density computation (PR #101094)
@@ -1441,6 +1458,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) { StaleSampleCount += SampleCount; ++NumAllStaleFunctions; } + +if (opts::ShowDensity) { + uint64_t Size = Function.getSize(); + // In case of BOLT split functions registered in BAT, executed traces are + // automatically attributed to the main fragment. Add up function sizes + // for all fragments. + if (IsHotParentOfBOLTSplitFunction) +for (const BinaryFunction *Fragment : Function.getFragments()) + Size += Fragment->getSize(); + double Density = (double)1.0 * Function.getExecutedBytes() / Size; + FuncDensityList.emplace_back(Density, SampleCount); aaupov wrote: Comments above the variable and the method say it's dynamically executed bytes, but will rename them to avoid confusion. > TotalSamplesInBytes/SampleCountInBytes? What do you mean by that? https://github.com/llvm/llvm-project/pull/101094 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/102717 None >From 3ec01daaa2d43350b2c835d4173ede441ca004a1 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 9 Aug 2024 23:25:21 -0400 Subject: [PATCH] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 29 ++--- clang/test/OpenMP/target_teams_codegen.cpp| 12 +++ .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 26 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 31 +++ 4 files changed, 54 insertions(+), 44 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8c5e4aa9c037e2..6c0c8646898cc6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9588,15 +9588,17 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } -static void emitNumTeamsForBareTargetDirective( +template +static void emitClauseForBareTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D, -llvm::SmallVectorImpl &NumTeams) { - const auto *C = D.getSingleClause(); - assert(!C->varlist_empty() && "ompx_bare requires explicit num_teams"); - CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - for (auto *E : C->getNumTeams()) { +llvm::SmallVectorImpl &Valuess) { + const auto *C = D.getSingleClause(); + assert(!C->varlist_empty() && + "ompx_bare requires explicit num_teams and thread_limit"); + CodeGenFunction::RunCleanupsScope Scope(CGF); + for (auto *E : C->varlist()) { llvm::Value *V = CGF.EmitScalarExpr(E); -NumTeams.push_back( +Valuess.push_back( CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); } } @@ -9672,14 +9674,17 @@ static void emitTargetCallKernelLaunch( bool IsBare = D.hasClausesOfKind(); SmallVector NumTeams; -if (IsBare) - emitNumTeamsForBareTargetDirective(CGF, D, NumTeams); -else +SmallVector NumThreads; +if (IsBare) { + 
emitClauseForBareTargetDirective(CGF, D, NumTeams); + emitClauseForBareTargetDirective(CGF, D, + NumThreads); +} else { NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D)); + NumThreads.push_back(OMPRuntime->emitNumThreadsForTargetDirective(CGF, D)); +} llvm::Value *DeviceID = emitDeviceID(Device, CGF); -llvm::Value *NumThreads = -OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *NumIterations = OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 9cab8eef148833..13d44e127201bd 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -127,13 +127,13 @@ int foo(int n) { aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1, 2) { a += 1; aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1, 2, 3) { a += 1; aa += 1; @@ -667,7 +667,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP144]], align 4 // CHECK1-NEXT:[[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP145]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP145]], align 4 // CHECK1-NEXT:[[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12 // CHECK1-NEXT:store i32 0, ptr [[TMP146]], align 4 // CHECK1-NEXT:[[TMP147:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, 
i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]]) @@ -720,7 +720,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP171:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP171]], align 4 // CHECK1-NEXT:[[TMP172:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP172]], align 4 +// CHECK1-NEXT:
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
https://github.com/shiltian ready_for_review https://github.com/llvm/llvm-project/pull/102717 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
shiltian wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102717?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102717** https://app.graphite.dev/github/pr/llvm/llvm-project/102717?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102715** https://app.graphite.dev/github/pr/llvm/llvm-project/102715?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @shiltian and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102717 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Shilei Tian (shiltian) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102717.diff 4 Files Affected: - (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+17-12) - (modified) clang/test/OpenMP/target_teams_codegen.cpp (+6-6) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+13-13) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+18-13) ``diff diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8c5e4aa9c037e2..6c0c8646898cc6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9588,15 +9588,17 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } -static void emitNumTeamsForBareTargetDirective( +template +static void emitClauseForBareTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D, -llvm::SmallVectorImpl &NumTeams) { - const auto *C = D.getSingleClause(); - assert(!C->varlist_empty() && "ompx_bare requires explicit num_teams"); - CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - for (auto *E : C->getNumTeams()) { +llvm::SmallVectorImpl &Valuess) { + const auto *C = D.getSingleClause(); + assert(!C->varlist_empty() && + "ompx_bare requires explicit num_teams and thread_limit"); + CodeGenFunction::RunCleanupsScope Scope(CGF); + for (auto *E : C->varlist()) { llvm::Value *V = CGF.EmitScalarExpr(E); -NumTeams.push_back( +Valuess.push_back( CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); } } @@ -9672,14 +9674,17 @@ static void emitTargetCallKernelLaunch( bool IsBare = D.hasClausesOfKind(); SmallVector NumTeams; -if (IsBare) - emitNumTeamsForBareTargetDirective(CGF, D, NumTeams); -else +SmallVector NumThreads; +if (IsBare) { + emitClauseForBareTargetDirective(CGF, D, NumTeams); + emitClauseForBareTargetDirective(CGF, D, + NumThreads); +} else { 
NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D)); + NumThreads.push_back(OMPRuntime->emitNumThreadsForTargetDirective(CGF, D)); +} llvm::Value *DeviceID = emitDeviceID(Device, CGF); -llvm::Value *NumThreads = -OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *NumIterations = OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 9cab8eef148833..13d44e127201bd 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -127,13 +127,13 @@ int foo(int n) { aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1, 2) { a += 1; aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1, 2, 3) { a += 1; aa += 1; @@ -667,7 +667,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP144]], align 4 // CHECK1-NEXT:[[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP145]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP145]], align 4 // CHECK1-NEXT:[[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12 // CHECK1-NEXT:store i32 0, ptr [[TMP146]], align 4 // CHECK1-NEXT:[[TMP147:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]]) @@ -720,7 
+720,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP171:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP171]], align 4 // CHECK1-NEXT:[[TMP172:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP172]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP172]], align 4 // CHECK1-NEXT:[[TMP173:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERN
[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim `thread_limit` clause (PR #102717)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Shilei Tian (shiltian) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102717.diff 4 Files Affected: - (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+17-12) - (modified) clang/test/OpenMP/target_teams_codegen.cpp (+6-6) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+13-13) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+18-13) ``diff diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8c5e4aa9c037e2..6c0c8646898cc6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9588,15 +9588,17 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } -static void emitNumTeamsForBareTargetDirective( +template +static void emitClauseForBareTargetDirective( CodeGenFunction &CGF, const OMPExecutableDirective &D, -llvm::SmallVectorImpl &NumTeams) { - const auto *C = D.getSingleClause(); - assert(!C->varlist_empty() && "ompx_bare requires explicit num_teams"); - CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - for (auto *E : C->getNumTeams()) { +llvm::SmallVectorImpl &Valuess) { + const auto *C = D.getSingleClause(); + assert(!C->varlist_empty() && + "ompx_bare requires explicit num_teams and thread_limit"); + CodeGenFunction::RunCleanupsScope Scope(CGF); + for (auto *E : C->varlist()) { llvm::Value *V = CGF.EmitScalarExpr(E); -NumTeams.push_back( +Valuess.push_back( CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); } } @@ -9672,14 +9674,17 @@ static void emitTargetCallKernelLaunch( bool IsBare = D.hasClausesOfKind(); SmallVector NumTeams; -if (IsBare) - emitNumTeamsForBareTargetDirective(CGF, D, NumTeams); -else +SmallVector NumThreads; +if (IsBare) { + emitClauseForBareTargetDirective(CGF, D, NumTeams); + emitClauseForBareTargetDirective(CGF, D, + NumThreads); +} else { 
NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D)); + NumThreads.push_back(OMPRuntime->emitNumThreadsForTargetDirective(CGF, D)); +} llvm::Value *DeviceID = emitDeviceID(Device, CGF); -llvm::Value *NumThreads = -OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *NumIterations = OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 9cab8eef148833..13d44e127201bd 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -127,13 +127,13 @@ int foo(int n) { aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1, 2) { a += 1; aa += 1; } - #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1) + #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1, 2, 3) { a += 1; aa += 1; @@ -667,7 +667,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP144]], align 4 // CHECK1-NEXT:[[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP145]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP145]], align 4 // CHECK1-NEXT:[[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12 // CHECK1-NEXT:store i32 0, ptr [[TMP146]], align 4 // CHECK1-NEXT:[[TMP147:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]]) @@ -720,7 
+720,7 @@ int bar(int n){ // CHECK1-NEXT:[[TMP171:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 10 // CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP171]], align 4 // CHECK1-NEXT:[[TMP172:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 11 -// CHECK1-NEXT:store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP172]], align 4 +// CHECK1-NEXT:store [3 x i32] [i32 1, i32 2, i32 3], ptr [[TMP172]], align 4 // CHECK1-NEXT:[[TMP173:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS
[llvm-branch-commits] [llvm] release/19.x: [AIX]export function descriptor symbols related to template functions. (#101920) (PR #102407)
https://github.com/hubert-reinterpretcast approved this pull request. LGTM! https://github.com/llvm/llvm-project/pull/102407 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits