https://github.com/ssahasra created
https://github.com/llvm/llvm-project/pull/199622
Add a statement attribute that suppresses MakeAvailable/MakeVisible cache
operations on AMDGPU atomic instructions while preserving memory ordering
(waits).
The attribute takes a string argument specifying the mode. Currently "none" is
the only supported mode. The resulting atomic or fence instruction carries
!mmra !{!"amdgcn-av", !"none"} metadata.
Assisted-By: Claude Opus 4.6
From e8963264dd7a37eedab7aff53d4ad008214fff Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe <[email protected]>
Date: Mon, 25 May 2026 13:14:43 +0530
Subject: [PATCH] [Clang][AMDGPU] Add ``amdgcn_av("none")`` attribute for
atomic expressions
Add a statement attribute that suppresses MakeAvailable/MakeVisible
cache operations on AMDGPU atomic instructions while preserving memory
ordering (waits).
The attribute takes a string argument specifying the mode. Currently
"none" is the only supported mode. The resulting atomic or fence
instruction carries !mmra !{!"amdgcn-av", !"none"} metadata.
Assisted-By: Claude Opus 4.6
---
clang/include/clang/Basic/Attr.td | 6 +
clang/include/clang/Basic/AttrDocs.td | 23 +++
clang/include/clang/Basic/DiagnosticGroups.td | 2 +
.../clang/Basic/DiagnosticSemaKinds.td | 4 +
clang/lib/CodeGen/CGAtomic.cpp | 9 +-
clang/lib/CodeGen/CGBuiltin.cpp | 68 +++++----
clang/lib/CodeGen/CGStmt.cpp | 5 +
clang/lib/CodeGen/CodeGenFunction.h | 4 +
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 1 +
clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 +
clang/lib/Sema/SemaStmtAttr.cpp | 57 ++++++++
.../CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp | 43 ++++++
.../CodeGen/AMDGPU/amdgcn-av-none-attr.cpp | 131 ++++++++++++++++++
13 files changed, 332 insertions(+), 27 deletions(-)
create mode 100644 clang/test/CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp
create mode 100644 clang/test/CodeGen/AMDGPU/amdgcn-av-none-attr.cpp
diff --git a/clang/include/clang/Basic/Attr.td
b/clang/include/clang/Basic/Attr.td
index 70b5773f95b08..b11b3ed51efc4 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2520,6 +2520,12 @@ def AMDGPUMaxNumWorkGroups : InheritableAttr {
let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">;
}
+def AMDGCNAV : StmtAttr, TargetSpecificAttr<TargetAMDGPU> {
+ let Spellings = [Clang<"amdgcn_av">];
+ let Args = [StringArgument<"Mode">];
+ let Documentation = [AMDGCNAVDocs];
+}
+
def BPFPreserveAccessIndex : InheritableAttr,
TargetSpecificAttr<TargetBPF> {
let Spellings = [Clang<"preserve_access_index">];
diff --git a/clang/include/clang/Basic/AttrDocs.td
b/clang/include/clang/Basic/AttrDocs.td
index 87b9053be7cb6..f2835bffd4bc5 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3543,6 +3543,29 @@ An error will be given if:
}];
}
+def AMDGCNAVDocs : Documentation {
+ let Category = DocCatAMDGPUAttributes;
+ let Content = [{
+This attribute controls MakeAvailable and MakeVisible cache operations on
+AMDGPU synchronization operations. It takes a string argument specifying the
+mode.
+
+When placed on a statement containing a C/C++ atomic builtin call, the
+resulting atomic or fence instruction will carry ``!mmra !{!"amdgcn-av",
+!"<mode>"}`` metadata.
+
+The supported modes are:
+
+- ``"none"``: Skip cache writeback (on release) and cache invalidation (on
+ acquire), while preserving memory ordering (waits).
+
+.. code-block:: c++
+
+ [[clang::amdgcn_av("none")]] __atomic_thread_fence(__ATOMIC_SEQ_CST);
+ [[clang::amdgcn_av("none")]] __atomic_fetch_add(ptr, 1, __ATOMIC_ACQ_REL);
+ }];
+}
+
def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
let Content = [{
Clang supports several different calling conventions, depending on the target
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td
b/clang/include/clang/Basic/DiagnosticGroups.td
index 8031f99419bdc..51787935e1280 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1968,3 +1968,5 @@ def ExperimentalOption : DiagGroup<"experimental-option">;
// Warnings about unguarded usages of AMDGPU target specific constructs
def UnguardedBuiltinUsageAMDGPU : DiagGroup<"amdgpu-unguarded-builtin-usage">;
+
+def AMDGCNAVNonAtomic : DiagGroup<"amdgcn-av-non-atomic">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index dbe6cb2c3a41c..c91b5774d0272 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -14232,6 +14232,10 @@ def note_amdgcn_unguarded_builtin_silence
def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a
global or generic pointer">;
+def warn_amdgcn_av_requires_atomic : Warning<
+ "%0 attribute only applies to atomic operations">,
+ InGroup<AMDGCNAVNonAtomic>;
+
def err_amdgcn_dmask_has_too_many_bits_set
: Error<"dmask argument cannot have more bits set than there are elements "
"in return type">;
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 270965b109943..92ee116ca8dc0 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -632,6 +632,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr
*E, Address Dest,
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
Load->setAtomic(Order, Scope);
Load->setVolatile(E->isVolatile());
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Load, E);
CGF.maybeAttachRangeForLoad(Load, E->getValueType(), E->getExprLoc());
auto *I = CGF.Builder.CreateStore(Load, Dest);
CGF.addInstToCurrentSourceAtom(I, Load);
@@ -649,6 +650,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr
*E, Address Dest,
llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
Store->setAtomic(Order, Scope);
Store->setVolatile(E->isVolatile());
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Store, E);
CGF.addInstToCurrentSourceAtom(Store, LoadVal1);
return;
}
@@ -795,6 +797,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr
*E, Address Dest,
CGF.Builder.CreateStore(CGF.Builder.getInt8(0), Ptr);
Store->setAtomic(Order, Scope);
Store->setVolatile(E->isVolatile());
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Store, E);
CGF.addInstToCurrentSourceAtom(Store, nullptr);
return;
}
@@ -1586,6 +1589,7 @@ llvm::Value
*AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
Addr = castToAtomicIntPointer(Addr);
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
Load->setAtomic(AO);
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Load);
// Other decoration.
if (IsVolatile)
@@ -1753,6 +1757,7 @@ std::pair<llvm::Value *, llvm::Value *>
AtomicInfo::EmitAtomicCompareExchangeOp(
// Other decoration.
Inst->setVolatile(LVal.isVolatileQualified());
Inst->setWeak(IsWeak);
+ CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *Inst);
// Okay, turn that back into the original value type.
auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
@@ -2108,8 +2113,10 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue,
LValue dest,
else if (AO == llvm::AtomicOrdering::AcquireRelease)
AO = llvm::AtomicOrdering::Release;
// Initializations don't need to be atomic.
- if (!isInit)
+ if (!isInit) {
store->setAtomic(AO);
+ getTargetHooks().setTargetAtomicMetadata(*this, *store);
+ }
// Other decoration.
if (IsVolatile)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b1d727cb5e0ad..4e36d4fe1dc32 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5367,24 +5367,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const
GlobalDecl GD, unsigned BuiltinID,
Value *Order = EmitScalarExpr(E->getArg(0));
if (isa<llvm::ConstantInt>(Order)) {
int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+ llvm::FenceInst *Fence = nullptr;
switch (ord) {
case 0: // memory_order_relaxed
default: // invalid order
break;
case 1: // memory_order_consume
case 2: // memory_order_acquire
- Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
+ Fence = Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
break;
case 3: // memory_order_release
- Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
+ Fence = Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
break;
case 4: // memory_order_acq_rel
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
+ Fence = Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
SSID);
break;
case 5: // memory_order_seq_cst
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
SSID);
+ Fence = Builder.CreateFence(
+ llvm::AtomicOrdering::SequentiallyConsistent, SSID);
break;
}
+ if (Fence)
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
return RValue::get(nullptr);
}
@@ -5399,23 +5403,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const
GlobalDecl GD, unsigned BuiltinID,
llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
Builder.SetInsertPoint(AcquireBB);
- Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
+ getTargetHooks().setTargetAtomicMetadata(
+ *this, *Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID));
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(1), AcquireBB);
SI->addCase(Builder.getInt32(2), AcquireBB);
Builder.SetInsertPoint(ReleaseBB);
- Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
+ getTargetHooks().setTargetAtomicMetadata(
+ *this, *Builder.CreateFence(llvm::AtomicOrdering::Release, SSID));
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(3), ReleaseBB);
Builder.SetInsertPoint(AcqRelBB);
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
+ getTargetHooks().setTargetAtomicMetadata(
+ *this,
+ *Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID));
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(4), AcqRelBB);
Builder.SetInsertPoint(SeqCstBB);
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
+ getTargetHooks().setTargetAtomicMetadata(
+ *this, *Builder.CreateFence(
+ llvm::AtomicOrdering::SequentiallyConsistent, SSID));
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32(5), SeqCstBB);
@@ -5433,40 +5443,43 @@ RValue CodeGenFunction::EmitBuiltinExpr(const
GlobalDecl GD, unsigned BuiltinID,
SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
? ScopeModel->map(Scp->getZExtValue())
: ScopeModel->map(ScopeModel->getFallBackValue());
+ llvm::FenceInst *Fence = nullptr;
switch (Ord->getZExtValue()) {
case 0: // memory_order_relaxed
default: // invalid order
break;
case 1: // memory_order_consume
case 2: // memory_order_acquire
- Builder.CreateFence(
+ Fence = Builder.CreateFence(
llvm::AtomicOrdering::Acquire,
getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
llvm::AtomicOrdering::Acquire,
getLLVMContext()));
break;
case 3: // memory_order_release
- Builder.CreateFence(
+ Fence = Builder.CreateFence(
llvm::AtomicOrdering::Release,
getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
llvm::AtomicOrdering::Release,
getLLVMContext()));
break;
case 4: // memory_order_acq_rel
- Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
- getTargetHooks().getLLVMSyncScopeID(
- getLangOpts(), SS,
- llvm::AtomicOrdering::AcquireRelease,
- getLLVMContext()));
+ Fence = Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
+ getTargetHooks().getLLVMSyncScopeID(
+ getLangOpts(), SS,
+ llvm::AtomicOrdering::AcquireRelease,
+ getLLVMContext()));
break;
case 5: // memory_order_seq_cst
- Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
- getTargetHooks().getLLVMSyncScopeID(
- getLangOpts(), SS,
- llvm::AtomicOrdering::SequentiallyConsistent,
- getLLVMContext()));
+ Fence = Builder.CreateFence(
+ llvm::AtomicOrdering::SequentiallyConsistent,
+ getTargetHooks().getLLVMSyncScopeID(
+ getLangOpts(), SS,
llvm::AtomicOrdering::SequentiallyConsistent,
+ getLLVMContext()));
break;
}
+ if (Fence)
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
return RValue::get(nullptr);
}
@@ -5525,9 +5538,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
? ScopeModel->map(Scp->getZExtValue())
: ScopeModel->map(ScopeModel->getFallBackValue());
- Builder.CreateFence(Ordering,
- getTargetHooks().getLLVMSyncScopeID(
- getLangOpts(), SS, Ordering,
getLLVMContext()));
+ llvm::FenceInst *Fence = Builder.CreateFence(
+ Ordering, getTargetHooks().getLLVMSyncScopeID(
+ getLangOpts(), SS, Ordering, getLLVMContext()));
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
Builder.CreateBr(ContBB);
} else {
llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
@@ -5541,9 +5555,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl
GD, unsigned BuiltinID,
SI->addCase(Builder.getInt32(Scp), B);
Builder.SetInsertPoint(B);
- Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
- getLangOpts(),
ScopeModel->map(Scp),
- Ordering, getLLVMContext()));
+ llvm::FenceInst *Fence = Builder.CreateFence(
+ Ordering, getTargetHooks().getLLVMSyncScopeID(
+ getLangOpts(), ScopeModel->map(Scp), Ordering,
+ getLLVMContext()));
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
Builder.CreateBr(ContBB);
}
}
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index b70667d04d1f6..e150f5da61f2e 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -778,6 +778,7 @@ void CodeGenFunction::EmitAttributedStmt(const
AttributedStmt &S) {
bool noinline = false;
bool alwaysinline = false;
bool noconvergent = false;
+ StringRef amdgcnAVMode;
HLSLControlFlowHintAttr::Spelling flattenOrBranch =
HLSLControlFlowHintAttr::SpellingNotCalculated;
const CallExpr *musttail = nullptr;
@@ -815,6 +816,9 @@ void CodeGenFunction::EmitAttributedStmt(const
AttributedStmt &S) {
case attr::Atomic:
AA = cast<AtomicAttr>(A);
break;
+ case attr::AMDGCNAV:
+ amdgcnAVMode = cast<AMDGCNAVAttr>(A)->getMode();
+ break;
case attr::HLSLControlFlowHint: {
flattenOrBranch =
cast<HLSLControlFlowHintAttr>(A)->getSemanticSpelling();
} break;
@@ -824,6 +828,7 @@ void CodeGenFunction::EmitAttributedStmt(const
AttributedStmt &S) {
SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline);
SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline);
SaveAndRestore save_noconvergent(InNoConvergentAttributedStmt, noconvergent);
+ SaveAndRestore save_amdgcnav(AMDGCNAVMode, amdgcnAVMode);
SaveAndRestore save_musttail(MustTailCall, musttail);
SaveAndRestore save_flattenOrBranch(HLSLControlFlowAttr, flattenOrBranch);
CGAtomicOptionsRAII AORAII(CGM, AA);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h
b/clang/lib/CodeGen/CodeGenFunction.h
index 3ce0ef1235561..fc31ec6aa8a19 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -622,6 +622,10 @@ class CodeGenFunction : public CodeGenTypeCache {
/// True if the current statement has noconvergent attribute.
bool InNoConvergentAttributedStmt = false;
+ /// The mode string from the amdgcn_av attribute on the current statement,
+ /// or empty if the attribute is not present.
+ StringRef AMDGCNAVMode;
+
/// HLSL Branch attribute.
HLSLControlFlowHintAttr::Spelling HLSLControlFlowAttr =
HLSLControlFlowHintAttr::SpellingNotCalculated;
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index a88dbb71b3ddf..3816dd638fe14 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -1875,6 +1875,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned
BuiltinID,
FenceInst *Fence = Builder.CreateFence(AO, SSID);
if (E->getNumArgs() > 2)
AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
+ getTargetHooks().setTargetAtomicMetadata(*this, *Fence);
return Fence;
}
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp
b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0d36f166328c7..4a9ae3ab8f7f9 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -10,6 +10,7 @@
#include "TargetInfo.h"
#include "clang/AST/DeclCXX.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
using namespace clang;
@@ -633,6 +634,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange);
}
+ if (!CGF.AMDGCNAVMode.empty()) {
+ llvm::MMRAMetadata::appendTag(AtomicInst,
+ {{"amdgcn-av", CGF.AMDGCNAVMode}});
+ }
+
if (!RMW)
return;
diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp
index 58d5332565d10..b13b1d2256a92 100644
--- a/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/clang/lib/Sema/SemaStmtAttr.cpp
@@ -351,6 +351,61 @@ static Attr *handleMustTailAttr(Sema &S, Stmt *St, const
ParsedAttr &A,
return ::new (S.Context) MustTailAttr(S.Context, A);
}
+/// Return true if St is an atomic operation or fence builtin call.
+/// Parentheses and casts wrapping the expression are ignored, and a statement
+/// that is not an expression is never considered atomic.
+static bool isAtomicOp(const Stmt *St) {
+  const Expr *E = dyn_cast<Expr>(St);
+  if (!E)
+    return false;
+
+  E = E->IgnoreParenCasts();
+
+  if (isa<AtomicExpr>(E))
+    return true;
+
+  const CallExpr *CE = dyn_cast<CallExpr>(E);
+  if (!CE)
+    return false;
+
+  unsigned BuiltinID = CE->getBuiltinCallee();
+  switch (BuiltinID) {
+  case Builtin::BI__atomic_thread_fence:
+  case Builtin::BI__atomic_signal_fence:
+  case Builtin::BI__c11_atomic_thread_fence:
+  case Builtin::BI__c11_atomic_signal_fence:
+  case Builtin::BI__scoped_atomic_thread_fence:
+    return true;
+  default:
+    break;
+  }
+
+  // Check for target-specific fence builtins. getName() asserts unless the
+  // callee has a plain identifier, which overloaded operators do not.
+  const FunctionDecl *FD = CE->getDirectCallee();
+  return FD && FD->getIdentifier() &&
+         FD->getName() == "__builtin_amdgcn_fence";
+}
+
+static Attr *handleAMDGCNAVAttr(Sema &S, Stmt *St, const ParsedAttr &A,
+ SourceRange Range) {
+ StringRef Mode;
+ if (!S.checkStringLiteralArgumentAttr(A, 0, Mode))
+ return nullptr;
+
+ if (Mode != "none") {
+ S.Diag(A.getLoc(), diag::warn_attribute_type_not_supported) << A << Mode;
+ return nullptr;
+ }
+
+ if (!isAtomicOp(St)) {
+ S.Diag(A.getLoc(), diag::warn_amdgcn_av_requires_atomic) << A;
+ return nullptr;
+ }
+
+ return ::new (S.Context) AMDGCNAVAttr(S.Context, A, Mode);
+}
+
static Attr *handleLikely(Sema &S, Stmt *St, const ParsedAttr &A,
SourceRange Range) {
@@ -730,6 +785,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const
ParsedAttr &A,
return handleNoInlineAttr(S, St, A, Range);
case ParsedAttr::AT_MustTail:
return handleMustTailAttr(S, St, A, Range);
+ case ParsedAttr::AT_AMDGCNAV:
+ return handleAMDGCNAVAttr(S, St, A, Range);
case ParsedAttr::AT_Likely:
return handleLikely(S, St, A, Range);
case ParsedAttr::AT_Unlikely:
diff --git a/clang/test/CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp
b/clang/test/CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp
new file mode 100644
index 0000000000000..1f193a865be97
--- /dev/null
+++ b/clang/test/CodeGen/AMDGPU/amdgcn-av-non-atomic.cpp
@@ -0,0 +1,43 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -emit-llvm -O0 -verify -o - \
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+
+// Test that [[clang::amdgcn_av("none")]] on non-atomic statements emits a
+// warning and does NOT produce !mmra metadata.
+
+// CHECK-LABEL: define {{.*}} @_Z16test_plain_storePii(
+// CHECK-NOT: !mmra
+// CHECK: ret void
+void test_plain_store(int *p, int val) {
+  [[clang::amdgcn_av("none")]] *p = val; // expected-warning {{'clang::amdgcn_av' attribute only applies to atomic operations}}
+}
+
+// CHECK-LABEL: define {{.*}} @_Z15test_plain_callv(
+// CHECK-NOT: !mmra
+// CHECK: ret void
+extern void foo();
+void test_plain_call() {
+  [[clang::amdgcn_av("none")]] foo(); // expected-warning {{'clang::amdgcn_av' attribute only applies to atomic operations}}
+}
+
+// CHECK-LABEL: define {{.*}} @_Z18test_for_with_atomPi(
+// CHECK-NOT: !mmra
+// CHECK: ret void
+void test_for_with_atom(int *p) {
+ // The attribute on a for loop should warn even if the body contains atomics.
+  [[clang::amdgcn_av("none")]] for (;;) { // expected-warning {{'clang::amdgcn_av' attribute only applies to atomic operations}}
+ __atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);
+ break;
+ }
+}
+
+// The attribute on an if statement should warn even if the condition is atomic.
+// CHECK-LABEL: define {{.*}} @_Z20test_if_atomic_condnPi(
+// CHECK-NOT: !mmra
+// CHECK: ret void
+void test_if_atomic_condn(int *p) {
+  [[clang::amdgcn_av("none")]] if (__atomic_load_n(p, __ATOMIC_ACQUIRE)) { // expected-warning {{'clang::amdgcn_av' attribute only applies to atomic operations}}
+ }
+}
+
+// CHECK-NOT: amdgcn-av
diff --git a/clang/test/CodeGen/AMDGPU/amdgcn-av-none-attr.cpp
b/clang/test/CodeGen/AMDGPU/amdgcn-av-none-attr.cpp
new file mode 100644
index 0000000000000..798c03cae45eb
--- /dev/null
+++ b/clang/test/CodeGen/AMDGPU/amdgcn-av-none-attr.cpp
@@ -0,0 +1,131 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 4
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+
+// Test that [[clang::amdgcn_av("none")]] on atomic builtins emits
+// !mmra !{!"amdgcn-av", !"none"} metadata on the resulting IR instructions.
+
+// CHECK-LABEL: define dso_local void @_Z21test_atomic_fetch_addPi(
+// CHECK-SAME: ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[P_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]],
align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]]
seq_cst, align 4, !mmra [[META2:![0-9]+]], !amdgpu.no.fine.grained.memory
[[META4:![0-9]+]], !amdgpu.no.remote.memory [[META4]]
+// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]],
align 4
+// CHECK-NEXT: ret void
+//
+void test_atomic_fetch_add(int *p) {
+ [[clang::amdgcn_av("none")]] __atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);
+}
+
+// CHECK-LABEL: define dso_local void @_Z17test_atomic_storePi(
+// CHECK-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[P_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 42, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]],
align 4
+// CHECK-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] release, align 4,
!mmra [[META2]]
+// CHECK-NEXT: ret void
+//
+void test_atomic_store(int *p) {
+ [[clang::amdgcn_av("none")]] __atomic_store_n(p, 42, __ATOMIC_RELEASE);
+}
+
+// CHECK-LABEL: define dso_local void @_Z16test_atomic_loadPi(
+// CHECK-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[P_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] acquire, align
4, !mmra [[META2]]
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]],
align 4
+// CHECK-NEXT: ret void
+//
+void test_atomic_load(int *p) {
+ [[clang::amdgcn_av("none")]] __atomic_load_n(p, __ATOMIC_ACQUIRE);
+}
+
+// CHECK-LABEL: define dso_local void @_Z19test_atomic_cmpxchgPi(
+// CHECK-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[EXPECTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[P_ADDR]] to ptr
+// CHECK-NEXT: [[EXPECTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[EXPECTED]] to ptr
+// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 0, ptr [[EXPECTED_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[EXPECTED_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]],
align 4
+// CHECK-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32
[[TMP2]] seq_cst acquire, align 4, !mmra [[META2]]
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1
+// CHECK-NEXT: br i1 [[TMP5]], label [[CMPXCHG_CONTINUE:%.*]], label
[[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK: cmpxchg.store_expected:
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[EXPECTED_ASCAST]], align 4
+// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
+// CHECK: cmpxchg.continue:
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr addrspace(5) [[CMPXCHG_BOOL]],
align 1
+// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr addrspace(5) [[CMPXCHG_BOOL]],
align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP6]], 0
+// CHECK-NEXT: ret void
+//
+void test_atomic_cmpxchg(int *p) {
+ int expected = 0;
+ [[clang::amdgcn_av("none")]] __atomic_compare_exchange_n(p, &expected, 1,
false, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE);
+}
+
+// CHECK-LABEL: define dso_local void @_Z24test_atomic_thread_fencev(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: fence seq_cst, !mmra [[META2]]
+// CHECK-NEXT: ret void
+//
+void test_atomic_thread_fence() {
+ [[clang::amdgcn_av("none")]] __atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+
+// Verify that without the attribute, no !mmra metadata is emitted.
+// CHECK-LABEL: define dso_local void @_Z12test_no_attrPi(
+// CHECK-SAME: ptr noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[P_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 1, ptr addrspace(5) [[DOTATOMICTMP]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[DOTATOMICTMP]],
align 4
+// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]]
seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]],
!amdgpu.no.remote.memory [[META4]]
+// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[ATOMIC_TEMP]],
align 4
+// CHECK-NEXT: ret void
+//
+void test_no_attr(int *p) {
+ __atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);
+}
+//.
+// CHECK: [[META2]] = !{[[META3:![0-9]+]]}
+// CHECK: [[META3]] = !{!"amdgcn-av", !"none"}
+// CHECK: [[META4]] = !{}
+//.
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits