Author: Nikita Popov
Date: 2026-05-28T11:24:11+02:00
New Revision: 4503872fe57315e6df7dbc93128dbe6a02011872

URL: https://github.com/llvm/llvm-project/commit/4503872fe57315e6df7dbc93128dbe6a02011872
DIFF: https://github.com/llvm/llvm-project/commit/4503872fe57315e6df7dbc93128dbe6a02011872.diff

LOG: [IR][FunctionAttrs] Clarify memory effects of atomics (#193768)

FunctionAttrs was treating atomic instructions, including with ordering stronger than monotonic, as only reading/writing their operand.

I don't think doing this is correct, because we model the ordering constraints of synchronizing atomics via reading/writing "all" memory. So e.g. if you have a function with a release store on an argument, marking it as argmem-only is wrong, because that would permit reordering accesses to other locations around it.

(What this PR is doing is not *sufficient* to model this correctly due to the fence-like effects on not-yet-escaped memory, but it brings us closer to correctness.)

I initially tried to implement mayReadFromMemory() and mayWriteToMemory() on top of getMemoryEffects(), but this caused significant compile-time regressions, so I've kept the logic duplicated.

Added: Modified: clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl llvm/include/llvm/IR/Instruction.h llvm/lib/IR/Instruction.cpp llvm/lib/Transforms/IPO/FunctionAttrs.cpp llvm/test/Transforms/FunctionAttrs/atomic.ll llvm/test/Transforms/FunctionAttrs/nocapture.ll llvm/test/Transforms/FunctionAttrs/nosync.ll llvm/test/Transforms/FunctionAttrs/writeonly.ll Removed: ################################################################################ diff --git a/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl b/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl index 0cf961fc572b9..b3464f0306a7a 100644 --- a/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl +++ b/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl @@ -5,26 +5,26 @@ // RUN: | FileCheck %s --check-prefix=SPIRV // AMDGCN-LABEL: define dso_local i32 @load( -// AMDGCN-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[TMP0:%.*]] = load atomic i32, ptr [[P]] syncscope("agent") seq_cst, align 4 // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @load( -// SPIRV-SAME: ptr addrspace(4) noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// SPIRV-SAME: ptr addrspace(4) noundef captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // SPIRV-NEXT: [[ENTRY:.*:]] // SPIRV-NEXT: [[TMP0:%.*]] = load atomic i32, ptr addrspace(4) [[P]] syncscope("device") seq_cst, align 4 // SPIRV-NEXT: ret i32 [[TMP0]] // int load(int *p) { return __atomic_load_n(p, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local void @store( -// AMDGCN-SAME: ptr noundef writeonly captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 
noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: store atomic i32 [[X]], ptr [[P]] syncscope("agent") seq_cst, align 4 // AMDGCN-NEXT: ret void // // SPIRV-LABEL: define spir_func void @store( -// SPIRV-SAME: ptr addrspace(4) noundef writeonly captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-SAME: ptr addrspace(4) noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // SPIRV-NEXT: [[ENTRY:.*:]] // SPIRV-NEXT: store atomic i32 [[X]], ptr addrspace(4) [[P]] syncscope("device") seq_cst, align 4 // SPIRV-NEXT: ret void @@ -33,7 +33,7 @@ void store(int *p, int x) { return __atomic_store_n(p, x, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local i32 @add( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw add ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw add ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7:![0-9]+]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @add( @@ -46,7 +46,7 @@ int add(int *p, int x) { return __atomic_fetch_add(p, x, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local float @fadd( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret float [[TMP0]] // // SPIRV-LABEL: define spir_func float @fadd( @@ -59,7 +59,7 @@ float fadd(float *p, 
float x) { return __atomic_fetch_add(p, x, __ATOMIC_SEQ_CST // AMDGCN-LABEL: define dso_local i32 @sub( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @sub( @@ -72,7 +72,7 @@ int sub(int *p, int x) { return __atomic_fetch_sub(p, x, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local float @fsub( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret float [[TMP0]] // // SPIRV-LABEL: define spir_func float @fsub( @@ -85,7 +85,7 @@ float fsub(float *p, float x) { return __atomic_fetch_sub(p, x, __ATOMIC_SEQ_CST // AMDGCN-LABEL: define dso_local i32 @and( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw and ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw and ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @and( @@ -98,7 +98,7 @@ int and(int *p, int x) { return __atomic_fetch_and(p, x, 
__ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local i32 @nand( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw nand ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw nand ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @nand( @@ -111,7 +111,7 @@ int nand(int *p, int x) { return __atomic_fetch_nand(p, x, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local i32 @or( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw or ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw or ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @or( @@ -124,7 +124,7 @@ int or(int *p, int x) { return __atomic_fetch_or(p, x, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local i32 @xor( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @xor( @@ -137,7 +137,7 @@ int xor(int *p, int x) { return __atomic_fetch_xor(p, x, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local i32 @min( // 
AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw min ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw min ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @min( @@ -150,7 +150,7 @@ int min(int *p, int x) { return __atomic_fetch_min(p, x, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local float @fmin( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmin ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmin ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret float [[TMP0]] // // SPIRV-LABEL: define spir_func float @fmin( @@ -163,7 +163,7 @@ float fmin(float *p, float x) { return __atomic_fetch_min(p, x, __ATOMIC_SEQ_CST // AMDGCN-LABEL: define dso_local i32 @max( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw max ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw max ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @max( @@ -176,7 +176,7 @@ int max(int *p, int x) { return __atomic_fetch_max(p, x, __ATOMIC_SEQ_CST); } // AMDGCN-LABEL: define dso_local float @fmax( // AMDGCN-SAME: ptr noundef captures(none) 
[[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmax ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmax ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret float [[TMP0]] // // SPIRV-LABEL: define spir_func float @fmax( @@ -189,7 +189,7 @@ float fmax(float *p, float x) { return __atomic_fetch_max(p, x, __ATOMIC_SEQ_CST // AMDGCN-LABEL: define dso_local i32 @xchg( // AMDGCN-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META7]], !amdgpu.no.remote.memory [[META7]] // AMDGCN-NEXT: ret i32 [[TMP0]] // // SPIRV-LABEL: define spir_func i32 @xchg( @@ -233,3 +233,6 @@ int cmpxchg(int *p, int x, int y) { return __atomic_compare_exchange(p, &x, &y, // SPIRV-NEXT: ret i32 [[CONV]] // int cmpxchg_weak(int *p, int x, int y) { return __atomic_compare_exchange(p, &x, &y, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } +//. +// AMDGCN: [[META7]] = !{} +//. diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index e08021d330d09..50812f656c0be 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -24,6 +24,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ModRef.h" #include <cstdint> #include <utility> @@ -835,6 +836,10 @@ class Instruction : public User, return Opcode == Xor; } + /// Return memory effects of the instruction. 
argmem here refers to the + /// operands of the instruction. + LLVM_ABI MemoryEffects getMemoryEffects() const LLVM_READONLY; + /// Return true if this instruction may modify memory. LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY; diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index ac209ffa72aae..71229c4c44a4a 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -1061,6 +1061,58 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const { return false; } +MemoryEffects Instruction::getMemoryEffects() const { + auto GetEffects = [](ModRefInfo BaseMR, AtomicOrdering Ordering, + bool IsVolatile) { + if (isStrongerThanMonotonic(Ordering)) + return MemoryEffects::unknown(); + + if (IsVolatile) + return MemoryEffects::inaccessibleOrArgMemOnly(); + + if (isStrongerThanUnordered(Ordering)) + return MemoryEffects::argMemOnly(); + + return MemoryEffects::argMemOnly(BaseMR); + }; + switch (getOpcode()) { + default: + return MemoryEffects::none(); + case Instruction::VAArg: + return MemoryEffects::argMemOnly(); + case Instruction::CatchPad: + case Instruction::CatchRet: + case Instruction::Fence: + return MemoryEffects::unknown(); + case Instruction::Call: + case Instruction::Invoke: + case Instruction::CallBr: + return cast<CallBase>(this)->getMemoryEffects(); + case Instruction::Load: { + auto *LI = cast<LoadInst>(this); + return GetEffects(ModRefInfo::Ref, LI->getOrdering(), LI->isVolatile()); + } + case Instruction::Store: { + auto *SI = cast<StoreInst>(this); + return GetEffects(ModRefInfo::Mod, SI->getOrdering(), SI->isVolatile()); + } + case Instruction::AtomicRMW: { + auto *RMW = cast<AtomicRMWInst>(this); + return GetEffects(ModRefInfo::ModRef, RMW->getOrdering(), + RMW->isVolatile()); + } + case Instruction::AtomicCmpXchg: { + auto *CX = cast<AtomicCmpXchgInst>(this); + return GetEffects(ModRefInfo::ModRef, CX->getSuccessOrdering(), + CX->isVolatile()); + } + } +} + +// This is duplicating the logic 
from getMemoryEffects() for performance +// reasons. Computing the full MemoryEffects just to perform a Mod/Ref check +// is expensive. + bool Instruction::mayReadFromMemory() const { switch (getOpcode()) { default: return false; diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index afc48747a6bcd..b09f4f20489a7 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -186,6 +186,18 @@ checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, // Additional locations accessed if the SCC accesses argmem. MemoryEffects RecursiveArgME = MemoryEffects::none(); + auto AddNonArgMemoryEffects = [&ME](MemoryEffects InstME) { + // Merge instruction memory effects, including inaccessible and errno + // memory, but excluding argument memory, which is handled separately. + ME |= InstME.getWithoutLoc(IRMemLocation::ArgMem); + + // If the instruction accesses captured memory (currently part of "other") + // and an argument is captured (currently not tracked), then it may also + // access argument memory. + ModRefInfo OtherMR = InstME.getModRef(IRMemLocation::Other); + ME |= MemoryEffects::argMemOnly(OtherMR); + }; + // Inalloca and preallocated arguments are always clobbered by the call. if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) @@ -222,16 +234,7 @@ checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, if (isa<PseudoProbeInst>(I)) continue; - // Merge callee's memory effects into caller's ones, including - // inaccessible and errno memory, but excluding argument memory, which is - // handled separately. - ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem); - - // If the call accesses captured memory (currently part of "other") and - // an argument is captured (currently not tracked), then it may also - // access argument memory. 
- ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other); - ME |= MemoryEffects::argMemOnly(OtherMR); + AddNonArgMemoryEffects(CallME); // Check whether all pointer arguments point to local memory, and // ignore calls that only access local memory. @@ -241,27 +244,20 @@ checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, continue; } - ModRefInfo MR = ModRefInfo::NoModRef; - if (I.mayWriteToMemory()) - MR |= ModRefInfo::Mod; - if (I.mayReadFromMemory()) - MR |= ModRefInfo::Ref; - if (MR == ModRefInfo::NoModRef) + MemoryEffects InstME = I.getMemoryEffects(); + if (InstME.doesNotAccessMemory()) continue; std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I); if (!Loc) { // If no location is known, conservatively assume anything can be // accessed. - ME |= MemoryEffects(MR); + ME |= MemoryEffects(InstME.getModRef()); continue; } - // Volatile operations may access inaccessible memory. - if (I.isVolatile()) - ME |= MemoryEffects::inaccessibleMemOnly(MR); - - addLocAccess(ME, *Loc, MR, AAR); + AddNonArgMemoryEffects(InstME); + addLocAccess(ME, *Loc, InstME.getModRef(IRMemLocation::ArgMem), AAR); } return {OrigME & ME, RecursiveArgME}; @@ -962,9 +958,9 @@ determinePointerAccessAttrs(Argument *A, } case Instruction::Load: - // A volatile load has side effects beyond what readonly can be relied - // upon. - if (cast<LoadInst>(I)->isVolatile()) + // Volatile and ordered atomic accesses are modelled as reading and + // writing the location. + if (!cast<LoadInst>(I)->isUnordered()) return Attribute::None; IsRead = true; @@ -975,9 +971,9 @@ determinePointerAccessAttrs(Argument *A, // untrackable capture return Attribute::None; - // A volatile store has side effects beyond what writeonly can be relied - // upon. - if (cast<StoreInst>(I)->isVolatile()) + // Volatile and ordered atomic accesses are modelled as reading and + // writing the location. 
+ if (!cast<StoreInst>(I)->isUnordered()) return Attribute::None; IsWrite = true; diff --git a/llvm/test/Transforms/FunctionAttrs/atomic.ll b/llvm/test/Transforms/FunctionAttrs/atomic.ll index bd99ba2f0b18e..8d62a8d20388c 100644 --- a/llvm/test/Transforms/FunctionAttrs/atomic.ll +++ b/llvm/test/Transforms/FunctionAttrs/atomic.ll @@ -1,14 +1,16 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 6 ; RUN: opt -passes=function-attrs -S < %s | FileCheck %s -; Atomic load/store to local doesn't affect whether a function is -; readnone/readonly. +; While it would be fine in this specific case (the alloca does not escape), +; we generally can't ignore synchronizing operations on allocas and should not +; infer readnone here. Non-escaping cases will typically be optimized by SROA. define i32 @test1(i32 %x) uwtable ssp { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind ssp willreturn memory(none) uwtable -; CHECK-LABEL: @test1( -; CHECK-NEXT: entry: +; CHECK: Function Attrs: mustprogress norecurse nounwind ssp willreturn uwtable +; CHECK-LABEL: define i32 @test1( +; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store atomic i32 [[X:%.*]], ptr [[X_ADDR]] seq_cst, align 4 +; CHECK-NEXT: store atomic i32 [[X]], ptr [[X_ADDR]] seq_cst, align 4 ; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[X_ADDR]] seq_cst, align 4 ; CHECK-NEXT: ret i32 [[R]] ; @@ -19,15 +21,134 @@ entry: ret i32 %r } -; A function with an Acquire load is not readonly. 
-define i32 @test2(ptr %x) uwtable ssp { -; CHECK: Function Attrs: mustprogress norecurse nounwind ssp willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: @test2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[X:%.*]] seq_cst, align 4 +define i32 @load_monotonic(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define i32 @load_monotonic( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[X]] monotonic, align 4 +; CHECK-NEXT: ret i32 [[R]] +; + %r = load atomic i32, ptr %x monotonic, align 4 + ret i32 %r +} + +define i32 @load_acquire(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn +; CHECK-LABEL: define i32 @load_acquire( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[X]] acquire, align 4 +; CHECK-NEXT: ret i32 [[R]] +; + %r = load atomic i32, ptr %x acquire, align 4 + ret i32 %r +} + +define i32 @load_seq_cst(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn +; CHECK-LABEL: define i32 @load_seq_cst( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[X]] seq_cst, align 4 ; CHECK-NEXT: ret i32 [[R]] ; -entry: %r = load atomic i32, ptr %x seq_cst, align 4 ret i32 %r } + +define void @store_monotonic(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @store_monotonic( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: store atomic i32 0, ptr [[X]] monotonic, align 4 +; CHECK-NEXT: ret void +; + store atomic i32 0, ptr %x monotonic, align 4 + ret void +} + +define void @store_release(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn +; CHECK-LABEL: define void @store_release( +; CHECK-SAME: 
ptr captures(none) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: store atomic i32 0, ptr [[X]] release, align 4 +; CHECK-NEXT: ret void +; + store atomic i32 0, ptr %x release, align 4 + ret void +} + +define void @store_seq_cst(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn +; CHECK-LABEL: define void @store_seq_cst( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: store atomic i32 0, ptr [[X]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic i32 0, ptr %x seq_cst, align 4 + ret void +} + +define void @atomicrmw_monotonic_arg(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define void @atomicrmw_monotonic_arg( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[X]], i32 1 monotonic, align 4 +; CHECK-NEXT: ret void +; + atomicrmw add ptr %x, i32 1 monotonic, align 4 + ret void +} + +define void @atomicrmw_acq_rel_arg(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn +; CHECK-LABEL: define void @atomicrmw_acq_rel_arg( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[X]], i32 1 acq_rel, align 4 +; CHECK-NEXT: ret void +; + atomicrmw add ptr %x, i32 1 acq_rel, align 4 + ret void +} + +define void @atomicrmw_monotonic_volatile_arg(ptr %x) { +; CHECK: Function Attrs: norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) +; CHECK-LABEL: define void @atomicrmw_monotonic_volatile_arg( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = atomicrmw volatile add ptr [[X]], i32 1 monotonic, align 4 +; CHECK-NEXT: ret void +; + atomicrmw volatile add ptr %x, i32 1 monotonic, align 4 + ret void +} + +define void @cmpxchg_monotonic_arg(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK-LABEL: define 
void @cmpxchg_monotonic_arg( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[X]], i32 0, i32 1 monotonic monotonic, align 4 +; CHECK-NEXT: ret void +; + cmpxchg ptr %x, i32 0, i32 1 monotonic monotonic + ret void +} + +define void @cmpxchg_acq_rel_arg(ptr %x) { +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn +; CHECK-LABEL: define void @cmpxchg_acq_rel_arg( +; CHECK-SAME: ptr captures(none) [[X:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[X]], i32 0, i32 1 acq_rel monotonic, align 4 +; CHECK-NEXT: ret void +; + cmpxchg ptr %x, i32 0, i32 1 acq_rel monotonic + ret void +} + +define void @cmpxchg_monotonic_volatile_arg(ptr %x) { +; CHECK: Function Attrs: norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) +; CHECK-LABEL: define void @cmpxchg_monotonic_volatile_arg( +; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[TMP1:%.*]] = cmpxchg volatile ptr [[X]], i32 0, i32 1 monotonic monotonic, align 4 +; CHECK-NEXT: ret void +; + cmpxchg volatile ptr %x, i32 0, i32 1 monotonic monotonic + ret void +} diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 7e9a93cb186ed..aab801a9a7bf7 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -646,7 +646,7 @@ define void @test6_2(ptr %x6_2, ptr %y6_2, ptr %z6_2) { } define void @test_cmpxchg(ptr %p) { -; FNATTRS: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; FNATTRS: Function Attrs: mustprogress norecurse nounwind willreturn ; FNATTRS-LABEL: define void @test_cmpxchg ; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR13:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i32 0, i32 1 acquire monotonic, align 4 @@ -663,7 +663,7 @@ define void @test_cmpxchg(ptr %p) { } define void @test_cmpxchg_ptr(ptr %p, ptr %q) { -; FNATTRS: 
Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; FNATTRS: Function Attrs: mustprogress norecurse nounwind willreturn ; FNATTRS-LABEL: define void @test_cmpxchg_ptr ; FNATTRS-SAME: (ptr captures(none) [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR13]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], ptr null, ptr [[Q]] acquire monotonic, align 8 @@ -680,7 +680,7 @@ define void @test_cmpxchg_ptr(ptr %p, ptr %q) { } define void @test_atomicrmw(ptr %p) { -; FNATTRS: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; FNATTRS: Function Attrs: mustprogress norecurse nounwind willreturn ; FNATTRS-LABEL: define void @test_atomicrmw ; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR13]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i32 1 seq_cst, align 4 diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll index c00eac2fc04b1..cb204a5a0fd2d 100644 --- a/llvm/test/Transforms/FunctionAttrs/nosync.ll +++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll @@ -48,7 +48,7 @@ define i32 @test4(i32 %a, i32 %b) { ; negative case - explicit sync define void @test5(ptr %p) { -; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn ; CHECK-LABEL: @test5( ; CHECK-NEXT: store atomic i8 0, ptr [[P:%.*]] seq_cst, align 1 ; CHECK-NEXT: ret void @@ -59,7 +59,7 @@ define void @test5(ptr %p) { ; negative case - explicit sync define i8 @test6(ptr %p) { -; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[V:%.*]] = load atomic i8, ptr [[P:%.*]] seq_cst, align 1 ; CHECK-NEXT: ret i8 [[V]] @@ -70,7 +70,7 @@ define i8 @test6(ptr %p) { ; negative case - explicit sync define void @test7(ptr %p) { -; CHECK: Function Attrs: 
mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P:%.*]], i8 0 seq_cst, align 1 ; CHECK-NEXT: ret void @@ -126,7 +126,7 @@ define void @store_monotonic(ptr nocapture %0) norecurse nounwind uwtable { ; negative, should not deduce nosync ; atomic load with acquire ordering. define i32 @load_acquire(ptr nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) uwtable +; CHECK: Function Attrs: mustprogress norecurse nounwind willreturn uwtable ; CHECK-LABEL: @load_acquire( ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0:%.*]] acquire, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -160,7 +160,7 @@ define void @store_unordered(ptr nocapture %0) norecurse nounwind uwtable { ; negative, should not deduce nosync ; atomic load with release ordering define void @load_release(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable +; CHECK: Function Attrs: norecurse nounwind uwtable ; CHECK-LABEL: @load_release( ; CHECK-NEXT: store atomic volatile i32 10, ptr [[TMP0:%.*]] release, align 4 ; CHECK-NEXT: ret void @@ -171,7 +171,7 @@ define void @load_release(ptr nocapture %0) norecurse nounwind uwtable { ; negative volatile, relaxed atomic define void @load_volatile_release(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable +; CHECK: Function Attrs: norecurse nounwind uwtable ; CHECK-LABEL: @load_volatile_release( ; CHECK-NEXT: store atomic volatile i32 10, ptr [[TMP0:%.*]] release, align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll index 
325bc53d2615b..5342ad6f7000d 100644 --- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll @@ -162,7 +162,7 @@ define void @test_volatile(ptr %p) { } define void @test_atomicrmw(ptr %p) { -; FNATTRS: Function Attrs: mustprogress norecurse nounwind willreturn memory(argmem: readwrite) +; FNATTRS: Function Attrs: mustprogress norecurse nounwind willreturn ; FNATTRS-LABEL: define {{[^@]+}}@test_atomicrmw ; FNATTRS-SAME: (ptr captures(none) [[P:%.*]]) #[[ATTR7:[0-9]+]] { ; FNATTRS-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i8 0 seq_cst, align 1 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
