yaxunl updated this revision to Diff 265479.
yaxunl marked an inline comment as done.
yaxunl edited the summary of this revision.
yaxunl added a comment.
Herald added subscribers: kerbowa, nhaehnle, jvesely.
Added TargetInfo::isFPAtomicFetchAddSubSupported to guard fp atomic.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D71726/new/
https://reviews.llvm.org/D71726
Files:
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/include/clang/Basic/TargetInfo.h
clang/lib/Basic/Targets/AMDGPU.h
clang/lib/CodeGen/CGAtomic.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
clang/test/CodeGenOpenCL/atomic-ops.cl
clang/test/SemaOpenCL/atomic-ops.cl
Index: clang/test/SemaOpenCL/atomic-ops.cl
===================================================================
--- clang/test/SemaOpenCL/atomic-ops.cl
+++ clang/test/SemaOpenCL/atomic-ops.cl
@@ -1,5 +1,7 @@
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -fsyntax-only -triple=spir64
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -fsyntax-only -triple=amdgcn-amdhsa-amd-opencl
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify=expected,spir \
+// RUN: -fsyntax-only -triple=spir64
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -fsyntax-only \
+// RUN: -triple=amdgcn-amd-amdhsa
// Basic parsing/Sema tests for __opencl_atomic_*
@@ -36,7 +38,7 @@
atomic_int gn;
void f(atomic_int *i, const atomic_int *ci,
- atomic_intptr_t *p, atomic_float *d,
+ atomic_intptr_t *p, atomic_float *d, atomic_double *d2,
int *I, const int *CI,
intptr_t *P, float *D, struct S *s1, struct S *s2,
global atomic_int *i_g, local atomic_int *i_l, private atomic_int *i_p,
@@ -70,7 +72,8 @@
__opencl_atomic_fetch_add(i, 1, memory_order_seq_cst, memory_scope_work_group);
__opencl_atomic_fetch_add(p, 1, memory_order_seq_cst, memory_scope_work_group);
- __opencl_atomic_fetch_add(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+ __opencl_atomic_fetch_add(d, 1, memory_order_seq_cst, memory_scope_work_group); // spir-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
+ __opencl_atomic_fetch_add(d2, 1, memory_order_seq_cst, memory_scope_work_group); // spir-error {{address argument to atomic operation must be a pointer to atomic integer or pointer ('__generic atomic_double *' (aka '__generic _Atomic(double) *') invalid)}}
__opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
__opencl_atomic_fetch_and(p, 1, memory_order_seq_cst, memory_scope_work_group);
__opencl_atomic_fetch_and(d, 1, memory_order_seq_cst, memory_scope_work_group); // expected-error {{address argument to atomic operation must be a pointer to atomic integer ('__generic atomic_float *' (aka '__generic _Atomic(float) *') invalid)}}
Index: clang/test/CodeGenOpenCL/atomic-ops.cl
===================================================================
--- clang/test/CodeGenOpenCL/atomic-ops.cl
+++ clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -1,12 +1,17 @@
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \
+// RUN: | opt -instnamer -S | FileCheck %s
// Also test serialization of atomic operations here, to avoid duplicating the test.
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa \
+// RUN: -emit-llvm -o - | opt -instnamer -S | FileCheck %s
#ifndef ALREADY_INCLUDED
#define ALREADY_INCLUDED
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable
+
typedef __INTPTR_TYPE__ intptr_t;
typedef int int8 __attribute__((ext_vector_type(8)));
@@ -185,6 +190,18 @@
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}
+float ff4(global atomic_float *d, float a) {
+ // CHECK-LABEL: @ff4
+ // CHECK: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
+ return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
+}
+
+float ff5(global atomic_double *d, double a) {
+ // CHECK-LABEL: @ff5
+ // CHECK: atomicrmw fadd double addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
+ return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
+}
+
// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo()
{
Index: clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
+// RUN: -fcuda-is-device -target-cpu gfx906 | FileCheck %s
+// REQUIRES: amdgpu-registered-target
+
+#include "Inputs/cuda.h"
+#include <stdatomic.h>
+
+__device__ float ffp1(float *p) {
+ // CHECK-LABEL: @_Z4ffp1Pf
+ // CHECK: atomicrmw fadd float* {{.*}} monotonic
+ return __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
+}
+
+__device__ double ffp2(double *p) {
+ // CHECK-LABEL: @_Z4ffp2Pd
+ // CHECK: atomicrmw fsub double* {{.*}} monotonic
+ return __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
+}
+
+// long double is the same as double for amdgcn.
+__device__ long double ffp3(long double *p) {
+ // CHECK-LABEL: @_Z4ffp3Pe
+ // CHECK: atomicrmw fsub double* {{.*}} monotonic
+ return __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -4366,11 +4366,18 @@
// For an arithmetic operation, the implied arithmetic must be well-formed.
if (Form == Arithmetic) {
// gcc does not enforce these rules for GNU atomics, but we do so for sanity.
- if (IsAddSub && !ValType->isIntegerType()
- && !ValType->isPointerType()) {
- Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int_or_ptr)
- << IsC11 << Ptr->getType() << Ptr->getSourceRange();
- return ExprError();
+ if (IsAddSub && !ValType->isIntegerType() && !ValType->isPointerType()) {
+ if (!Context.getTargetInfo().isFPAtomicFetchAddSubSupported()) {
+ Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int_or_ptr)
+ << IsC11 << Ptr->getType() << Ptr->getSourceRange();
+ return ExprError();
+ }
+ if (!ValType->isFloatingType()) {
+ Diag(ExprRange.getBegin(),
+ diag::err_atomic_op_needs_atomic_int_ptr_or_fp)
+ << IsC11 << Ptr->getType() << Ptr->getSourceRange();
+ return ExprError();
+ }
}
if (!IsAddSub && !ValType->isIntegerType()) {
Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_atomic_int)
@@ -4495,7 +4502,8 @@
// passed by address. For the rest, GNU uses by-address and C11 uses
// by-value.
assert(Form != Load);
- if (Form == Init || (Form == Arithmetic && ValType->isIntegerType()))
+ if (Form == Init || (Form == Arithmetic && ValType->isIntegerType()) ||
+ (IsAddSub && ValType->isFloatingType()))
Ty = ValType;
else if (Form == Copy || Form == Xchg) {
if (IsPassedByAddress) {
Index: clang/lib/CodeGen/CGAtomic.cpp
===================================================================
--- clang/lib/CodeGen/CGAtomic.cpp
+++ clang/lib/CodeGen/CGAtomic.cpp
@@ -594,21 +594,25 @@
break;
case AtomicExpr::AO__atomic_add_fetch:
- PostOp = llvm::Instruction::Add;
+ PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
+ : llvm::Instruction::Add;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
- Op = llvm::AtomicRMWInst::Add;
+ Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
+ : llvm::AtomicRMWInst::Add;
break;
case AtomicExpr::AO__atomic_sub_fetch:
- PostOp = llvm::Instruction::Sub;
+ PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
+ : llvm::Instruction::Sub;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
- Op = llvm::AtomicRMWInst::Sub;
+ Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub
+ : llvm::AtomicRMWInst::Sub;
break;
case AtomicExpr::AO__atomic_min_fetch:
@@ -806,6 +810,7 @@
bool Oversized = getContext().toBits(sizeChars) > MaxInlineWidthInBits;
bool Misaligned = (Ptr.getAlignment() % sizeChars) != 0;
bool UseLibcall = Misaligned | Oversized;
+ bool ShouldCastToIntPtrTy = true;
if (UseLibcall) {
CGM.getDiags().Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
@@ -875,11 +880,16 @@
EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty));
break;
}
- LLVM_FALLTHROUGH;
+ LLVM_FALLTHROUGH;
case AtomicExpr::AO__atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_sub:
case AtomicExpr::AO__atomic_add_fetch:
case AtomicExpr::AO__atomic_sub_fetch:
+ if (MemTy->isFloatingType()) {
+ ShouldCastToIntPtrTy = false;
+ }
+ LLVM_FALLTHROUGH;
+
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_store:
@@ -920,15 +930,23 @@
LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
AtomicInfo Atomics(*this, AtomicVal);
- Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
- if (Val1.isValid()) Val1 = Atomics.convertToAtomicIntPointer(Val1);
- if (Val2.isValid()) Val2 = Atomics.convertToAtomicIntPointer(Val2);
- if (Dest.isValid())
- Dest = Atomics.emitCastToAtomicIntPointer(Dest);
- else if (E->isCmpXChg())
+ if (ShouldCastToIntPtrTy) {
+ Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
+ if (Val1.isValid())
+ Val1 = Atomics.convertToAtomicIntPointer(Val1);
+ if (Val2.isValid())
+ Val2 = Atomics.convertToAtomicIntPointer(Val2);
+ }
+ if (Dest.isValid()) {
+ if (ShouldCastToIntPtrTy)
+ Dest = Atomics.emitCastToAtomicIntPointer(Dest);
+ } else if (E->isCmpXChg())
Dest = CreateMemTemp(RValTy, "cmpxchg.bool");
- else if (!RValTy->isVoidType())
- Dest = Atomics.emitCastToAtomicIntPointer(Atomics.CreateTempAlloca());
+ else if (!RValTy->isVoidType()) {
+ Dest = Atomics.CreateTempAlloca();
+ if (ShouldCastToIntPtrTy)
+ Dest = Atomics.emitCastToAtomicIntPointer(Dest);
+ }
// Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
if (UseLibcall) {
Index: clang/lib/Basic/Targets/AMDGPU.h
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.h
+++ clang/lib/Basic/Targets/AMDGPU.h
@@ -354,6 +354,8 @@
}
void setAuxTarget(const TargetInfo *Aux) override;
+
+ bool isFPAtomicFetchAddSubSupported() const override { return true; }
};
} // namespace targets
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -1414,6 +1414,9 @@
/// Whether target allows debuginfo types for decl only variables.
virtual bool allowDebugInfoForExternalVar() const { return false; }
+ /// Whether floating point atomic fetch add/sub is supported.
+ virtual bool isFPAtomicFetchAddSubSupported() const { return false; }
+
protected:
/// Copy type and layout related info.
void copyAuxTarget(const TargetInfo *Aux);
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7880,6 +7880,9 @@
def err_atomic_op_needs_trivial_copy : Error<
"address argument to atomic operation must be a pointer to a "
"trivially-copyable type (%0 invalid)">;
+def err_atomic_op_needs_atomic_int_ptr_or_fp : Error<
+ "address argument to atomic operation must be a pointer to %select{|atomic }0"
+ "integer, pointer or floating point type (%1 invalid)">;
def err_atomic_op_needs_atomic_int_or_ptr : Error<
"address argument to atomic operation must be a pointer to %select{|atomic }0"
"integer or pointer (%1 invalid)">;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits