https://github.com/macurtis-amd created https://github.com/llvm/llvm-project/pull/154380
Fixes a bug on AMDGPU targets where a pointer was stored as an address space 5 pointer, but then loaded as an address space 0 pointer. The issue was found as part of [Kokkos](https://github.com/kokkos/kokkos) testing, specifically `hip.atomics` (see [core/unit_test/TestAtomics.hpp](https://github.com/kokkos/kokkos/blob/develop/core/unit_test/TestAtomics.hpp)). The issue was introduced by commit [39ec9de7c230](https://github.com/llvm/llvm-project/commit/39ec9de7c230) - [clang][CodeGen] sret args should always point to the alloca AS, so use that (https://github.com/llvm/llvm-project/pull/114062). From 1af9c61c2ea4797145fc3e41a4fdebe6548a7dd9 Mon Sep 17 00:00:00 2001 From: Matthew Curtis <macur...@amd.com> Date: Mon, 18 Aug 2025 06:26:49 -0500 Subject: [PATCH] [clang][CodeGen] add addr space cast if needed when storing ptrs --- clang/lib/CodeGen/CGExpr.cpp | 12 ++++++ clang/test/CodeGenCXX/amdgcn-func-arg.cpp | 7 ++-- clang/test/CodeGenHIP/store-addr-space.hip | 47 ++++++++++++++++++++++ 3 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGenHIP/store-addr-space.hip diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d229d81d6b934..c02e84eb753e9 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2209,6 +2209,18 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, } } + // When storing a pointer, perform address space cast if needed. 
+ if (auto *ValueTy = dyn_cast<llvm::PointerType>(Value->getType())) { + if (auto *MemTy = dyn_cast<llvm::PointerType>(Addr.getElementType())) { + LangAS ValueAS = getLangASFromTargetAS(ValueTy->getAddressSpace()); + LangAS MemAS = getLangASFromTargetAS(MemTy->getAddressSpace()); + if (ValueAS != MemAS) { + Value = + getTargetHooks().performAddrSpaceCast(*this, Value, ValueAS, MemTy); + } + } + } + Value = EmitToMemory(Value, Ty); LValue AtomicLValue = diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp index a5f83dc91b038..21945bfc36677 100644 --- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp +++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp @@ -24,9 +24,10 @@ void func_with_ref_arg(B &b); // CHECK-NEXT: [[P:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[A_INDIRECT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_INDIRECT_ADDR]] to ptr // CHECK-NEXT: [[P_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P]] to ptr -// CHECK-NEXT: store ptr addrspace(5) [[A:%.*]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr -// CHECK-NEXT: store ptr [[A_ASCAST]], ptr [[P_ASCAST]], align 8 +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A:%.*]] to ptr +// CHECK-NEXT: store ptr [[A_ASCAST]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[A_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +// CHECK-NEXT: store ptr [[A_ASCAST1]], ptr [[P_ASCAST]], align 8 // CHECK-NEXT: ret void // void func_with_indirect_arg(A a) { diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip new file mode 100644 index 0000000000000..46ab1157d0704 --- /dev/null +++ b/clang/test/CodeGenHIP/store-addr-space.hip @@ -0,0 +1,47 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "bar" --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: 
%clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s + +struct Foo { + unsigned long long val; +// + __attribute__((device)) inline Foo() { val = 0; } + __attribute__((device)) inline Foo(const Foo &src) { val = src.val; } + __attribute__((device)) inline Foo(const volatile Foo &src) { val = src.val; } +}; + +// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo( +// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr +// AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr +// AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr +// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4 +// AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr +// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]] +// AMDGCN-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr +// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST1]], ptr [[DST_ASCAST]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8 +// 
AMDGCN-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_ASCAST]], align 8 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 0 +// AMDGCN-NEXT: store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8 +// AMDGCN-NEXT: ret void +// +__attribute__((device)) Foo bar(const Foo *const src_ptr) { + Foo result; + + union { + Foo* const ptr; + unsigned long long * const ptr64; + } dst = {&result}; + + dst.ptr64[0] = src_ptr->val; + return result; +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits