https://github.com/macurtis-amd created 
https://github.com/llvm/llvm-project/pull/154380

Fixes a bug on AMDGPU targets where a pointer was stored as address space 5, 
but then loaded as address space 0.

Issue found as part of [Kokkos](https://github.com/kokkos/kokkos) testing, 
specifically `hip.atomics` (see 
[core/unit_test/TestAtomics.hpp](https://github.com/kokkos/kokkos/blob/develop/core/unit_test/TestAtomics.hpp)).

Issue was introduced by commit 
[39ec9de7c230](https://github.com/llvm/llvm-project/commit/39ec9de7c230) - 
[clang][CodeGen] sret args should always point to the alloca AS, so use that 
(https://github.com/llvm/llvm-project/pull/114062).

>From 1af9c61c2ea4797145fc3e41a4fdebe6548a7dd9 Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macur...@amd.com>
Date: Mon, 18 Aug 2025 06:26:49 -0500
Subject: [PATCH] [clang][CodeGen] add addr space cast if needed when storing
 ptrs

---
 clang/lib/CodeGen/CGExpr.cpp               | 12 ++++++
 clang/test/CodeGenCXX/amdgcn-func-arg.cpp  |  7 ++--
 clang/test/CodeGenHIP/store-addr-space.hip | 47 ++++++++++++++++++++++
 3 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CodeGenHIP/store-addr-space.hip

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d229d81d6b934..c02e84eb753e9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2209,6 +2209,18 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value 
*Value, Address Addr,
     }
   }
 
+  // When storing a pointer, perform address space cast if needed.
+  if (auto *ValueTy = dyn_cast<llvm::PointerType>(Value->getType())) {
+    if (auto *MemTy = dyn_cast<llvm::PointerType>(Addr.getElementType())) {
+      LangAS ValueAS = getLangASFromTargetAS(ValueTy->getAddressSpace());
+      LangAS MemAS = getLangASFromTargetAS(MemTy->getAddressSpace());
+      if (ValueAS != MemAS) {
+        Value =
+            getTargetHooks().performAddrSpaceCast(*this, Value, ValueAS, 
MemTy);
+      }
+    }
+  }
+
   Value = EmitToMemory(Value, Ty);
 
   LValue AtomicLValue =
diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp 
b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
index a5f83dc91b038..21945bfc36677 100644
--- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -24,9 +24,10 @@ void func_with_ref_arg(B &b);
 // CHECK-NEXT:    [[P:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[A_INDIRECT_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[A_INDIRECT_ADDR]] to ptr
 // CHECK-NEXT:    [[P_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P]] to 
ptr
-// CHECK-NEXT:    store ptr addrspace(5) [[A:%.*]], ptr 
[[A_INDIRECT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to 
ptr
-// CHECK-NEXT:    store ptr [[A_ASCAST]], ptr [[P_ASCAST]], align 8
+// CHECK-NEXT:    [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A:%.*]] 
to ptr
+// CHECK-NEXT:    store ptr [[A_ASCAST]], ptr [[A_INDIRECT_ADDR_ASCAST]], 
align 8
+// CHECK-NEXT:    [[A_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to 
ptr
+// CHECK-NEXT:    store ptr [[A_ASCAST1]], ptr [[P_ASCAST]], align 8
 // CHECK-NEXT:    ret void
 //
 void func_with_indirect_arg(A a) {
diff --git a/clang/test/CodeGenHIP/store-addr-space.hip 
b/clang/test/CodeGenHIP/store-addr-space.hip
new file mode 100644
index 0000000000000..46ab1157d0704
--- /dev/null
+++ b/clang/test/CodeGenHIP/store-addr-space.hip
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --functions "bar" --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm 
-fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s
+
+struct Foo {
+  unsigned long long val;
+//
+  __attribute__((device)) inline Foo() { val = 0; }
+  __attribute__((device)) inline Foo(const Foo &src) { val = src.val; }
+  __attribute__((device)) inline Foo(const volatile Foo &src) { val = src.val; 
}
+};
+
+// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
+// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable 
sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef 
[[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, 
addrspace(5)
+// AMDGCN-NEXT:    [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT:    [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, 
addrspace(5)
+// AMDGCN-NEXT:    [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RESULT_PTR]] to ptr
+// AMDGCN-NEXT:    [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[SRC_PTR_ADDR]] to ptr
+// AMDGCN-NEXT:    [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] 
to ptr
+// AMDGCN-NEXT:    store ptr addrspace(5) [[AGG_RESULT]], ptr 
[[RESULT_PTR_ASCAST]], align 4
+// AMDGCN-NEXT:    store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
+// AMDGCN-NEXT:    [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[AGG_RESULT]] to ptr
+// AMDGCN-NEXT:    call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 
dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
+// AMDGCN-NEXT:    [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) 
[[AGG_RESULT]] to ptr
+// AMDGCN-NEXT:    store ptr [[AGG_RESULT_ASCAST1]], ptr [[DST_ASCAST]], align 
8
+// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 
8
+// AMDGCN-NEXT:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], 
ptr [[TMP0]], i32 0, i32 0
+// AMDGCN-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
+// AMDGCN-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr 
[[TMP2]], i64 0
+// AMDGCN-NEXT:    store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8
+// AMDGCN-NEXT:    ret void
+//
+__attribute__((device)) Foo bar(const Foo *const src_ptr) {
+  Foo result;
+
+  union {
+    Foo* const ptr;
+    unsigned long long * const ptr64;
+  } dst = {&result};
+
+  dst.ptr64[0] = src_ptr->val;
+  return result;
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to