https://github.com/bogner created 
https://github.com/llvm/llvm-project/pull/173093

Now that DXILMemIntrinsics can deal with resources, just use memcpy here.

From c4ce2100af41b6c7bf0f360adbb45cbb7515e906 Mon Sep 17 00:00:00 2001
From: Justin Bogner <[email protected]>
Date: Sun, 9 Nov 2025 17:47:58 -0800
Subject: [PATCH] [HLSL] Use memcpy for HLSL buffer copies that don't have
 padding

Now that DXILMemIntrinsics can deal with resources, just use memcpy
here.
---
 clang/lib/CodeGen/CGHLSLRuntime.cpp         | 11 ++++++-----
 clang/test/CodeGenHLSL/ArrayAssignable.hlsl | 10 ++--------
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 2e9602d1b3793..2791ae8a4bb95 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -1532,11 +1532,12 @@ class HLSLBufferCopyEmitter {
   bool emitCopy(QualType CType) {
     LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType);
 
-    // TODO: We should be able to fall back to a regular memcpy if the layout
-    // type doesn't have any padding, but that runs into issues in the backend
-    // currently.
-    //
-    // See https://github.com/llvm/wg-hlsl/issues/351
+    // If the layout type matches the original type, we can just fall back to a
+    // regular memcpy.
+    llvm::Type *OrigTy = CGF.CGM.getTypes().ConvertTypeForMem(CType);
+    if (LayoutTy == OrigTy)
+      return false;
+
     emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0),
                       llvm::ConstantInt::get(CGF.SizeTy, 0));
     return true;
diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
index d1bfc6db8b504..adb2ca00faac0 100644
--- a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
+++ b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl
@@ -153,17 +153,11 @@ void arr_assign8() {
   C = c1;
 }
 
-// TODO: We should be able to just memcpy here.
-// See https://github.com/llvm/wg-hlsl/issues/351
+// Since everything is aligned on 16 byte boundaries, we just get memcpy.
 //
 // CHECK-LABEL: define hidden void {{.*}}arr_assign9
 // CHECK: [[C:%.*]] = alloca [2 x <4 x i32>], align 16
-// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0
-// CHECK-NEXT: [[L0:%.*]] = load <4 x i32>, ptr addrspace(2) @c2, align 16
-// CHECK-NEXT: store <4 x i32> [[L0]], ptr [[V0]], align 16
-// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0, i32 1
-// CHECK-NEXT: [[L1:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(2) @c2, i32 0, i32 1), align 16
-// CHECK-NEXT: store <4 x i32> [[L1]], ptr [[V1]], align 16
+// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 16 [[C]], ptr addrspace(2) align 16 @c2, i32 32, i1 false)
 // CHECK-NEXT: ret void
 void arr_assign9() {
   int4 C[2];

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to