[PATCH] D155986: [clang][AMDGPU]: Don't use byval for struct arguments in function ABI

2023-08-11 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

Thanks! Happy to see function calls getting cheaper


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155986/new/

https://reviews.llvm.org/D155986

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D155986: [clang][AMDGPU]: Don't use byval for struct arguments in function ABI

2023-08-11 Thread Changpeng Fang via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd77c62053c94: [clang][AMDGPU]: Don't use byval for 
struct arguments in function ABI (authored by cfang).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Changed prior to commit:
  https://reviews.llvm.org/D155986?vs=549545&id=549546#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155986/new/

https://reviews.llvm.org/D155986

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/Targets/AMDGPU.cpp
  clang/test/CodeGenCUDA/kernel-args.cu
  clang/test/CodeGenCXX/amdgcn-func-arg.cpp
  clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
  clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
  clang/test/CodeGenOpenCL/byval.cl
  llvm/docs/AMDGPUUsage.rst

Index: llvm/docs/AMDGPUUsage.rst
===
--- llvm/docs/AMDGPUUsage.rst
+++ llvm/docs/AMDGPUUsage.rst
@@ -13812,6 +13812,10 @@
 9.  All other registers are unspecified.
 10. Any necessary ``s_waitcnt`` has been performed to ensure memory is available
 to the function.
+11. Use pass-by-reference (byref) instead of pass-by-value (byval) for struct
+arguments in C ABI. Callee is responsible for allocating stack memory and
+copying the value of the struct if modified. Note that the backend still
+supports byval for struct arguments.
 
 On exit from a function:
 
Index: clang/test/CodeGenOpenCL/byval.cl
===
--- clang/test/CodeGenOpenCL/byval.cl
+++ clang/test/CodeGenOpenCL/byval.cl
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s
-
+// RUN: %clang_cc1 -emit-llvm -o - -triple i686-pc-darwin %s | FileCheck -check-prefix=X86 %s
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck -check-prefix=AMDGCN %s
 struct A {
   int x[100];
 };
@@ -8,8 +8,10 @@
 
 int g() {
   struct A a;
-  // CHECK: call i32 @f(ptr addrspace(5) noundef byval{{.*}}%a)
+  // X86:call i32 @f(ptr noundef nonnull byval(%struct.A) align 4 %a)
+  // AMDGCN: call i32 @f(ptr addrspace(5) noundef byref{{.*}}%a)
   return f(a);
 }
 
-// CHECK: declare i32 @f(ptr addrspace(5) noundef byval{{.*}})
+// X86:   declare i32 @f(ptr noundef byval(%struct.A) align 4)
+// AMDGCN: declare i32 @f(ptr addrspace(5) noundef byref{{.*}})
Index: clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
===
--- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -448,11 +448,11 @@
 // CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) nocapture noundef readnone byval(%struct.struct_arg) align 4 %s)
+// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) nocapture noundef readnone byref(%struct.struct_arg) align 4 %{{.*}})
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) nocapture noundef readnone byval(%struct.num_regs_nested_struct) align 8 %arg4)
+// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) nocapture noundef readnone byref(%struct.num_regs_nested_struct) align 8 %{{.*}})
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -467,7 +467,7 @@
 // CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8