[PATCH] D89980: [hip] Remove the coercion on aggregate kernel arguments.
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. hliao marked an inline comment as done. Closed by commit rG8920ef06a138: [hip] Remove the coercion on aggregate kernel arguments. (authored by hliao). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D89980/new/ https://reviews.llvm.org/D89980 Files: clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu clang/test/CodeGenCUDA/kernel-args.cu Index: clang/test/CodeGenCUDA/kernel-args.cu === --- clang/test/CodeGenCUDA/kernel-args.cu +++ clang/test/CodeGenCUDA/kernel-args.cu @@ -1,22 +1,23 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s -// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fcuda-is-device \ +// RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s #include "Inputs/cuda.h" struct A { int a[32]; + float *p; }; -// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}) -// NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x) +// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}) +// NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 8 %x) __global__ void kernel(A x) { } class Kernel { public: - // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}) - // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x) + // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}) + // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 8 %x) static __global__ void memberKernel(A x){} template static __global__ void templateMemberKernel(T x) {} }; @@ -29,11 +30,11 @@ void test() { Kernel K; - // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}} - // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x) + // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}} + // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 8 %x) launch((void*)templateKernel); - // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}} - // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x) + // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}} + // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 8 %x) launch((void*)Kernel::templateMemberKernel); } Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu === --- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -9,8 +9,6 @@ // Coerced struct from `struct S` without all generic pointers lowered into // global ones. -// COMMON: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* } -// COMMON: %struct.T.coerce = type { [2 x float addrspace(1)*] } // On the host-side compilation, generic pointer won't be coerced. // HOST-NOT: %struct.S.coerce @@ -61,15 +59,17 @@ // `by-val` struct will be coerced into a similar struct with all generic // pointers lowerd into global ones. // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1) -// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce) -// OPT: [[P0:%.*]] = extractvalue %struct.S.coerce %s.coerce, 0 -// OPT: [[P1:%.*]] = extractvalue %struct.S.coerce %s.coerce, 1 -// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[P0]], align 4 +// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0) +// OPT: [[R0:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0 +// OPT: [[P0:%.*]] = load i32*, i32* addrspace(4)* [[R0]], align 8 +// OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1 +// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8 +// OPT: [[V0:%.*]] = load i32, i32* [[P0]], align 4 // OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1 -// OPT: store i32
[PATCH] D89980: [hip] Remove the coercion on aggregate kernel arguments.
hliao marked an inline comment as done. hliao added inline comments. Comment at: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu:55 struct S { int *x; arsenm wrote: > Should also have a case with a single element struct, which will be treated > like a scalar in test kernel8 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D89980/new/ https://reviews.llvm.org/D89980 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D89980: [hip] Remove the coercion on aggregate kernel arguments.
hliao updated this revision to Diff 304969. hliao added a comment. Add a test case for the single element struct. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D89980/new/ https://reviews.llvm.org/D89980 Files: clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu clang/test/CodeGenCUDA/kernel-args.cu Index: clang/test/CodeGenCUDA/kernel-args.cu === --- clang/test/CodeGenCUDA/kernel-args.cu +++ clang/test/CodeGenCUDA/kernel-args.cu @@ -1,22 +1,23 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s -// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -fcuda-is-device \ +// RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s #include "Inputs/cuda.h" struct A { int a[32]; + float *p; }; -// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}) -// NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x) +// AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}) +// NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 8 %x) __global__ void kernel(A x) { } class Kernel { public: - // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}}) - // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x) + // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}) + // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 8 %x) static __global__ void memberKernel(A x){} template static __global__ void templateMemberKernel(T x) {} }; @@ -29,11 +30,11 @@ void test() { Kernel K; - // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}} - // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x) + // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}} + // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 8 %x) launch((void*)templateKernel); - // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 4 %{{.+}} - // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x) + // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}} + // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 8 %x) launch((void*)Kernel::templateMemberKernel); } Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu === --- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -9,8 +9,6 @@ // Coerced struct from `struct S` without all generic pointers lowered into // global ones. -// COMMON: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* } -// COMMON: %struct.T.coerce = type { [2 x float addrspace(1)*] } // On the host-side compilation, generic pointer won't be coerced. // HOST-NOT: %struct.S.coerce @@ -61,15 +59,17 @@ // `by-val` struct will be coerced into a similar struct with all generic // pointers lowerd into global ones. // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1) -// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce) -// OPT: [[P0:%.*]] = extractvalue %struct.S.coerce %s.coerce, 0 -// OPT: [[P1:%.*]] = extractvalue %struct.S.coerce %s.coerce, 1 -// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[P0]], align 4 +// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0) +// OPT: [[R0:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0 +// OPT: [[P0:%.*]] = load i32*, i32* addrspace(4)* [[R0]], align 8 +// OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1 +// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8 +// OPT: [[V0:%.*]] = load i32, i32* [[P0]], align 4 // OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1 -// OPT: store i32 [[INC]], i32 addrspace(1)* [[P0]], align 4 -// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4 +// OPT: store i32 [[INC]], i32* [[P0]], align 4 +//
[PATCH] D89980: [hip] Remove the coercion on aggregate kernel arguments.
arsenm added inline comments. Comment at: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu:55 struct S { int *x; Should also have a case with a single element struct, which will be treated like a scalar Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D89980/new/ https://reviews.llvm.org/D89980 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D89980: [hip] Remove the coercion on aggregate kernel arguments.
hliao added a comment. PING for review Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D89980/new/ https://reviews.llvm.org/D89980 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits