On Wed, 2017-09-13 at 18:56 -0700, Matt Arsenault wrote:
> ---
> tests/cl/program/execute/call-clobbers-amdgcn.cl | 68 +++
> tests/cl/program/execute/calls-struct.cl | 177 +++
> tests/cl/program/execute/calls-workitem-id.cl| 75 +++
> tests/cl/program/execute/calls.cl| 605
> +++
> tests/cl/program/execute/tail-calls.cl | 305
This would be a lot easier to review if it were one test per patch; I
could also push some of them right away.
Can you split it?
> 5 files changed, 1230 insertions(+)
> create mode 100644 tests/cl/program/execute/call-clobbers-amdgcn.cl
> create mode 100644 tests/cl/program/execute/calls-struct.cl
> create mode 100644 tests/cl/program/execute/calls-workitem-id.cl
> create mode 100644 tests/cl/program/execute/calls.cl
> create mode 100644 tests/cl/program/execute/tail-calls.cl
>
> diff --git a/tests/cl/program/execute/call-clobbers-amdgcn.cl
> b/tests/cl/program/execute/call-clobbers-amdgcn.cl
> new file mode 100644
> index 0..66243ddbe
> --- /dev/null
> +++ b/tests/cl/program/execute/call-clobbers-amdgcn.cl
> @@ -0,0 +1,68 @@
> +/*!
> +
> +[config]
> +name: calls
I think the names should be unique, but that might only apply to test
names. Did you see Python complain when running these?
> +clc_version_min: 10
> +
> +
> +[test]
> +name: callee saved sgpr
> +kernel_name: call_clobber_s40
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 0xabcd1234
> +
> +[test]
> +name: callee saved vgpr
> +kernel_name: call_clobber_v40
> +dimensions: 1
> +global_size: 1 0 0
> +arg_out: 0 buffer int[1] 0xabcd1234
> +
> +!*/
> +
> +#ifndef __AMDGCN__
> +#error This test is only for amdgcn
> +#endif
This needs "device_regexp" in the config section so that the test is
skipped instead of failing on other platforms/devices.
> +
> +__attribute__((noinline))
> +void clobber_s40()
> +{
> +__asm volatile("s_mov_b32 s40, 0xdead" : : : "s40");
> +}
> +
> +kernel void call_clobber_s40(__global int* ret)
> +{
> +__asm volatile("s_mov_b32 s40, 0xabcd1234" : : : "s40");
> +
> +clobber_s40();
> +
> +int tmp;
> +
> +__asm volatile("v_mov_b32 %0, s40"
> + : "=v"(tmp)
> + :
> + : "s40");
> +*ret = tmp;
> +}
> +
> +__attribute__((noinline))
> +void clobber_v40()
> +{
> +__asm volatile("v_mov_b32 v40, 0xdead" : : : "v40");
> +}
> +
> +kernel void call_clobber_v40(__global int* ret)
> +{
> +__asm volatile("v_mov_b32 v40, 0xabcd1234" : : : "v40");
> +
> +clobber_v40();
> +
> +int tmp;
> +__asm volatile("v_mov_b32 %0, v40"
> + : "=v"(tmp)
> + :
> + : "v40");
> +*ret = tmp;
> +}
> +
> diff --git a/tests/cl/program/execute/calls-struct.cl
> b/tests/cl/program/execute/calls-struct.cl
> new file mode 100644
> index 0..2e8176c8e
> --- /dev/null
> +++ b/tests/cl/program/execute/calls-struct.cl
> @@ -0,0 +1,177 @@
> +/*!
> +
> +[config]
> +name: calls
> +clc_version_min: 10
> +
> +[test]
> +name: byval struct
> +kernel_name: call_i32_func_byval_Char_IntArray
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16]\
> + 1021 1022 1023 1024 1025 1026 1027 1028 \
> + 1029 1030 1031 1032 1033 1034 1035 1036
> +
> +arg_out: 1 buffer int[16] \
> + 14 14 14 14 \
> + 14 14 14 14 \
> + 14 14 14 14 \
> + 14 14 14 14 \
> +
> +arg_in: 2 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +
> +[test]
> +name: sret struct
> +kernel_name: call_sret_Char_IntArray_func
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16]\
> + 921 922 923 924 925 926 927 928 \
> + 929 930 931 932 933 934 935 936
> +
> +arg_in: 1 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +
> +[test]
> +name: byval struct and sret struct
> +kernel_name: call_sret_Char_IntArray_func_byval_Char_IntArray
> +dimensions: 1
> +global_size: 16 0 0
> +
> +arg_out: 0 buffer int[16]\
> + 86 87 88 89 \
> + 90 91 92 93 \
> + 94 95 96 97 \
> + 98 99 100 101
> +
> +arg_out: 1 buffer int[16]\
> + 134 135 136 137 \
> + 138 139 140 141 \
> + 142 143 144 145 \
> + 146 147 148 149
> +
> +arg_in: 2 buffer int[16] \
> + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
> +
> +!*/
> +
> +typedef struct ByVal_Char_IntArray {
> +char c;
> +int i[4];
> +} ByVal_Char_IntArray;
> +
> +__attribute__((noinline))
AFAIK, noinline is not defined in CLC, so it should be #ifdef'ed on
__clang__.
> +int i32_func_byval_Char_IntArray(ByVal_Char_IntArray st)
> +{
> +st.i[0] += 100;
> +
> +int sum = 0;
> +for (int i = 0; i < 4; ++i)
> +{
> +sum += st.i[i];
> +}
> +
> +sum += st.c;
> +return sum;
> +}
> +
> +kernel void call_i32_func_byval_Char_IntArray(global int* out0,
> + global int* out1,
> + global