On Wed, 2017-09-13 at 18:56 -0700, Matt Arsenault wrote: > --- > tests/cl/program/execute/call-clobbers-amdgcn.cl | 68 +++ > tests/cl/program/execute/calls-struct.cl | 177 +++++++ > tests/cl/program/execute/calls-workitem-id.cl | 75 +++ > tests/cl/program/execute/calls.cl | 605 > +++++++++++++++++++++++ > tests/cl/program/execute/tail-calls.cl | 305 ++++++++++++
This would be a lot easier to review if it were 1 test per patch, I could also push some of them right away. can you split it? > 5 files changed, 1230 insertions(+) > create mode 100644 tests/cl/program/execute/call-clobbers-amdgcn.cl > create mode 100644 tests/cl/program/execute/calls-struct.cl > create mode 100644 tests/cl/program/execute/calls-workitem-id.cl > create mode 100644 tests/cl/program/execute/calls.cl > create mode 100644 tests/cl/program/execute/tail-calls.cl > > diff --git a/tests/cl/program/execute/call-clobbers-amdgcn.cl > b/tests/cl/program/execute/call-clobbers-amdgcn.cl > new file mode 100644 > index 000000000..66243ddbe > --- /dev/null > +++ b/tests/cl/program/execute/call-clobbers-amdgcn.cl > @@ -0,0 +1,68 @@ > +/*! > + > +[config] > +name: calls I think the names should be unique, but that might only apply to test names. Did you see python complains when running these? > +clc_version_min: 10 > + > + > +[test] > +name: callee saved sgpr > +kernel_name: call_clobber_s40 > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer int[1] 0xabcd1234 > + > +[test] > +name: callee saved vgpr > +kernel_name: call_clobber_v40 > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer int[1] 0xabcd1234 > + > +!*/ > + > +#ifndef __AMDGCN__ > +#error This test is only for amdgcn > +#endif This needs "device_regexp" in config section to skip instead of fail on other platforms/devices. > + > +__attribute__((noinline)) > +void clobber_s40() > +{ > + __asm volatile("s_mov_b32 s40, 0xdead" : : : "s40"); > +} > + > +kernel void call_clobber_s40(__global int* ret) > +{ > + __asm volatile("s_mov_b32 s40, 0xabcd1234" : : : "s40"); > + > + clobber_s40(); > + > + int tmp; > + > + __asm volatile("v_mov_b32 %0, s40" > + : "=v"(tmp) > + : > + : "s40"); > + *ret = tmp; > +} > + > +__attribute__((noinline)) > +void clobber_v40() > +{ > + __asm volatile("v_mov_b32 v40, 0xdead" : : : "v40"); > +} > + > +kernel void call_clobber_v40(__global int* ret) > +{ > + __asm volatile("v_mov_b32 v40, 0xabcd1234" : : : "v40"); > + > + clobber_v40(); > + > + int tmp; > + __asm volatile("v_mov_b32 %0, v40" > + : "=v"(tmp) > + : > + : "v40"); > + *ret = tmp; > +} > + > diff --git a/tests/cl/program/execute/calls-struct.cl > b/tests/cl/program/execute/calls-struct.cl > new file mode 100644 > index 000000000..2e8176c8e > --- /dev/null > +++ b/tests/cl/program/execute/calls-struct.cl > @@ -0,0 +1,177 @@ > +/*! > + > +[config] > +name: calls > +clc_version_min: 10 > + > +[test] > +name: byval struct > +kernel_name: call_i32_func_byval_Char_IntArray > +dimensions: 1 > +global_size: 16 0 0 > + > +arg_out: 0 buffer int[16] \ > + 1021 1022 1023 1024 1025 1026 1027 1028 \ > + 1029 1030 1031 1032 1033 1034 1035 1036 > + > +arg_out: 1 buffer int[16] \ > + 14 14 14 14 \ > + 14 14 14 14 \ > + 14 14 14 14 \ > + 14 14 14 14 \ > + > +arg_in: 2 buffer int[16] \ > + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 > + > + > +[test] > +name: sret struct > +kernel_name: call_sret_Char_IntArray_func > +dimensions: 1 > +global_size: 16 0 0 > + > +arg_out: 0 buffer int[16] \ > + 921 922 923 924 925 926 927 928 \ > + 929 930 931 932 933 934 935 936 > + > +arg_in: 1 buffer int[16] \ > + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 > + > + > +[test] > +name: byval struct and sret struct > +kernel_name: call_sret_Char_IntArray_func_byval_Char_IntArray > +dimensions: 1 > +global_size: 16 0 0 > + > +arg_out: 0 buffer int[16] \ > + 86 87 88 89 \ > + 90 91 92 93 \ > + 94 95 96 97 \ > + 98 99 100 101 > + > +arg_out: 1 buffer int[16] \ > + 134 135 136 137 \ > + 138 139 140 141 \ > + 142 143 144 145 \ > + 146 147 148 149 > + > +arg_in: 2 buffer int[16] \ > + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 > + > +!*/ > + > +typedef struct ByVal_Char_IntArray { > + char c; > + int i[4]; > +} ByVal_Char_IntArray; > + > +__attribute__((noinline)) afaik, noinline is not defined in CLC, so it should be ifdefed on __clang__ > +int i32_func_byval_Char_IntArray(ByVal_Char_IntArray st) > +{ > + st.i[0] += 100; > + > + int sum = 0; > + for (int i = 0; i < 4; ++i) > + { > + sum += st.i[i]; > + } > + > + sum += st.c; > + return sum; > +} > + > +kernel void call_i32_func_byval_Char_IntArray(global int* out0, > + global int* out1, > + global int* input) > +{ > + ByVal_Char_IntArray st; > + st.c = 15; > + > + int id = get_global_id(0); > + > + int val = input[id]; > + st.i[0] = 14; > + st.i[1] = -8; > + st.i[2] = val; > + st.i[3] = 900; > + > + int result = i32_func_byval_Char_IntArray(st); > + out0[id] = result; > + out1[id] = st.i[0]; > +} > + > +__attribute__((noinline)) > +ByVal_Char_IntArray sret_Char_IntArray_func(global int* input, int id) > +{ > + ByVal_Char_IntArray st; > + st.c = 15; > + > + int val = input[id]; > + st.i[0] = 14; > + st.i[1] = -8; > + st.i[2] = val; > + st.i[3] = 900; > + > + return st; > +} > + > +kernel void call_sret_Char_IntArray_func(global int* output, global int* > input) > +{ > + int id = get_global_id(0); > + ByVal_Char_IntArray st = sret_Char_IntArray_func(input, id); > + > + int sum = 0; > + for (int i = 0; i < 4; ++i) > + { > + sum += st.i[i]; > + } > + > + sum += st.c; > + output[id] = sum; > +} > + > +__attribute__((noinline)) > +ByVal_Char_IntArray > sret_Char_IntArray_func_byval_Char_IntArray(ByVal_Char_IntArray st) > +{ > + st.c += 15; > + > + st.i[0] += 14; > + st.i[1] -= 8; > + st.i[2] += 9; > + st.i[3] += 18; > + > + return st; > +} > + > +kernel void call_sret_Char_IntArray_func_byval_Char_IntArray(global int* > output0, > + global int* > output1, > + global int* > input) > +{ > + int id = get_global_id(0); > + > + volatile ByVal_Char_IntArray st0; > + st0.c = -20; > + > + int val = input[id]; > + st0.i[0] = 14; > + st0.i[1] = -8; > + st0.i[2] = val; > + st0.i[3] = 100; > + > + ByVal_Char_IntArray st1 = > sret_Char_IntArray_func_byval_Char_IntArray(st0); > + > + int sum0 = 0; > + int sum1 = 0; > + for (int i = 0; i < 4; ++i) > + { > + sum0 += st0.i[i]; > + sum1 += st1.i[i]; > + } > + > + sum0 += st0.c; > + sum1 += st1.c; > + > + output0[id] = sum0; > + output1[id] = sum1; > +} > diff --git a/tests/cl/program/execute/calls-workitem-id.cl > b/tests/cl/program/execute/calls-workitem-id.cl > new file mode 100644 > index 000000000..6be3a2acf > --- /dev/null > +++ b/tests/cl/program/execute/calls-workitem-id.cl > @@ -0,0 +1,75 @@ > +/*! > + > +[config] > +name: calls > +clc_version_min: 10 > + > +[test] > +name: pass_get_global_id_0 please use spaces instead of underscores for test names > +kernel_name: kernel_call_pass_get_global_id_0 > +dimensions: 1 > +global_size: 64 0 0 > +arg_out: 0 buffer uint[64] \ > + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 \ > + 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 \ > + 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 \ > + 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 > + > +[test] > +name: pass_get_global_id_012 > +kernel_name: kernel_call_pass_get_global_id_012 > +dimensions: 3 > +global_size: 8 4 2 > +arg_out: 0 buffer uint[64] \ > + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \ > + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \ > + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \ > + 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 > + > +arg_out: 1 buffer uint[64] \ > + 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 \ > + 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 \ > + 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 \ > + 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 > + > +arg_out: 2 buffer uint[64] \ > + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ > + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ > + 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 \ > + 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 > + > +!*/ > + > +__attribute__((noinline)) > +void func_get_global_id_0(volatile global uint* out) > +{ > + uint gid = get_global_id(0); > + out[gid] = gid; > +} > + > +kernel void kernel_call_pass_get_global_id_0(global uint *out) > +{ > + func_get_global_id_0(out); > +} > + > +__attribute__((noinline)) > +void func_get_global_id_012(volatile global uint* out0, > + volatile global uint* out1, > + volatile global uint* out2) > +{ > + uint id0 = get_global_id(0); > + uint id1 = get_global_id(1); > + uint id2 = get_global_id(2); > + uint flat_id = (id2 * get_global_size(1) + id1) * get_global_size(0) + > id0; > + > + out0[flat_id] = id0; > + out1[flat_id] = id1; > + out2[flat_id] = id2; > +} > + > +kernel void kernel_call_pass_get_global_id_012(global uint *out0, > + global uint *out1, > + global uint *out2) > +{ > + func_get_global_id_012(out0, out1, out2); > +} > diff --git a/tests/cl/program/execute/calls.cl > b/tests/cl/program/execute/calls.cl > new file mode 100644 > index 000000000..e772aabcf > --- /dev/null > +++ b/tests/cl/program/execute/calls.cl > @@ -0,0 +1,605 @@ > +/*! > + > +[config] > +name: calls > +clc_version_min: 10 > + > +[test] > +name: void_func_void > +kernel_name: call_void_func_void > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer int[1] 12345 > + > +[test] > +name: i32_func_void > +kernel_name: call_i32_func_void > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer int[1] 0x12345 > + > +[test] > +name: i64_func_void > +kernel_name: call_i64_func_void > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer long[1] 0x100000000000 > + > + > +[test] > +name: call_i32_func_void_callee_stack > +kernel_name: call_i32_func_void_callee_stack > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer int[1] 290 > + > +[test] > +name: call_i32_func_p0i32_i32_caller_stack > +kernel_name: call_i32_func_p0i32_i32_caller_stack > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer int[1] 175 > + > +[test] > +name: call_i32_func_p0i32_i32_indirect_kernel_stack > +kernel_name: call_i32_func_p0i32_i32_indirect_kernel_stack > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer int[1] 241 > + > +[test] > +name: call_i32_func_p0i32_i32_indirect_function_stack > +kernel_name: call_i32_func_p0i32_i32_indirect_function_stack > +dimensions: 1 > +global_size: 1 0 0 > +arg_out: 0 buffer int[1] 291 > + > +[test] > +name: callee stack corruption > +kernel_name: kernel_call_nested_stack_usage > +dimensions: 1 > +global_size: 10 0 0 > + > +arg_out: 0 buffer int4[10] \ > + 53 48 156 160 \ > + 84 248 102 150 \ > + 102 56 217 106 \ > + 100 123 151 139 \ > + 80 150 135 163 \ > + 223 99 117 199 \ > + 187 262 223 169 \ > + 277 129 73 121 \ > + 162 165 138 137 \ > + 204 207 223 145 \ > + > + > +arg_in: 1 buffer int4[10] \ > + 0 13 76 46 \ > + 4 74 33 63 \ > + 26 9 95 7 \ > + 41 54 47 29 \ > + 15 68 38 39 \ > + 91 43 14 95 \ > + 44 83 69 70 \ > + 89 54 14 45 \ > + 77 63 21 21 \ > + 64 70 80 70 > + > +arg_in: 2 buffer int4[10] \ > + 53 22 4 68 \ > + 76 100 36 24 \ > + 50 38 27 92 \ > + 18 15 57 81 \ > + 50 14 59 85 \ > + 41 13 89 9 \ > + 99 96 85 29 \ > + 99 21 45 31 \ > + 8 39 96 95 \ > + 76 67 63 5 > + > +[test] > +name: nested_calls > +kernel_name: kernel_nested_calls > +dimensions: 1 > +global_size: 4 0 0 > + > +arg_out: 0 buffer int[4] \ > + 1 7 155 -4 > + > +arg_in: 1 buffer int[4] \ > + 0 100 1234 -912 > + > +arg_in: 2 buffer int[4] \ > + 1 4 2 45 > + > + > +[test] > +name: stack_arg > +kernel_name: kernel_call_stack_arg > +dimensions: 1 > +global_size: 10 0 0 > + > + > +arg_out: 0 buffer int4[10] \ > + 11440 1348 29304 16698 \ > + 47975 3626 30850 13224 \ > + 8235 30495 31995 1455 \ > + 16048 40512 33992 7028 \ > + 9450 5356 21330 23130 \ > + 21120 35186 52896 49968 \ > + 34083 28520 0 0 \ > + 12384 41492 4420 17880 \ > + 37310 19320 37518 13175 \ > + 23852 16014 22734 24284 \ > + > + > +arg_in: 1 buffer int4[10] \ > + 0 13 76 46 \ > + 63 76 100 36 \ > + 27 92 53 46 \ > + 53 50 96 75 \ > + 99 41 14 57 \ > + 35 45 81 94 \ > + 80 71 74 1 \ > + 78 73 32 42 \ > + 60 17 83 15 \ > + 13 53 31 59 > + > +arg_in: 2 buffer int4[10] \ > + 53 22 4 68 \ > + 24 99 72 76 \ > + 95 5 76 77 \ > + 56 89 63 85 \ > + 25 49 46 97 \ > + 65 21 68 91 \ > + 89 53 46 6 \ > + 68 68 20 84 \ > + 99 25 23 10 \ > + 52 43 26 37 > + > +arg_in: 3 buffer int4[10] \ > + 68 94 38 52 \ > + 65 7 63 89 \ > + 83 12 1 69 \ > + 16 21 72 13 \ > + 12 20 32 63 \ > + 25 86 47 51 \ > + 72 49 67 68 \ > + 71 83 9 8 \ > + 22 64 70 80 \ > + 39 45 48 39 > + > +arg_in: 4 buffer int4[10] \ > + 83 3 5 53 \ > + 27 44 77 48 \ > + 87 63 74 73 \ > + 9 27 0 41 \ > + 12 65 62 81 \ > + 60 82 76 46 \ > + 20 92 87 89 \ > + 77 63 21 21 \ > + 70 76 67 63 \ > + 28 7 37 25 > + > +arg_in: 5 buffer int4[10] \ > + 67 0 38 6 \ > + 24 27 36 16 \ > + 100 89 23 30 \ > + 2 71 94 24 \ > + 25 48 39 20 \ > + 96 63 44 83 \ > + 54 14 45 99 \ > + 8 39 96 95 \ > + 5 60 22 32 \ > + 67 68 51 73 > + > +arg_in: 6 buffer int4[10] \ > + 42 69 59 93 \ > + 49 90 91 6 \ > + 35 51 59 85 \ > + 18 32 89 65 \ > + 2 91 43 14 \ > + 69 70 99 96 \ > + 21 45 31 51 \ > + 39 27 69 28 \ > + 70 11 77 53 \ > + 72 95 46 94 > + > +arg_in: 7 buffer int4[10] \ > + 85 53 9 66 \ > + 91 50 52 32 \ > + 41 84 27 41 \ > + 15 68 38 39 \ > + 95 41 13 89 \ > + 85 29 54 51 \ > + 89 44 47 81 \ > + 78 79 42 28 \ > + 55 59 33 71 \ > + 32 46 52 66 > + > +arg_in: 8 buffer int4[10] \ > + 42 70 91 76 \ > + 99 49 26 9 \ > + 54 47 29 18 \ > + 50 14 59 85 \ > + 9 16 7 36 \ > + 10 41 58 88 \ > + 36 21 100 15 \ > + 19 1 19 99 \ > + 14 16 49 86 \ > + 40 61 99 15 > + > +arg_in: 9 buffer int4[10] \ > + 26 4 74 33 \ > + 95 7 50 38 \ > + 15 57 81 3 \ > + 59 96 56 14 \ > + 25 13 79 45 \ > + 44 73 87 72 \ > + 63 62 0 0 \ > + 24 82 13 40 \ > + 82 56 74 31 \ > + 67 34 54 52 > + > +!*/ > + > +// The inline asm is necessary to defeat interprocedural sparse > +// conditional constant propagation eliminating some of the trivial > +// calls. > +#ifdef __AMDGCN__ > +#define USE_ASM 1 > +#endif I think it'd be better to use build options to disable the opt pass instead (or all optimizations, like optimization-options-cl1X.cl tests). Jan > + > +__attribute__((noinline)) > +void void_func_void(void) > +{ > +#if USE_ASM > + __asm(""); > +#endif > +} > + > +kernel void call_void_func_void(__global int* ret) > +{ > + void_func_void(); > + *ret = 12345; > +} > + > +__attribute__((noinline)) > +int i32_func_void(void) > +{ > + int ret; > +#if USE_ASM > + __asm("v_mov_b32 %0, 0x12345" : "=v"(ret)); > +#else > + ret = 0x12345; > +#endif > + > + return ret; > +} > + > +kernel void call_i32_func_void(__global int* ret) > +{ > + *ret = i32_func_void(); > +} > + > +__attribute__((noinline)) > +long i64_func_void(void) > +{ > + long ret; > +#if USE_ASM > + __asm("v_lshlrev_b64 %0, 44, 1" : "=v"(ret)); > +#else > + ret = 1ull << 44; > +#endif > + return ret; > +} > + > +kernel void call_i64_func_void(__global long* ret) > +{ > + *ret = i64_func_void(); > +} > + > + > +__attribute__((noinline)) > +int i32_func_void_callee_stack(void) > +{ > + int ret; > +#if USE_ASM > + __asm("v_mov_b32 %0, 0x64" : "=v"(ret)); > +#else > + ret = 0x64; > +#endif > + > + volatile int alloca[20]; > + > + for (int i = 0; i < 20; ++i) > + { > + alloca[i] = i; > + } > + > + for (int i = 0; i < 20; ++i) > + { > + ret += alloca[i]; > + } > + > + return ret; > +} > + > +kernel void call_i32_func_void_callee_stack(__global int* ret) > +{ > + volatile int alloca[10]; > + > + for (int i = 0; i < 10; ++i) > + { > + alloca[i] = 0xffff; > + } > + > + > + *ret = i32_func_void_callee_stack(); > +} > + > +__attribute__((noinline)) > +int i32_func_p0i32_i32_caller_stack(volatile int* stack, int n) > +{ > + int ret; > +#if USE_ASM > + __asm("v_mov_b32 %0, 0x64" : "=v"(ret)); > +#else > + ret = 0x64; > +#endif > + > + for (int i = 0; i < n; ++i) > + { > + ret += stack[i]; > + } > + > + return ret; > +} > + > +kernel void call_i32_func_p0i32_i32_caller_stack(__global int* ret) > +{ > + volatile int alloca[10]; > + > + for (int i = 0; i < 10; ++i) > + { > + alloca[i] = 3 + i; > + } > + > + *ret = i32_func_p0i32_i32_caller_stack(alloca, 10); > +} > + > +__attribute__((noinline)) > +int i32_func_p0i32_i32_indirect_stack(volatile int* stack, int n) > +{ > + int ret; > +#if USE_ASM > + __asm("v_mov_b32 %0, 0x64" : "=v"(ret)); > +#else > + ret = 0x64; > +#endif > + for (int i = 0; i < n; ++i) > + { > + ret += stack[i]; > + } > + > + return ret; > +} > + > +// Access a stack object in the parent kernel's frame. > +__attribute__((noinline)) > +int i32_func_p0i32_i32_pass_kernel_stack(volatile int* stack, int n) > +{ > + int ret; > +#if USE_ASM > + __asm("v_mov_b32 %0, 0x42" : "=v"(ret)); > +#else > + ret = 0x42; > +#endif > + > + volatile int local_object[10]; > + for (int i = 0; i < 10; ++i) > + local_object[i] = -1; > + > + ret += i32_func_p0i32_i32_indirect_stack(stack, n); > + > + return ret; > +} > + > +kernel void call_i32_func_p0i32_i32_indirect_kernel_stack(volatile __global > int* ret) > +{ > + volatile int alloca[10]; > + > + for (int i = 0; i < 10; ++i) > + { > + alloca[i] = 3 + i; > + } > + > + *ret = i32_func_p0i32_i32_pass_kernel_stack(alloca, 10); > +} > + > +// Access a stack object in a parent non-kernel function's stack frame. > +__attribute__((noinline)) > +int i32_func_void_pass_function_stack() > +{ > + int ret; > +#if USE_ASM > + __asm("v_mov_b32 %0, 0x42" : "=v"(ret)); > +#else > + ret = 0x42; > +#endif > + > + volatile int local_object[10]; > + for (int i = 0; i < 10; ++i) > + local_object[i] = 8 + i; > + > + ret += i32_func_p0i32_i32_indirect_stack(local_object, 10); > + return ret; > +} > + > +kernel void call_i32_func_p0i32_i32_indirect_function_stack(__global int* > ret) > +{ > + *ret = i32_func_void_pass_function_stack(); > +} > + > +__attribute__((noinline)) > +int4 v4i32_func_v4i32_v4i32_stack(int4 arg0, int4 arg1) > +{ > + // Force stack usage. > + volatile int4 args[8] = { arg0, arg1 }; > + > + int4 total = 0; > + for (int i = 0; i < 8; ++i) > + { > + total += args[i]; > + } > + > + return total; > +} > + > +// Make sure using stack in a callee function from a callee function > +// doesn't corrupt caller's stack objects. > +__attribute__((noinline)) > +int4 nested_stack_usage_v4i32_func_v4i32_v4i32(int4 arg0, int4 arg1) > +{ > + volatile int stack_object[4]; > + for (int i = 0; i < 4; ++i) { > + const int test_val = 0x04030200 | i; > + stack_object[i] = test_val; > + } > + > + arg0 *= 2; > + > + int4 result = v4i32_func_v4i32_v4i32_stack(arg0, arg1); > + > + // Check for stack corruption > + for (int i = 0; i < 4; ++i) > + { > + const int test_val = 0x04030200 | i; > + if (stack_object[i] != test_val) > + result = -1; > + } > + > + return result; > +} > + > +kernel void kernel_call_nested_stack_usage(global int4* output, > + global int4* input0, > + global int4* input1) > +{ > + int id = get_global_id(0); > + output[id] = nested_stack_usage_v4i32_func_v4i32_v4i32( > + input0[id], > + input1[id]); > +} > + > +__attribute__((noinline)) > +int func_div_add(int x, int y) > +{ > + return x / y + 4; > +} > + > +__attribute__((noinline)) > +int call_i32_func_i32_i32(int x, int y, volatile int* ptr) > +{ > + int tmp = func_div_add(x, y) >> 2; > + return tmp + *ptr; > +} > + > +kernel void kernel_nested_calls(global int* output, > + global int* input0, > + global int* input1) > +{ > + int id = get_global_id(0); > + volatile int zero = 0; > + output[id] = call_i32_func_i32_i32(input0[id], input1[id], &zero); > +} > + > +__attribute__((noinline)) > +int4 v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32( > + int4 arg0, int4 arg1, int4 arg2, int4 arg3, > + int4 arg4, int4 arg5, int4 arg6, int4 arg7, > + int4 arg8) > +{ > + // Try to make sure we can't clobber the incoming stack arguments > + // with local stack objects. > + volatile int4 args[8] = { arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 > }; > + volatile int4 last_arg = arg8; > + > + int4 total = 0; > + for (int i = 0; i < 8; ++i) > + { > + total += args[i]; > + } > + > + return total * last_arg; > +} > + > + // Test argument passed on stack, but doesn't use byval. > +__attribute__((noinline)) > +int4 > stack_arg_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32( > + int4 arg0, int4 arg1, int4 arg2, int4 arg3, > + int4 arg4, int4 arg5, int4 arg6, int4 arg7, > + int4 arg8) > +{ > + volatile int stack_object[8]; > + for (int i = 0; i < 8; ++i) { > + const int test_val = 0x04030200 | i; > + stack_object[i] = test_val; > + } > + > + arg0 *= 2; > + > + int4 result = > v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32( > + arg0, arg1, arg2, arg3, arg4, > + arg5, arg6, arg7, arg8); > + > + // Check for stack corruption. > + for (int i = 0; i < 8; ++i) > + { > + const int test_val = 0x04030200 | i; > + if (stack_object[i] != test_val) > + result = -1; > + } > + > + return result; > +} > + > +kernel void kernel_call_stack_arg(global int4* output, > + global int4* input0, > + global int4* input1, > + global int4* input2, > + global int4* input3, > + global int4* input4, > + global int4* input5, > + global int4* input6, > + global int4* input7, > + global int4* input8) > +{ > + int id = get_global_id(0); > + > + volatile int stack_object[8]; > + for (int i = 0; i < 8; ++i) { > + const int test_val = 0x05060700 | i; > + stack_object[i] = test_val; > + } > + > + output[id] = > stack_arg_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32( > + input0[id], > + input1[id], > + input2[id], > + input3[id], > + input4[id], > + input5[id], > + input6[id], > + input7[id], > + input8[id]); > + > + // Check for stack corruption. > + for (int i = 0; i < 8; ++i) > + { > + const int test_val = 0x05060700 | i; > + if (stack_object[i] != test_val) > + output[id] = -1; > + } > + > +} > diff --git a/tests/cl/program/execute/tail-calls.cl > b/tests/cl/program/execute/tail-calls.cl > new file mode 100644 > index 000000000..3f102dcdc > --- /dev/null > +++ b/tests/cl/program/execute/tail-calls.cl > @@ -0,0 +1,305 @@ > +/*! > + > +[config] > +name: calls > +clc_version_min: 10 > + > +[test] > +name: basic_tail_call > +kernel_name: kernel_call_tailcall > +dimensions: 1 > +global_size: 4 0 0 > + > +arg_out: 0 buffer int[4] \ > + 4 11 107 -12 > + > +arg_in: 1 buffer int[4] \ > + 0 100 1234 -912 > + > +arg_in: 2 buffer int[4] \ > + 1 4 2 45 > + > +[test] > +name: tail_call_extra_arg > +kernel_name: kernel_call_tailcall_extra_arg > +dimensions: 1 > +global_size: 4 0 0 > + > +arg_out: 0 buffer int[4] \ > + 2 112 1340 -882 > + > +arg_in: 1 buffer int[4] \ > + 0 100 1234 -912 > + > +arg_in: 2 buffer int[4] \ > + 1 4 2 45 > + > +[test] > +name: tail_call_fewer_args > +kernel_name: kernel_call_tailcall_fewer_args > +dimensions: 1 > +global_size: 4 0 0 > + > +arg_out: 0 buffer int[4] \ > + 4 8 81 -10 > + > +arg_in: 1 buffer int[4] \ > + 0 100 1234 -912 > + > +arg_in: 2 buffer int[4] \ > + 1 4 2 45 > + > +arg_in: 3 buffer int[4] \ > + 3 8 4 9 > + > +[test] > +name: tail_call_stack_args > +kernel_name: kernel_call_tailcall_stack_passed_args > +dimensions: 1 > +global_size: 10 0 0 > + > +arg_out: 0 buffer int4[10] \ > + 11440 8762 10296 13156 \ > + 19649 31311 18081 24745 \ > + 10476 11772 17766 11070 \ > + 22165 18005 28665 35945 \ > + 624 938 768 990 \ > + 30618 28791 30240 31815 \ > + 49851 47676 46806 47676 \ > + 4400 4272 3392 2632 \ > + 10582 8712 8514 7854 \ > + 19737 21199 23865 18533 \ > + > + > +arg_in: 1 buffer int4[10] \ > + 0 13 76 46 \ > + 4 74 33 63 \ > + 26 9 95 7 \ > + 41 54 47 29 \ > + 15 68 38 39 \ > + 91 43 14 95 \ > + 44 83 69 70 \ > + 89 54 14 45 \ > + 77 63 21 21 \ > + 64 70 80 70 > + > +arg_in: 2 buffer int4[10] \ > + 53 22 4 68 \ > + 76 100 36 24 \ > + 50 38 27 92 \ > + 18 15 57 81 \ > + 50 14 59 85 \ > + 41 13 89 9 \ > + 99 96 85 29 \ > + 99 21 45 31 \ > + 8 39 96 95 \ > + 76 67 63 5 > + > +arg_in: 3 buffer int4[10] \ > + 68 94 38 52 \ > + 99 72 76 65 \ > + 53 46 95 5 \ > + 3 53 50 96 \ > + 59 96 56 14 \ > + 16 7 36 25 \ > + 54 51 10 41 \ > + 51 89 44 47 \ > + 39 27 69 28 \ > + 60 22 32 70 > + > +arg_in: 4 buffer int4[10] \ > + 83 3 5 53 \ > + 7 63 89 27 \ > + 76 77 83 12 \ > + 75 56 89 63 \ > + 99 41 14 57 \ > + 13 79 45 35 \ > + 58 88 44 73 \ > + 81 36 21 100 \ > + 78 79 42 28 \ > + 11 77 53 55 > + > +arg_in: 5 buffer int4[10] \ > + 67 0 38 6 \ > + 44 77 48 24 \ > + 1 69 87 63 \ > + 85 16 21 72 \ > + 25 49 46 97 \ > + 45 81 94 65 \ > + 87 72 80 71 \ > + 15 63 62 0 \ > + 19 1 19 99 \ > + 59 33 71 14 > + > +arg_in: 6 buffer int4[10] \ > + 42 69 59 93 \ > + 27 36 16 49 \ > + 74 73 100 89 \ > + 13 9 27 0 \ > + 12 20 32 63 \ > + 21 68 91 25 \ > + 74 1 89 53 \ > + 0 78 73 32 \ > + 24 82 13 40 \ > + 16 49 86 82 > + > +arg_in: 7 buffer int4[10] \ > + 85 53 9 66 \ > + 90 91 6 91 \ > + 23 30 35 51 \ > + 41 2 71 94 \ > + 12 65 62 81 \ > + 86 47 51 60 \ > + 46 6 72 49 \ > + 42 68 68 20 \ > + 60 17 83 15 \ > + 56 74 31 13 > + > +arg_in: 8 buffer int4[10] \ > + 42 70 91 76 \ > + 50 52 32 99 \ > + 59 85 41 84 \ > + 24 18 32 89 \ > + 25 48 39 20 \ > + 82 76 46 96 \ > + 67 68 20 92 \ > + 84 71 83 9 \ > + 99 25 23 10 \ > + 53 31 59 52 > + > +arg_in: 9 buffer int[10] \ > + 26 \ > + 49 \ > + 27 \ > + 65 \ > + 2 \ > + 63 \ > + 87 \ > + 8 \ > + 22 \ > + 43 > + > +!*/ > + > +__attribute__((noinline)) > +int i32_func_i32_i32(int x, int y) > +{ > + return x / y + 4; > +} > + > +__attribute__((noinline)) > +int i32_func_i32_i32_i32(int x, int y, int z) > +{ > + return x / y + z; > +} > + > +// Test a basic tail call > +__attribute__((noinline)) > +int tailcall_i32_func_i32_i32(int x, int y) > +{ > + x += 5; > + y += 10; > + return i32_func_i32_i32(x, y); > +} > + > +// Test a basic tail call with more arguments in the callee than > +// caller. > +__attribute__((noinline)) > +int tailcall_i32_func_i32_i32_extra_arg(int x, int y) > +{ > + int z = x + y + 1; > + x += 5; > + y += 10; > + return i32_func_i32_i32_i32(x, y, z); > +} > + > +// Test a basic tail call with fewere arguments in the callee than > +// caller. > +__attribute__((noinline)) > +int tailcall_i32_func_i32_i32_i32_fewer_args(int x, int y, int z) > +{ > + x += 5; > + y += 10; > + return i32_func_i32_i32(x, y + z); > +} > + > +kernel void kernel_call_tailcall(global int* output, > + global int* input0, > + global int* input1) > +{ > + int id = get_global_id(0); > + output[id] = tailcall_i32_func_i32_i32(input0[id], input1[id]); > +} > + > +kernel void kernel_call_tailcall_extra_arg(global int* output, > + global int* input0, > + global int* input1) > +{ > + int id = get_global_id(0); > + output[id] = tailcall_i32_func_i32_i32_extra_arg(input0[id], input1[id]); > +} > + > +kernel void kernel_call_tailcall_fewer_args(global int* output, > + global int* input0, > + global int* input1, > + global int* input2) > +{ > + int id = get_global_id(0); > + output[id] = tailcall_i32_func_i32_i32_i32_fewer_args(input0[id], > input1[id], input2[id]); > +} > +__attribute__((noinline)) > +int4 v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32( > + int4 arg0, int4 arg1, int4 arg2, int4 arg3, > + int4 arg4, int4 arg5, int4 arg6, int4 arg7, > + int arg8) > +{ > + // Try to make sure we can't clobber the incoming stack arguments > + // with local stack objects. > + volatile int4 args[8] = { arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 > }; > + volatile int scalar_arg = arg8; > + > + int4 total = 0; > + for (int i = 0; i < 8; ++i) > + { > + total += args[i]; > + } > + > + return total * scalar_arg; > +} > + > +// Test a basic tail call > +__attribute__((noinline)) > +int4 tailcall_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32( > + int4 arg0, int4 arg1, int4 arg2, int4 arg3, > + int4 arg4, int4 arg5, int4 arg6, int4 arg7, > + int arg8) > +{ > + arg0 *= 2; > + return v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32( > + arg0, arg1, arg2, arg3, arg4, > + arg5, arg6, arg7, arg8); > +} > + > +kernel void kernel_call_tailcall_stack_passed_args(global int4* output, > + global int4* input0, > + global int4* input1, > + global int4* input2, > + global int4* input3, > + global int4* input4, > + global int4* input5, > + global int4* input6, > + global int4* input7, > + global int* input8) > +{ > + int id = get_global_id(0); > + output[id] = > tailcall_v4i32_func_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_v4i32_i32( > + input0[id], > + input1[id], > + input2[id], > + input3[id], > + input4[id], > + input5[id], > + input6[id], > + input7[id], > + input8[id]); > +} -- Jan Vesely <jan.ves...@rutgers.edu>
signature.asc
Description: This is a digitally signed message part
_______________________________________________ Piglit mailing list Piglit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/piglit