Hi, when a display manager is running on an NVIDIA card, all CUDA kernel launches are subject to a 5-second watchdog timer.
Consequently, when running the libgomp testsuite with nvptx accelerator and GOMP_NVPTX_JIT=-O0 we run into a few FAILs like this: ... libgomp: cuStreamSynchronize error: the launch timed out and was terminated FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/parallel-dims.c \ -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none -O0 \ execution test ... Fix this by scaling down the failing test-cases. Tested on x86_64-linux with nvptx accelerator. OK for trunk? Thanks, - Tom [libgomp, testsuite] Scale down some OpenACC test-cases libgomp/ChangeLog: 2022-03-25 Tom de Vries <[email protected]> PR libgomp/105042 * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Reduce execution time. * testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: Same. * testsuite/libgomp.oacc-fortran/parallel-dims.f90: Same. --- .../libgomp.oacc-c-c++-common/parallel-dims.c | 39 +++++++++++----------- .../libgomp.oacc-c-c++-common/vred2d-128.c | 2 +- .../libgomp.oacc-fortran/parallel-dims.f90 | 10 +++--- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index b1cfe37df8a..d9e4bd0d75f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -49,6 +49,7 @@ static int acc_vector () return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); } +#define N 50 int main () { @@ -76,7 +77,7 @@ int main () { /* We're actually executing with num_gangs (1). 
*/ gangs_actual = 1; - for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i) + for (int i = N * gangs_actual; i > -N * gangs_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -115,7 +116,7 @@ int main () reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i) + for (int i = N * gangs_actual; i > -N * gangs_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -154,7 +155,7 @@ int main () reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * workers_actual; i > -100 * workers_actual; --i) + for (int i = N * workers_actual; i > -N * workers_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -200,7 +201,7 @@ int main () reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i) + for (int i = N * vectors_actual; i > -N * 
vectors_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -250,7 +251,7 @@ int main () } /* As we're executing GR not GP, don't multiply with a "gangs_actual" factor. */ - for (int i = 100 /* * gangs_actual */; i > -100 /* * gangs_actual */; --i) + for (int i = N /* * gangs_actual */; i > -N /* * gangs_actual */; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -291,7 +292,7 @@ int main () reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i) + for (int i = N * gangs_actual; i > -N * gangs_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -348,7 +349,7 @@ int main () reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * workers_actual; i > -100 * workers_actual; --i) + for (int i = N * workers_actual; i > -N * workers_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -411,7 +412,7 @@ int main () reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { 
dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * workers_actual; i > -100 * workers_actual; --i) + for (int i = N * workers_actual; i > -N * workers_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -468,7 +469,7 @@ int main () reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i) + for (int i = N * vectors_actual; i > -N * vectors_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -528,7 +529,7 @@ int main () reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i) + for (int i = N * vectors_actual; i > -N * vectors_actual; --i) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -602,20 +603,20 @@ int main () /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target 
*-*-* } l_loop_i$c_loop_i } */ - for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i) + for (int i = N * gangs_actual; i > -N * gangs_actual; --i) #pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \ worker \ reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */ /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */ /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */ - for (int j = 100 * workers_actual; j > -100 * workers_actual; --j) + for (int j = N * workers_actual; j > -N * workers_actual; --j) #pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \ vector \ reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */ /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */ - for (int k = 100 * vectors_actual; k > -100 * vectors_actual; --k) + for (int k = N * vectors_actual; k > -N * vectors_actual; --k) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -664,7 +665,7 @@ int main () /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target 
*-*-* } l_loop_i$c_loop_i } */ - for (int i = 100; i > -100; --i) + for (int i = N; i > -N; --i) { /* This is to make the loop unparallelizable. */ asm volatile ("" : : : "memory"); @@ -714,7 +715,7 @@ int main () /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100; i > -100; --i) + for (int i = N; i > -N; --i) { /* This is to make the loop unparallelizable. */ asm volatile ("" : : : "memory"); @@ -745,7 +746,7 @@ int main () /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */ /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */ { - for (int i = 100; i > -100; i--) + for (int i = N; i > -N; i--) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); @@ -789,20 +790,20 @@ int main () /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */ - for (int i = 100; i > -100; i--) + for (int i = N; i > -N; i--) #pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \ worker \ reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note 
{variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */ /* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */ /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */ - for (int j = 100; j > -100; j--) + for (int j = N; j > -N; j--) #pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \ vector \ reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */ /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */ - for (int k = 100 * vectors_actual; k > -100 * vectors_actual; k--) + for (int k = N * vectors_actual; k > -N * vectors_actual; k--) { gangs_min = gangs_max = acc_gang (); workers_min = workers_max = acc_worker (); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c index 489f26ad9f2..12df1063d90 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c @@ -4,7 +4,7 @@ #include <assert.h> -#define n 10000 +#define n 2500 int a1[n], a2[n]; #define gentest(name, outer, inner) \ diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 b/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 index cd3f3555b78..ee08cfcb429 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-dims.f90 @@ -44,6 +44,8 @@ program main integer :: vectors_actual integer :: i, j, k + + integer, parameter :: N = 50 call acc_init 
(acc_device_default) ! OpenACC parallel construct. @@ -69,7 +71,7 @@ program main !$acc serial & !$acc reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) ! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } } ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } - do i = 100, -99, -1 + do i = N, -(N-1), -1 gangs_min = acc_gang (); gangs_max = acc_gang (); workers_min = acc_worker (); @@ -108,14 +110,14 @@ program main end if !$acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } - do i = 100, -99, -1 + do i = N, -(N-1), -1 !$acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) ! { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } ! { dg-note {variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } - do j = 100, -99, -1 + do j = N, -(N-1), -1 !$acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) ! { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } - do k = 100 * vectors_actual, -99 * vectors_actual, -1 + do k = N * vectors_actual, -(N-1) * vectors_actual, -1 gangs_min = acc_gang (); gangs_max = acc_gang (); workers_min = acc_worker ();
