Tom suggested that it would be more efficient to use static_nochunk for loop scheduling of OpenACC gang loops when the static argument isn't present. We also decided that gang(static:*) should be scheduled using static_nochunk too. A chunk_size of 1 is probably too conservative for gangs running on GPUs.
I also updated the nvptx-specific gang loop test case. My first attempt wasn't scanning all of the values in the array. This patch corrects that, and it also teaches the test what to expect for gang(static:*). This patch has been committed to gomp-4_0-branch. Cesar
2015-06-17 Cesar Philippidis <ce...@codesourcery.com> gcc/ * omp-low.c (extract_omp_for_data): Only use static_chunk scheduling for acc gang loops when the gang clause contains a non-'*' static argument. libgomp/ * testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Update the expected behavior for gang(static:*). diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 0300ed7..f7e13d3 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -771,9 +771,12 @@ extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd, if (gang) { chunk_size = OMP_CLAUSE_GANG_STATIC_EXPR (gang); - } - if (!chunk_size || chunk_size == integer_minus_one_node) + /* gang (static:*) is represented by -1. */ + if (chunk_size == integer_minus_one_node) + chunk_size = NULL_TREE; + } + else chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1); fd->chunk_size = chunk_size; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c index 8ff2005..aa35dcb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c @@ -4,18 +4,27 @@ #define N 100 -int test(int *a, int sarg) +int +test_static(int *a, int num_gangs, int sarg) { - int i, j, gang; + int i, j; if (sarg == 0) sarg = 1; - for (i = 0, gang = 0; i < N; i+=sarg, gang++) - { - for (j = 0; j < sarg; j++) - assert (a[i] == gang % 10); - } + for (i = 0; i < N / sarg; i++) + for (j = 0; j < sarg; j++) + assert (a[i*sarg+j] == i % num_gangs); +} + +int +test_nonstatic(int *a, int gangs) +{ + int i, j; + + for (i = 0; i < N; i+=gangs) + for (j = 0; j < gangs; j++) + assert (a[i+j] == i/gangs); } int @@ -28,31 +37,38 @@ main () for (i = 0; i < 100; i++) a[i] = __builtin_GOACC_ctaid (0); - test (a, 0); + test_nonstatic (a, 10); #pragma acc parallel loop gang (static:1) num_gangs (10) for (i = 0; i < 100; i++) a[i] = __builtin_GOACC_ctaid (0); - test (a, 1); + test_static (a, 10, 1); #pragma acc 
parallel loop gang (static:2) num_gangs (10) for (i = 0; i < 100; i++) a[i] = __builtin_GOACC_ctaid (0); - test (a, 2); + test_static (a, 10, 2); #pragma acc parallel loop gang (static:5) num_gangs (10) for (i = 0; i < 100; i++) a[i] = __builtin_GOACC_ctaid (0); - test (a, 5); + test_static (a, 10, 5); #pragma acc parallel loop gang (static:20) num_gangs (10) for (i = 0; i < 100; i++) a[i] = __builtin_GOACC_ctaid (0); - test (a, 20); + test_static (a, 10, 20); + + /* Non-static gang. */ +#pragma acc parallel loop gang num_gangs (10) + for (i = 0; i < 100; i++) + a[i] = __builtin_GOACC_ctaid (0); + + test_nonstatic (a, 10); return 0; }