Tom suggested that it would be more efficient to use static_nochunk for
loop scheduling for acc gang loops when the static argument isn't
present. We also decided that gang(static:*) should be scheduled using
static_nochunk too. A chunk_size of 1 is probably too conservative for
gangs running on GPUs.

I also updated the nvptx-specific gang loop test case. My first attempt
wasn't scanning all of the values in the array. This corrects that, and
it also teaches that test what to expect for gang(static:*).

This patch has been committed to gomp-4_0-branch.

Cesar
2015-06-17  Cesar Philippidis  <ce...@codesourcery.com>

	gcc/
	* omp-low.c (extract_omp_for_data): Only use static_chunk
	scheduling for acc gang loops when the gang clause contains
	a non-'*' static argument.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Update
	the expected behavior for gang(static:*).


diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 0300ed7..f7e13d3 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -771,9 +771,12 @@ extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
       if (gang)
 	{
 	  chunk_size = OMP_CLAUSE_GANG_STATIC_EXPR (gang);
-	}
 
-      if (!chunk_size || chunk_size == integer_minus_one_node)
+	  /* gang (static:*) is represented by -1.  */
+	  if (chunk_size == integer_minus_one_node)
+	    chunk_size = NULL_TREE;
+	}
+      else
 	chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1);
 
       fd->chunk_size = chunk_size;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
index 8ff2005..aa35dcb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
@@ -4,18 +4,27 @@
 
 #define N 100
 
-int test(int *a, int sarg)
+int
+test_static(int *a, int num_gangs, int sarg)
 {
-  int i, j, gang;
+  int i, j;
 
   if (sarg == 0)
     sarg = 1;
 
-  for (i = 0, gang = 0; i < N; i+=sarg, gang++)
-    {
-      for (j = 0; j < sarg; j++)
-	assert (a[i] == gang % 10);
-    }
+  for (i = 0; i < N / sarg; i++)
+    for (j = 0; j < sarg; j++)
+      assert (a[i*sarg+j] == i % num_gangs);
+}
+
+int
+test_nonstatic(int *a, int gangs)
+{
+  int i, j;
+
+  for (i = 0; i < N; i+=gangs)
+    for (j = 0; j < gangs; j++)
+      assert (a[i+j] == i/gangs);
 }
 
 int
@@ -28,31 +37,38 @@ main ()
   for (i = 0; i < 100; i++)
     a[i] = __builtin_GOACC_ctaid (0);
 
-  test (a, 0);
+  test_nonstatic (a, 10);
 
 #pragma acc parallel loop gang (static:1) num_gangs (10)
   for (i = 0; i < 100; i++)
     a[i] = __builtin_GOACC_ctaid (0);
 
-  test (a, 1);
+  test_static (a, 10, 1);
 
 #pragma acc parallel loop gang (static:2) num_gangs (10)
   for (i = 0; i < 100; i++)
     a[i] = __builtin_GOACC_ctaid (0);
 
-  test (a, 2);
+  test_static (a, 10, 2);
 
 #pragma acc parallel loop gang (static:5) num_gangs (10)
   for (i = 0; i < 100; i++)
     a[i] = __builtin_GOACC_ctaid (0);
 
-  test (a, 5);
+  test_static (a, 10, 5);
 
 #pragma acc parallel loop gang (static:20) num_gangs (10)
   for (i = 0; i < 100; i++)
     a[i] = __builtin_GOACC_ctaid (0);
 
-  test (a, 20);
+  test_static (a, 10, 20);
+
+  /* Non-static gang.  */
+#pragma acc parallel loop gang num_gangs (10)
+  for (i = 0; i < 100; i++)
+    a[i] = __builtin_GOACC_ctaid (0);
+
+  test_nonstatic (a, 10);
 
   return 0;
 }

Reply via email to