I've applied this patch to gomp-4_0-branch to add support for fortran
allocatable scalars inside OpenACC declare constructs. In order to
update the declared variable on the device, the fortran FE now uses the
GOMP_MAP_ALWAYS_POINTERS for declared allocatable scalars. That
necessitated some minor tweaking in the runtime. Note that I didn't add
support for allocatable derived types in this patch as the OpenACC 2.5
spec is still unclear on them.

Included in this patch is a bug fix for non-declared allocatable
scalars. Specifically, prior to this patch, the gimplifier would treat
allocatable scalar variables like C pointers, so only the pointer would
be updated on the accelerator and not the value being pointed to. If the
user explicitly specified a data clause for the variable, the fortran FE
itself would generate code to copy the value being pointed to along with
the pointer itself. I.e. this bug only affects implicit firstprivate
allocatable scalar variables. To resolve this issue, this patch teaches
lower_omp_target to pass the value being pointed to by a
GOMP_MAP_FIRSTPRIVATE_INT data mapping instead of the pointer itself to
the accelerator, then on entry to the offloaded region, lower_omp_target
will create a local copy of the pointed to value on target and along
with a local pointer to it.

Cesar
2017-04-19  Cesar Philippidis  <ce...@codesourcery.com>

	gcc/fortran/
	* trans-decl.c (add_clause): Populate sym->backend_decl so that it
	can be used to determine if two symbols are unique.
	* trans-openmp.c (gfc_trans_oacc_executable_directive): Use
	GOMP_MAP_ALWAYS_POINTER for fortran allocatable pointers.
	* trans-stmt.c (gfc_trans_allocate): Call
	gfc_trans_oacc_declare_allocate for declared allocatable pointers.
	(gfc_trans_deallocate): Likewise.

	gcc/
	* omp-low.c (convert_to_firstprivate_int): Handle pointer types.
	(convert_from_firstprivate_int): Likewise.  Create local storage for
	the values being pointed too.  Add new argument orig_type.
	(lower_omp_target): Allow pointer types with GOMP_MAP_FIRSTPRIVATE_INT.

	gcc/testsuite/
	* gfortran.dg/goacc/declare-allocatable-1.f90: Update test case to
	exercise allocatable scalars.

	libgomp/
	* oacc-parallel.c (GOACC_enter_exit_data): Add support for
	GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}.
	(GOACC_update): Add support for GOMP_MAP_ALWAYS_POINTER.
	* testsuite/libgomp.oacc-fortran/allocatable-scalar.f90: New test.
	* testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90: New test.
	* testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90: New test.
	* testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90: New test.


diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index b4db6b0..c0efc1a 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -5935,6 +5935,9 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op)
   if (!module_oacc_clauses)
     module_oacc_clauses = gfc_get_omp_clauses ();
 
+  if (sym->backend_decl == NULL)
+    gfc_get_symbol_decl (sym);
+
   for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = n->next)
     if (n->sym->backend_decl == sym->backend_decl)
       return;
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 238eebe..3718da2 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -3336,6 +3336,18 @@ gfc_trans_oacc_executable_directive (gfc_code *code)
   gfc_start_block (&block);
   oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses,
 					code->loc);
+
+  /* Promote GOMP_MAP_FIRSTPRIVATE_POINTER to GOMP_MAP_ALWAYS_POINTER for
+     variables inside OpenACC update directives.  */
+  if (code->op == EXEC_OACC_UPDATE)
+    for (tree c = oacc_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+      {
+        if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
+	    && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
+	  OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_ALWAYS_POINTER);
+      }
+
+
   stmt = build1_loc (input_location, construct_code, void_type_node,
 		     oacc_clauses);
   gfc_add_expr_to_block (&block, stmt);
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index dcf17694..faf19e6 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -5883,6 +5883,10 @@ gfc_trans_allocate (gfc_code * code)
 	      tmp = gfc_nullify_alloc_comp (expr->ts.u.derived, tmp, 0);
 	      gfc_add_expr_to_block (&se.pre, tmp);
 	    }
+
+	  /* Allocate memory for OpenACC declared variables.  */
+	  if (expr->symtree->n.sym->attr.oacc_declare_create)
+	    gfc_trans_oacc_declare_allocate (&se.pre, expr, true);
 	}
       else
 	{
@@ -6360,6 +6364,10 @@ gfc_trans_deallocate (gfc_code *code)
 	}
       else
 	{
+	  /* Deallocate memory for OpenACC declared variables.  */
+	  if (expr->symtree->n.sym->attr.oacc_declare_create)
+	    gfc_trans_oacc_declare_allocate (&se.pre, expr, false);
+
 	  tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, false,
 						   al->expr, al->expr->ts);
 	  gfc_add_expr_to_block (&se.pre, tmp);
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index a584a44..5c41edc 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -16568,7 +16568,7 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs)
 
   if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
   {
-    if (is_reference (var))
+    if (is_reference (var) || POINTER_TYPE_P (type))
       {
 	tmp = create_tmp_var (type);
 	gimplify_assign (tmp, build_simple_mem_ref (var), gs);
@@ -16605,7 +16605,8 @@ convert_to_firstprivate_int (tree var, gimple_seq *gs)
 /* Like convert_to_firstprivate_int, but restore the original type.  */
 
 static tree
-convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs)
+convert_from_firstprivate_int (tree var, tree orig_type, bool is_ref,
+			       gimple_seq *gs)
 {
   tree type = TREE_TYPE (var);
   tree new_type = NULL_TREE;
@@ -16614,7 +16615,31 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs)
   gcc_assert (TREE_CODE (var) == MEM_REF);
   var = TREE_OPERAND (var, 0);
 
-  if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type))
+  if (is_ref || POINTER_TYPE_P (orig_type))
+    {
+      tree_code code = NOP_EXPR;
+
+      if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == COMPLEX_TYPE)
+	code = VIEW_CONVERT_EXPR;
+
+      if (code == VIEW_CONVERT_EXPR
+	  && TYPE_SIZE (type) != TYPE_SIZE (orig_type))
+	{
+	  tree ptype = build_pointer_type (type);
+	  var = fold_build1 (code, ptype, build_fold_addr_expr (var));
+	  var = build_simple_mem_ref (var);
+	}
+      else
+	var = fold_build1 (code, type, var);
+
+      tree inst = create_tmp_var (type);
+      gimplify_assign (inst, var, gs);
+      var = build_fold_addr_expr (inst);
+
+      return var;
+    }
+
+  if (INTEGRAL_TYPE_P (var))
     return fold_convert (type, var);
 
   switch (tree_to_uhwi (TYPE_SIZE (type)))
@@ -16631,13 +16656,6 @@ convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs)
   gimplify_assign (tmp, var, gs);
   var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp);
 
-  if (is_ref)
-    {
-      tmp = create_tmp_var (build_pointer_type (type));
-      gimplify_assign (tmp, build_fold_addr_expr (var), gs);
-      var = tmp;
-    }
-
   return var;
 }
 
@@ -16846,16 +16864,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	      {
 		gcc_assert (is_gimple_omp_oacc (ctx->stmt));
 		if (oacc_firstprivate_int)
-		  x = convert_from_firstprivate_int (x, is_reference (var),
+		  x = convert_from_firstprivate_int (x, TREE_TYPE (new_var),
+						     is_reference (var),
 						     &fplist);
-		else if (is_reference (new_var)
-		    && TREE_CODE (var_type) != POINTER_TYPE)
+		else if (is_reference (new_var))
 		  {
 		    /* Create a local object to hold the instance
 		       value.  */
 		    const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var));
 		    tree inst = create_tmp_var (TREE_TYPE (var_type), id);
-		    gimplify_assign (inst, fold_indirect_ref (x), &fplist);
+		    if (TREE_CODE (var_type) == POINTER_TYPE)
+		      gimplify_assign (inst, x, &fplist);
+		    else
+		      gimplify_assign (inst, fold_indirect_ref (x), &fplist);
 		    x = build_fold_addr_expr (inst);
 		  }
 		gimplify_assign (new_var, x, &fplist);
@@ -17103,8 +17124,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 		  }
 		else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
 		  {
+		    tree new_var = lookup_decl (var, ctx);
 		    tree type = TREE_TYPE (var);
-		    tree inner_type = is_reference (var)
+		    tree inner_type = is_reference (new_var)
 		      ? TREE_TYPE (type) : type;
 		    gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));
 		    if ((TREE_CODE (inner_type) == REAL_TYPE
diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90
index b6bb6b3..5349e0d 100644
--- a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90
+++ b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90
@@ -6,20 +6,20 @@
 
 program allocate
   implicit none
-  integer, allocatable :: a(:)
+  integer, allocatable :: a(:), b
   integer, parameter :: n = 100
   integer i
-  !$acc declare create(a)
+  !$acc declare create(a,b)
 
-  allocate (a(n))
+  allocate (a(n), b)
 
-  !$acc parallel loop copyout(a)
+  !$acc parallel loop copyout(a, b)
   do i = 1, n
-     a(i) = i
+     a(i) = b
   end do
 
-  deallocate (a)
+  deallocate (a, b)
 end program allocate
 
-! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 1 "original" } }
-! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 1 "original" } }
+! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 2 "original" } }
+! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 2 "original" } }
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 3962076..66acdf6 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -523,6 +523,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 	    {
 	      switch (kind)
 		{
+		case GOMP_MAP_DECLARE_ALLOCATE:
 		case GOMP_MAP_ALLOC:
 		  acc_present_or_create (hostaddrs[i], sizes[i]);
 		  break;
@@ -574,6 +575,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		if (acc_is_present (hostaddrs[i], sizes[i]))
 		  acc_delete (hostaddrs[i], sizes[i]);
 		break;
+	      case GOMP_MAP_DECLARE_DEALLOCATE:
 	      case GOMP_MAP_FROM:
 	      case GOMP_MAP_FORCE_FROM:
 		acc_copyout (hostaddrs[i], sizes[i]);
@@ -655,6 +657,7 @@ GOACC_update (int device, size_t mapnum,
 
   acc_dev->openacc.async_set_async_func (async);
 
+  bool update_device = false;
   for (i = 0; i < mapnum; ++i)
     {
       unsigned char kind = kinds[i] & 0xff;
@@ -665,11 +668,31 @@ GOACC_update (int device, size_t mapnum,
 	case GOMP_MAP_TO_PSET:
 	  break;
 
+	case GOMP_MAP_ALWAYS_POINTER:
+	  if (update_device)
+	    {
+	      /* Save the contents of the host pointer.  */
+	      void *dptr = acc_deviceptr (hostaddrs[i-1]);
+	      uintptr_t t = *(uintptr_t *) hostaddrs[i];
+
+	      /* Update the contents of the host pointer to reflect
+		 the value of the allocated device memory in the
+		 previous pointer.  */
+	      *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
+	      acc_update_device (hostaddrs[i], sizeof (uintptr_t));
+
+	      /* Restore the host pointer.  */
+	      *(uintptr_t *) hostaddrs[i] = t;
+	    }
+	  break;
+
 	case GOMP_MAP_FORCE_TO:
+	  update_device = true;
 	  acc_update_device (hostaddrs[i], sizes[i]);
 	  break;
 
 	case GOMP_MAP_FORCE_FROM:
+	  update_device = false;
 	  acc_update_self (hostaddrs[i], sizes[i]);
 	  break;
 
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90 b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90
new file mode 100644
index 0000000..8386c5d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/allocatable-scalar.f90
@@ -0,0 +1,30 @@
+program main
+  implicit none
+  integer, parameter :: n = 100
+  integer, allocatable :: a, c
+  integer :: i, b(n)
+
+  allocate (a)
+
+  a = 50
+
+  !$acc parallel loop
+  do i = 1, n;
+     b(i) = a
+  end do
+
+  do i = 1, n
+     if (b(i) /= a) call abort
+  end do
+
+  allocate (c)
+
+  print *, loc (c)
+  !$acc parallel copyout(c) num_gangs(1)
+  c = a
+  !$acc end parallel
+
+  if (c /= a) call abort
+
+  deallocate (a, c)
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90
new file mode 100644
index 0000000..3521a7f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-2.f90
@@ -0,0 +1,48 @@
+! Test declare create with allocatable scalars.
+
+! { dg-do run }
+
+program main
+  use openacc
+  implicit none
+  integer, parameter :: n = 100
+  integer, allocatable :: a, c
+  integer :: i, b(n)
+  !$acc declare create (c)
+
+  allocate (a)
+
+  a = 50
+
+  !$acc parallel loop firstprivate(a)
+  do i = 1, n;
+     b(i) = a
+  end do
+
+  do i = 1, n
+     if (b(i) /= a) call abort
+  end do
+
+  allocate (c)
+  a = 100
+
+  if (.not.acc_is_present(c)) call abort
+
+  !$acc parallel num_gangs(1) present(c)
+  c = a
+  !$acc end parallel
+
+  !$acc update host(c)
+  if (c /= a) call abort
+
+  !$acc parallel loop
+  do i = 1, n
+     b(i) = c
+  end do
+
+  do i = 1, n
+     if (b(i) /= a) call abort
+  end do
+
+  deallocate (a, c)
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90
new file mode 100644
index 0000000..919146a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-3.f90
@@ -0,0 +1,218 @@
+! Test declare create with allocatable arrays.
+
+! { dg-do run }
+
+module vars
+  implicit none
+  integer, parameter :: n = 100
+  real*8, allocatable :: a, b(:)
+ !$acc declare create (a, b)
+end module vars
+
+program test
+  use vars
+  use openacc
+  implicit none
+  integer :: i
+
+  interface
+     subroutine sub1
+       !$acc routine gang
+     end subroutine sub1
+
+     subroutine sub2
+     end subroutine sub2
+
+     real*8 function fun1 (ix)
+       integer ix
+       !$acc routine seq
+     end function fun1
+
+     real*8 function fun2 (ix)
+       integer ix
+       !$acc routine seq
+     end function fun2
+  end interface
+
+  if (allocated (a)) call abort
+  if (allocated (b)) call abort
+
+  ! Test local usage of an allocated declared array.
+
+  allocate (a)
+
+  if (.not.allocated (a)) call abort
+  if (acc_is_present (a) .neqv. .true.) call abort
+
+  allocate (b(n))
+
+  if (.not.allocated (b)) call abort
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  a = 2.0
+  !$acc update device(a)
+
+  !$acc parallel loop
+  do i = 1, n
+     b(i) = i * a
+  end do
+
+  if (.not.acc_is_present (b)) call abort
+
+  !$acc update host(b)
+
+  do i = 1, n
+     if (b(i) /= i*a) call abort
+  end do
+
+  deallocate (b)
+
+  ! Test the usage of an allocated declared array inside an acc
+  ! routine subroutine.
+
+  allocate (b(n))
+
+  if (.not.allocated (b)) call abort
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  !$acc parallel
+  call sub1
+  !$acc end parallel
+
+  if (.not.acc_is_present (b)) call abort
+
+  !$acc update host(b)
+
+  do i = 1, n
+     if (b(i) /= a+i*2) call abort
+  end do
+
+  deallocate (b)
+
+  ! Test the usage of an allocated declared array inside a host
+  ! subroutine.
+
+  call sub2
+
+  if (.not.acc_is_present (b)) call abort
+
+  !$acc update host(b)
+
+  do i = 1, n
+     if (b(i) /= 1.0) call abort
+  end do
+
+  deallocate (b)
+
+  if (allocated (b)) call abort
+
+  ! Test the usage of an allocated declared array inside an acc
+  ! routine function.
+
+  allocate (b(n))
+
+  if (.not.allocated (b)) call abort
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  !$acc parallel loop
+  do i = 1, n
+     b(i) = 1.0
+  end do
+
+  !$acc parallel loop
+  do i = 1, n
+     b(i) = fun1 (i)
+  end do
+
+  if (.not.acc_is_present (b)) call abort
+
+  !$acc update host(b)
+
+  do i = 1, n
+     if (b(i) /= i) call abort
+  end do
+
+  deallocate (b)
+
+  ! Test the usage of an allocated declared array inside a host
+  ! function.
+
+  allocate (b(n))
+
+  if (.not.allocated (b)) call abort
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  !$acc parallel loop
+  do i = 1, n
+     b(i) = 1.0
+  end do
+
+  !$acc update host(b)
+
+  do i = 1, n
+     b(i) = fun2 (i)
+  end do
+
+  if (.not.acc_is_present (b)) call abort
+
+  do i = 1, n
+     if (b(i) /= i*a) call abort
+  end do
+
+  deallocate (a)
+  deallocate (b)
+end program test
+
+! Set each element in array 'b' at index i to a+i*2.
+
+subroutine sub1 ! { dg-warning "region is worker partitioned" }
+  use vars
+  implicit none
+  integer i
+  !$acc routine gang
+
+  !$acc loop
+  do i = 1, n
+     b(i) = a+i*2
+  end do
+end subroutine sub1
+
+! Allocate array 'b', and set it to all 1.0.
+
+subroutine sub2
+  use vars
+  use openacc
+  implicit none
+  integer i
+
+  allocate (b(n))
+
+  if (.not.allocated (b)) call abort
+  if (acc_is_present (b) .neqv. .true.) call abort
+
+  !$acc parallel loop
+  do i = 1, n
+     b(i) = 1.0
+  end do
+end subroutine sub2
+
+! Return b(i) * i;
+
+real*8 function fun1 (i)
+  use vars
+  implicit none
+  integer i
+  !$acc routine seq
+
+  fun1 = b(i) * i
+end function fun1
+
+! Return b(i) * i * a;
+
+real*8 function fun2 (i)
+  use vars
+  implicit none
+  integer i
+
+  fun2 = b(i) * i * a
+end function fun2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90
new file mode 100644
index 0000000..b4cf26e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-4.f90
@@ -0,0 +1,66 @@
+! Test declare create with allocatable arrays and scalars.  The unused
+! declared array 'b' caused an ICE in the past.
+
+! { dg-do run }
+
+module vars
+  implicit none
+  integer, parameter :: n = 100
+  real*8, allocatable :: a, b(:)
+ !$acc declare create (a, b)
+end module vars
+
+program test
+  use vars
+  implicit none
+  integer :: i
+
+  interface
+     subroutine sub1
+     end subroutine sub1
+
+     subroutine sub2
+     end subroutine sub2
+
+     real*8 function fun1 (ix)
+       integer ix
+       !$acc routine seq
+     end function fun1
+
+     real*8 function fun2 (ix)
+       integer ix
+       !$acc routine seq
+     end function fun2
+  end interface
+
+  if (allocated (a)) call abort
+  if (allocated (b)) call abort
+
+  ! Test the usage of an allocated declared array inside an acc
+  ! routine subroutine.
+
+  allocate (a)
+  allocate (b(n))
+
+  if (.not.allocated (b)) call abort
+
+  call sub1
+
+  !$acc update self(a)
+  if (a /= 50) call abort
+
+  deallocate (a)
+  deallocate (b)
+
+end program test
+
+! Set 'a' to 50.
+
+subroutine sub1
+  use vars
+  implicit none
+  integer i
+
+  a = 50
+  !$acc update device(a)
+end subroutine sub1

Reply via email to