This patch adds the OpenACC routines acc_attach and acc_detach.

However, in order to avoid the generation of a temporary, which
breaks this feature, a special case had to be added to
gfc_trans_call.

Otherwise, I think it completes the Fortran additions of existing
C/C++ functions, by adding this OpenACC 3.3 feature, which is used
by ICON.

Any comments, suggestions, remarks before I commit this patch?

Tobias
Fortran/OpenACC: Add Fortran support for acc_attach/acc_detach

While C/++ support the routines acc_attach{,_async} and
acc_detach{,_finalize}{,_async} routines since a long time, the Fortran
API routines where only added in OpenACC 3.3.

Unfortunately, they cannot directly be implemented in the library as
GCC will introduce a temporary array descriptor in some cases, which
causes the attempted attachment to the this temporary variable instead
of to the original one.

Therefore, those API routines are handled in a special way in the compiler.

gcc/fortran/ChangeLog:

	* trans-stmt.cc (gfc_trans_call_acc_attach_detach): New.
	(gfc_trans_call): Call it.

libgomp/ChangeLog:

	* libgomp.texi (acc_attach, acc_detach): Update for Fortran
	version.
	* openacc.f90 acc_attach{,_async}, acc_detach{,_finalize}{,_async}:
	Add.
	* openacc_lib.h: Likewise.
	* testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test.
	* testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test.

 gcc/fortran/trans-stmt.cc                          | 74 +++++++++++++++++++++-
 libgomp/libgomp.texi                               | 40 ++++++------
 libgomp/openacc.f90                                | 44 +++++++++++++
 libgomp/openacc_lib.h                              | 42 ++++++++++++
 .../libgomp.oacc-fortran/acc-attach-detach-1.f90   | 25 ++++++++
 .../libgomp.oacc-fortran/acc-attach-detach-2.f90   | 62 ++++++++++++++++++
 6 files changed, 265 insertions(+), 22 deletions(-)

diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
index 487b7687ef1..f1054015862 100644
--- a/gcc/fortran/trans-stmt.cc
+++ b/gcc/fortran/trans-stmt.cc
@@ -377,6 +377,57 @@ get_intrinsic_for_code (gfc_code *code)
 }
 
 
+/* Handle the OpenACC routines acc_attach{,_async} and
+   acc_detach{,_finalize}{,_async} explicitly.  This is required as the
+   the corresponding device pointee is attached to the corresponding device
+   pointer, but if a temporary array descriptor is created for the call,
+   that one is used as pointer instead of the original pointer.  */
+
+tree
+gfc_trans_call_acc_attach_detach (gfc_code *code)
+{
+  stmtblock_t block;
+  gfc_se ptr_addr_se, async_se;
+  tree fn;
+
+  fn = code->resolved_sym->backend_decl;
+  if (fn == NULL)
+    {
+      fn = gfc_get_symbol_decl (code->resolved_sym);
+      code->resolved_sym->backend_decl = fn;
+    }
+
+  gfc_start_block (&block);
+
+  gfc_init_se (&ptr_addr_se, NULL);
+  ptr_addr_se.descriptor_only = 1;
+  ptr_addr_se.want_pointer = 1;
+  gfc_conv_expr (&ptr_addr_se, code->ext.actual->expr);
+  gfc_add_block_to_block (&block, &ptr_addr_se.pre);
+  if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (ptr_addr_se.expr)))
+    ptr_addr_se.expr = gfc_conv_descriptor_data_get (ptr_addr_se.expr);
+  ptr_addr_se.expr = build_fold_addr_expr (ptr_addr_se.expr);
+
+  bool async = code->ext.actual->next != NULL;
+  if (async)
+    {
+      gfc_init_se (&async_se, NULL);
+      gfc_conv_expr (&async_se, code->ext.actual->next->expr);
+      fn = build_call_expr_loc (gfc_get_location (&code->loc), fn, 2,
+				ptr_addr_se.expr, async_se.expr);
+    }
+  else
+    fn = build_call_expr_loc (gfc_get_location (&code->loc),
+			      fn, 1, ptr_addr_se.expr);
+  gfc_add_expr_to_block (&block, fn);
+  gfc_add_block_to_block (&block, &ptr_addr_se.post);
+  if (async)
+    gfc_add_block_to_block (&block, &async_se.post);
+
+  return gfc_finish_block (&block);
+}
+
+
 /* Translate the CALL statement.  Builds a call to an F95 subroutine.  */
 
 tree
@@ -392,13 +443,32 @@ gfc_trans_call (gfc_code * code, bool dependency_check,
   tree tmp;
   bool is_intrinsic_mvbits;
 
+  gcc_assert (code->resolved_sym);
+
+  /* Unfortunately, acc_attach* and acc_detach* need some special treatment for
+     attaching the the pointee to a pointer as GCC might introduce a temporary
+     array descriptor, whose data component is then used as to be attached to
+     pointer.  */
+  if (flag_openacc
+      && code->resolved_sym->attr.subroutine
+      && code->resolved_sym->formal
+      && code->resolved_sym->formal->sym->ts.type == BT_ASSUMED
+      && code->resolved_sym->formal->sym->attr.dimension
+      && code->resolved_sym->formal->sym->as->type == AS_ASSUMED_RANK
+      && startswith (code->resolved_sym->name, "acc_")
+      && (!strcmp (code->resolved_sym->name + 4, "attach")
+	  || !strcmp (code->resolved_sym->name + 4, "attach_async")
+	  || !strcmp (code->resolved_sym->name + 4, "detach")
+	  || !strcmp (code->resolved_sym->name + 4, "detach_async")
+	  || !strcmp (code->resolved_sym->name + 4, "detach_finalize")
+	  || !strcmp (code->resolved_sym->name + 4, "detach_finalize_async")))
+    return gfc_trans_call_acc_attach_detach (code);
+
   /* A CALL starts a new block because the actual arguments may have to
      be evaluated first.  */
   gfc_init_se (&se, NULL);
   gfc_start_block (&se.pre);
 
-  gcc_assert (code->resolved_sym);
-
   ss = gfc_ss_terminator;
   if (code->resolved_sym->attr.elemental)
     ss = gfc_walk_elemental_function_args (ss, code->ext.actual,
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 9f53f167e06..5518033f1f3 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -5967,19 +5967,19 @@ address to pointing to the corresponding device data.
 @item @emph{Prototype}: @tab @code{void acc_attach_async(h_void **ptr_addr, int async);}
 @end multitable
 
-@c @item @emph{Fortran}:
-@c @multitable @columnfractions .20 .80
-@c @item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)}
-@c @item                   @tab @code{type(*), dimension(..) :: ptr_addr}
-@c @item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
-@c @end multitable
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)}
+@item                   @tab @code{type(*), dimension(..) :: ptr_addr}
+@item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
+@end multitable
 
 @item @emph{Reference}:
 @uref{https://www.openacc.org, OpenACC specification v2.6}, section
 3.2.34.
-@c  @uref{https://www.openacc.org, OpenACC specification v3.3}, section
-@c 3.2.29.
+ @uref{https://www.openacc.org, OpenACC specification v3.3}, section
+3.2.29.
 @end table
 
 
@@ -5999,21 +5999,21 @@ address to pointing to the corresponding host data.
 @item @emph{Prototype}: @tab @code{void acc_detach_finalize_async(h_void **ptr_addr, int async);}
 @end multitable
 
-@c @item @emph{Fortran}:
-@c @multitable @columnfractions .20 .80
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)}
-@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)}
-@c @item                   @tab @code{type(*), dimension(..) :: ptr_addr}
-@c @item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
-@c @end multitable
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)}
+@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)}
+@item                   @tab @code{type(*), dimension(..) :: ptr_addr}
+@item                   @tab @code{integer(acc_handle_kind), value :: async_arg}
+@end multitable
 
 @item @emph{Reference}:
 @uref{https://www.openacc.org, OpenACC specification v2.6}, section
 3.2.35.
-@c  @uref{https://www.openacc.org, OpenACC specification v3.3}, section
-@c 3.2.29.
+@uref{https://www.openacc.org, OpenACC specification v3.3}, section
+3.2.29.
 @end table
 
 
diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90
index 9d51f017985..3f2db45617b 100644
--- a/libgomp/openacc.f90
+++ b/libgomp/openacc.f90
@@ -798,6 +798,8 @@ module openacc
   public :: acc_memcpy_to_device, acc_memcpy_to_device_async
   public :: acc_memcpy_from_device, acc_memcpy_from_device_async
   public :: acc_memcpy_device, acc_memcpy_device_async
+  public :: acc_attach, acc_attach_async, acc_detach, acc_detach_async
+  public :: acc_detach_finalize, acc_detach_finalize_async
 
   integer, parameter :: openacc_version = 201711
 
@@ -1068,6 +1070,48 @@ module openacc
     end subroutine
   end interface
 
+  interface
+    subroutine acc_attach (ptr_addr) bind(C)
+      type(*), dimension(..) :: ptr_addr
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_attach_async (ptr_addr, async_arg) bind(C)
+      import :: acc_handle_kind
+      type(*), dimension(..) :: ptr_addr
+      integer(acc_handle_kind), value :: async_arg
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_detach (ptr_addr) bind(C)
+      type(*), dimension(..) :: ptr_addr
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_detach_async (ptr_addr, async_arg) bind(C)
+      import :: acc_handle_kind
+      type(*), dimension(..) :: ptr_addr
+      integer(acc_handle_kind), value :: async_arg
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_detach_finalize (ptr_addr) bind(C)
+      type(*), dimension(..) :: ptr_addr
+    end subroutine
+  end interface
+
+  interface
+    subroutine acc_detach_finalize_async (ptr_addr, async_arg) bind(C)
+      import :: acc_handle_kind
+      type(*), dimension(..) :: ptr_addr
+      integer(acc_handle_kind), value :: async_arg
+    end subroutine
+  end interface
+
   interface acc_copyin_async
     procedure :: acc_copyin_async_32_h
     procedure :: acc_copyin_async_64_h
diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h
index 9333c481502..dbdc4d7bc40 100644
--- a/libgomp/openacc_lib.h
+++ b/libgomp/openacc_lib.h
@@ -707,3 +707,45 @@
           integer (acc_handle_kind) async_
         end subroutine
       end interface
+
+      interface
+        subroutine acc_attach (ptr_addr) bind(C)
+          type(*), dimension(..) :: ptr_addr
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_attach_async (ptr_addr, async_arg) bind(C)
+          import :: acc_handle_kind
+          type(*), dimension(..) :: ptr_addr
+          integer(acc_handle_kind), value :: async_arg
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_detach (ptr_addr) bind(C)
+          type(*), dimension(..) :: ptr_addr
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_detach_async (ptr_addr, async_arg) bind(C)
+          import :: acc_handle_kind
+          type(*), dimension(..) :: ptr_addr
+          integer(acc_handle_kind), value :: async_arg
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_detach_finalize (ptr_addr) bind(C)
+          type(*), dimension(..) :: ptr_addr
+        end subroutine
+      end interface
+
+      interface
+        subroutine acc_detach_finalize_async(ptr_addr, async_arg)bind(C)
+          import :: acc_handle_kind
+          type(*), dimension(..) :: ptr_addr
+          integer(acc_handle_kind), value :: async_arg
+        end subroutine
+      end interface
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90
new file mode 100644
index 00000000000..15393b456c8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90
@@ -0,0 +1,25 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+
+use openacc
+implicit none (type, external)
+integer,pointer :: a, b(:)
+integer,allocatable :: c, d(:)
+
+call acc_attach(a)  ! ICE
+call acc_attach_async(b, 4)
+call acc_attach(c)
+
+call acc_detach(a)
+call acc_detach_async(b, 4)
+call acc_detach_finalize(c)
+call acc_detach_finalize_async(d,7)
+end
+
+! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90
new file mode 100644
index 00000000000..b2204ac4467
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90
@@ -0,0 +1,62 @@
+! { dg-do run }
+
+use openacc
+implicit none (type, external)
+integer, target :: tgt_a, tgt_b(5)
+
+integer, pointer :: p1, p2(:)
+
+type t
+  integer,pointer :: a => null ()
+  integer,pointer :: b(:) => null ()
+  integer,allocatable :: c, d(:)
+end type t
+
+type(t), target :: var
+
+tgt_a = 51
+tgt_b = [11,22,33,44,55]
+
+var%b => tgt_b
+!$acc enter data copyin(var, tgt_a, tgt_b)
+var%a => tgt_a
+
+call acc_attach(var%a)
+call acc_attach(var%b)
+
+!$acc serial
+! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
+  if (var%a /= 51) stop 1
+  if (any (var%b /= [11,22,33,44,55])) stop 2
+!$acc end serial
+
+call acc_detach(var%a)
+call acc_detach(var%b)
+
+!$acc exit data delete(var, tgt_a, tgt_b)
+
+var%c = 9
+var%d = [1,2,3]
+
+p1 => var%c
+p2 => var%d
+
+!$acc enter data copyin(p1, p2)
+!$acc enter data copyin(var)
+call acc_attach(var%c)
+call acc_attach(var%d)
+
+!$acc serial
+! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 }
+  if (var%c /= 9) stop 3
+  if (any (var%d /= [1,2,3])) stop 4
+!$acc end serial
+
+call acc_detach(var%c)
+call acc_detach(var%d)
+
+!$acc exit data delete(var, p1, p2)
+
+deallocate(var%d)
+
+end

Reply via email to