So far, the GCN plugin has used a kernel_dispatch struct instance it
calls "shadow" to keep effectively a copy of part of the HSA dispatch
packet before populating said packet.  It also allocated it on the heap.

This, at first glance, seems useless: why double up the data in a shadow
when it's already in packet?

But, it serves a purpose.  The packet is owned by the HSA runtime.
After dispatch, its contents are to be considered no longer accessible
by the dispatcher (i.e. run_kernel).  So, we can't read back from it the
addresses or handles of resources we allocated, and so, we can't clean
them up.

However, this allocation doesn't need to happen on the heap.  It's of a
known fixed size, and its lifetime is the same as the lifetime of an
automatic variable.

This patch demotes the heap allocation into an automatic variable, and
adds commentary to make it clear what the purpose of this "shadow" is.
In the end, the result of this patch is that the run_kernel hot path has
one fewer allocation.

I've also taken the opportunity to do some very minor code cleanup.

libgomp/ChangeLog:

        * plugin/plugin-gcn.c (struct kernel_dispatch): Store
        hsa_signal_t, rather than a uint64_t, so that we don't rely on
        knowledge of the contents of hsa_signal_t.
        (create_kernel_dispatch): Rename...
        (prepare_kernel_dispatch): ... to this, as it no longer creates
        a kernel dispatch.  The allocation that would've created it is
        hoisted...
        (run_kernel): ... here, as an automatic variable.  Move logic
        that copies the fields of kernel_dispatch...
        (populate_packet_from_dispatch): ... into this standalone
        function, to make it clearer.
        (release_kernel_dispatch): Rename....
        (cleanup_kernel_dispatch): ... to this, don't free 'shadow'.
---
 libgomp/plugin/plugin-gcn.c | 64 ++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 29 deletions(-)

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 99ba65e14243..af35b06f83af 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -274,8 +274,11 @@ struct GOMP_kernel_launch_attributes
   uint32_t wdims[3];
 };
 
-/* Collection of information needed for a dispatch of a kernel from a
-   kernel.  */
+/* Collection of information needed for a dispatch of a kernel from a kernel.
+   Redundant with parts of hsa_kernel_dispatch_packet_t.  It is maintained
+   separately because the contents of the aforementioned packet become
+   unspecified after dispatch, so, we can't re-read back pointers we wrote into
+   the dispatch packet in order to clean them up.  */
 
 struct kernel_dispatch
 {
@@ -285,7 +288,7 @@ struct kernel_dispatch
   /* Kernel object.  */
   uint64_t object;
   /* Synchronization signal used for dispatch synchronization.  */
-  uint64_t signal;
+  hsa_signal_t signal;
   /* Private segment size.  */
   uint32_t private_segment_size;
   /* Group segment size.  */
@@ -2125,13 +2128,12 @@ alloc_by_agent (struct agent_info *agent, size_t size)
 /* Create kernel dispatch data structure for given KERNEL, along with
    the necessary device signals and memory allocations.  */
 
-static struct kernel_dispatch *
-create_kernel_dispatch (struct kernel_info *kernel, int num_teams,
-                       int num_threads, struct kernargs *kernargs)
+static inline void
+prepare_kernel_dispatch (struct kernel_dispatch *shadow,
+                        struct kernel_info *kernel, int num_teams,
+                        int num_threads, struct kernargs *kernargs)
 {
   struct agent_info *agent = kernel->agent;
-  struct kernel_dispatch *shadow
-    = GOMP_PLUGIN_malloc_cleared (sizeof (struct kernel_dispatch));
 
   shadow->agent = kernel->agent;
   shadow->object = kernel->object;
@@ -2141,7 +2143,7 @@ create_kernel_dispatch (struct kernel_info *kernel, int 
num_teams,
   if (status != HSA_STATUS_SUCCESS)
     hsa_fatal ("Error creating the GCN sync signal", status);
 
-  shadow->signal = sync_signal.handle;
+  shadow->signal = sync_signal;
   shadow->private_segment_size = kernel->private_segment_size;
 
   if (lowlat_size < 0)
@@ -2169,7 +2171,7 @@ create_kernel_dispatch (struct kernel_info *kernel, int 
num_teams,
   if (kernel->kernarg_segment_size > 8)
     {
       GOMP_PLUGIN_fatal ("Unexpectedly large kernargs segment requested");
-      return NULL;
+      return;
     }
 
   /* Zero-initialize the output_data (minimum needed).  */
@@ -2190,8 +2192,19 @@ create_kernel_dispatch (struct kernel_info *kernel, int 
num_teams,
 
   /* Ensure we can recognize unset return values.  */
   kernargs->output_data.return_value = 0xcafe0000;
+}
 
-  return shadow;
+/* Copy information from DISPATCH into PACKET, to get it ready for
+   dispatching.  */
+
+static inline void
+populate_packet_from_dispatch (hsa_kernel_dispatch_packet_t *packet,
+                              struct kernel_dispatch *shadow)
+{
+  packet->private_segment_size = shadow->private_segment_size;
+  packet->group_segment_size = shadow->group_segment_size;
+  packet->kernel_object = shadow->object;
+  packet->completion_signal = shadow->signal;
 }
 
 static void
@@ -2265,7 +2278,7 @@ console_output (struct kernel_info *kernel, struct 
kernargs *kernargs,
    and clean up the signal and memory allocations.  */
 
 static inline void
-release_kernel_dispatch (struct kernel_dispatch *shadow,
+cleanup_kernel_dispatch (struct kernel_dispatch *shadow,
                         struct kernargs *kernargs)
 {
   GCN_DEBUG ("Released kernel dispatch: %p\n", shadow);
@@ -2275,11 +2288,7 @@ release_kernel_dispatch (struct kernel_dispatch *shadow,
     addr = (void *)kernargs->abi.stack_ptr;
   release_ephemeral_memories (shadow->agent, addr);
 
-  hsa_signal_t s;
-  s.handle = shadow->signal;
-  hsa_fns.hsa_signal_destroy_fn (s);
-
-  free (shadow);
+  hsa_fns.hsa_signal_destroy_fn (shadow->signal);
 }
 
 /* Extract the properties from a kernel binary.  */
@@ -2505,23 +2514,20 @@ run_kernel (struct gomp_offload_session *session,
             packet->workgroup_size_x, packet->workgroup_size_y,
             packet->workgroup_size_z);
 
-  struct kernel_dispatch *shadow
-    = create_kernel_dispatch (kernel, packet->grid_size_x,
-                             packet->grid_size_z, kernargs);
-  shadow->queue = command_q;
+  struct kernel_dispatch shadow;
+  prepare_kernel_dispatch (&shadow, kernel, packet->grid_size_x,
+                          packet->grid_size_z, kernargs);
+  shadow.queue = command_q;
 
   if (debug)
     {
       fprintf (stderr, "\nKernel has following dependencies:\n");
-      print_kernel_dispatch (shadow, 2, kernargs);
+      print_kernel_dispatch (&shadow, 2, kernargs);
     }
 
-  packet->private_segment_size = shadow->private_segment_size;
-  packet->group_segment_size = shadow->group_segment_size;
-  packet->kernel_object = shadow->object;
-  hsa_signal_t s;
-  s.handle = shadow->signal;
-  packet->completion_signal = s;
+  populate_packet_from_dispatch (packet, &shadow);
+
+  hsa_signal_t s = shadow.signal;
   hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
 
   GCN_DEBUG ("Copying kernel runtime pointer %p to kernarg_address\n", 
session->target_var_table);
@@ -2555,7 +2561,7 @@ run_kernel (struct gomp_offload_session *session,
 
   unsigned int return_value = (unsigned int)kernargs->output_data.return_value;
 
-  release_kernel_dispatch (shadow, kernargs);
+  cleanup_kernel_dispatch (&shadow, kernargs);
   release_session (session);
 
   if (!module_locked && pthread_rwlock_unlock (&agent->module_rwlock))
-- 
2.54.0

Reply via email to