If an offload kernel uses a large number of VGPRs, AMD GCN hardware may
need to limit the number of threads/workers launched for that kernel.
The number of SGPRs/VGPRs in use is detected by mkoffload and recorded in
the processed output.  However, the text patterns that report SGPR/VGPR
usage changed between HSACO v2 and v3, so this patch updates the parsing
to account for the new format.

OK for og10 branch? (I will repost for mainline after re-testing, etc.)

Julian

ChangeLog

        gcc/
        * config/gcn/mkoffload.c (process_asm): Initialise regcount.  Update
        scanning for SGPR/VGPR usage for HSACO v3.
---
 gcc/config/gcn/mkoffload.c | 40 ++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 723da108b655..48a86c719532 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -230,7 +230,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
     int sgpr_count;
     int vgpr_count;
     char *kernel_name;
-  } regcount;
+  } regcount = { -1, -1, NULL };
 
   /* Always add _init_array and _fini_array as kernels.  */
   obstack_ptr_grow (&fns_os, xstrdup ("_init_array"));
@@ -238,7 +238,12 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
   fn_count += 2;
 
   char buf[1000];
-  enum { IN_CODE, IN_AMD_KERNEL_CODE_T, IN_VARS, IN_FUNCS } state = IN_CODE;
+  enum
+    { IN_CODE,
+      IN_METADATA,
+      IN_VARS,
+      IN_FUNCS
+    } state = IN_CODE;
   while (fgets (buf, sizeof (buf), in))
     {
       switch (state)
@@ -251,21 +256,25 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
                obstack_grow (&dims_os, &dim, sizeof (dim));
                dims_count++;
              }
-           else if (sscanf (buf, " .amdgpu_hsa_kernel %ms\n",
-                            &regcount.kernel_name) == 1)
-             break;
 
            break;
          }
-       case IN_AMD_KERNEL_CODE_T:
+       case IN_METADATA:
          {
-           gcc_assert (regcount.kernel_name);
-           if (sscanf (buf, " wavefront_sgpr_count = %d\n",
-                       &regcount.sgpr_count) == 1)
+           if (sscanf (buf, " - .name: %ms\n", &regcount.kernel_name) == 1)
              break;
-           else if (sscanf (buf, " workitem_vgpr_count = %d\n",
+           else if (sscanf (buf, " .sgpr_count: %d\n",
+                            &regcount.sgpr_count) == 1)
+             {
+               gcc_assert (regcount.kernel_name);
+               break;
+             }
+           else if (sscanf (buf, " .vgpr_count: %d\n",
                             &regcount.vgpr_count) == 1)
-             break;
+             {
+               gcc_assert (regcount.kernel_name);
+               break;
+             }
 
            break;
          }
@@ -306,9 +315,10 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
        state = IN_VARS;
       else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0)
        state = IN_FUNCS;
-      else if (sscanf (buf, " .amd_kernel_code_%c", &dummy) > 0)
+      else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0)
        {
-         state = IN_AMD_KERNEL_CODE_T;
+         state = IN_METADATA;
+         regcount.kernel_name = NULL;
          regcount.sgpr_count = regcount.vgpr_count = -1;
        }
       else if (sscanf (buf, " .section %c", &dummy) > 0
@@ -317,7 +327,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
               || sscanf (buf, " .data%c", &dummy) > 0
               || sscanf (buf, " .ident %c", &dummy) > 0)
        state = IN_CODE;
-      else if (sscanf (buf, " .end_amd_kernel_code_%c", &dummy) > 0)
+      else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0)
        {
          state = IN_CODE;
          gcc_assert (regcount.kernel_name != NULL
@@ -329,7 +339,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
          regcount.sgpr_count = regcount.vgpr_count = -1;
        }
 
-      if (state == IN_CODE || state == IN_AMD_KERNEL_CODE_T)
+      if (state == IN_CODE || state == IN_METADATA)
        fputs (buf, out);
     }
 
-- 
2.23.0

Reply via email to