The AMD GCN runtime must be set to the correct mode for Unified Shared Memory
to work, but this is not always clear at compile and link time due to the split
nature of the offload compilation pipeline.
This patch places a marker in the assembler code for mkoffload to find.
mkoffload then adds code to a constructor function in the final program to ensure
that the HSA_XNACK environment variable passes the correct mode to the GPU.
The HSA_XNACK variable must be set before the HSA runtime is even loaded, so
it makes more sense to have this set within the constructor than at some point
later within libgomp or the GCN plugin.
Other toolchains require the end-user to set HSA_XNACK manually (or else wonder
why it's not working), so the constructor also checks that any existing manual
setting is compatible with the binary's requirements.
gcc/ChangeLog:
* config/gcn/gcn.cc (gcn_init_cumulative_args): Emit a warning if the
-mxnack setting looks wrong.
(gcn_hsa_declare_function_name): Emit "MKOFFLOAD OPTIONS: XNACK+".
* config/gcn/mkoffload.cc (process_asm): Detect "MKOFFLOAD OPTIONS".
Emit HSA_XNACK code into mkoffload_setup, as required.
(main): Modify HSACO_ATTR_OFF to preserve user-set -mxnack.
---
I believe I can approve this one myself, but at least Tobias probably
has an opinion on how this feature should go. :)
OK for mainline?
Andrew
gcc/config/gcn/gcn.cc | 16 +++++++++++++-
gcc/config/gcn/mkoffload.cc | 43 +++++++++++++++++++++++++++++++------
2 files changed, 52 insertions(+), 7 deletions(-)
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 1e04074d78b..f7a029c5d12 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -2940,6 +2940,15 @@ gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /*
Argument info to init */ ,
if (!caller && cfun->machine->normal_function)
gcn_detect_incoming_pointer_arg (fndecl);
+ if ((omp_requires_mask & OMP_REQUIRES_UNIFIED_SHARED_MEMORY)
+ && gcn_devices[gcn_arch].xnack_default != HSACO_ATTR_UNSUPPORTED
+ && flag_xnack == HSACO_ATTR_OFF)
+ {
+ warning_at (UNKNOWN_LOCATION, 0,
+ "Unified Shared Memory is enabled, but XNACK is disabled");
+ inform (UNKNOWN_LOCATION, "Try -foffload-options=-mxnack=any");
+ }
+
reinit_regs ();
}
@@ -6912,12 +6921,17 @@ gcn_hsa_declare_function_name (FILE *file, const char
*name,
fputs (",@function\n", file);
ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);
- /* This comment is read by mkoffload. */
+ /* These comments are read by mkoffload. */
if (flag_openacc)
fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
+ if (flag_xnack == HSACO_ATTR_ON
+ || (omp_requires_mask & OMP_REQUIRES_UNIFIED_SHARED_MEMORY
+ && gcn_devices[gcn_arch].xnack_default != HSACO_ATTR_UNSUPPORTED))
+ /* If a device supports XNACK then it will be needed for USM. */
+ fprintf (asm_out_file, "\t;; MKOFFLOAD OPTIONS: XNACK+\n");
}
/* Implement TARGET_ASM_INIT_SECTIONS. */
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 222adb2cd41..c9bf13c2da5 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -445,6 +445,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
{
int fn_count = 0, var_count = 0, ind_fn_count = 0;
int dims_count = 0, regcount_count = 0;
+ bool xnack_required = false;
struct obstack fns_os, dims_os, regcounts_os;
obstack_init (&fns_os);
obstack_init (&dims_os);
@@ -469,6 +470,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
fn_count += 2;
char buf[1000];
+ char dummy;
enum
{ IN_CODE,
IN_METADATA,
@@ -489,6 +491,9 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
dims_count++;
}
+ if (sscanf (buf, " ;; MKOFFLOAD OPTIONS: XNACK+%c", &dummy) > 0)
+ xnack_required = true;
+
break;
}
case IN_METADATA:
@@ -549,7 +554,6 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
}
}
- char dummy;
if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0)
{
state = IN_VARS;
@@ -615,11 +619,14 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
struct regcount *regcounts = XOBFINISH (&regcounts_os, struct regcount *);
+ bool use_xnack = (!TEST_XNACK_ANY (elf_flags) || xnack_required);
+
+ if (gcn_stack_size || use_xnack)
+ fprintf (cfile, "#include <stdlib.h>\n");
if (gcn_stack_size)
- {
- fprintf (cfile, "#include <stdlib.h>\n");
- fprintf (cfile, "#include <stdbool.h>\n\n");
- }
+ fprintf (cfile, "#include <stdbool.h>\n\n");
+ if (use_xnack)
+ fprintf (cfile, "#include <stdio.h>\n\n");
fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n",
ind_fn_count);
@@ -677,6 +684,29 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
" setenv (\"GCN_STACK_SIZE\", \"%d\", true);\n",
gcn_stack_size);
+ /* Emit a constructor function to set the HSA_XNACK environment variable.
+ This must be done before the ROCr runtime library is loaded.
+ We never override a user value (except empty string), but we do emit a
useful diagnostic in the wrong mode (the ROCr message is not good). */
+ if (TEST_XNACK_OFF (elf_flags) && xnack_required)
+ fatal_error (input_location,
+ "conflicting settings; XNACK is forced off but Unified "
+ "Shared Memory is on");
+ if (!TEST_XNACK_ANY (elf_flags) || xnack_required)
+ fprintf (cfile,
+ "\n"
+ " const char *xn_var = getenv (\"HSA_XNACK\");\n"
+ " if (!xn_var || xn_var[0] == '\\0')\n"
+ " setenv (\"HSA_XNACK\", \"%d\", true);\n"
+ " else if (%s)\n"
+ " fprintf (stderr, \"warning: HSA_XNACK=%%s is incompatible; "
+ "the GPU kernel may revert to host fall-back\\n\", "
+ "xn_var);\n",
+ xnack_required || TEST_XNACK_ON (elf_flags),
+ (xnack_required || TEST_XNACK_ON (elf_flags)
+ ? "xn_var[0] != '1' || xn_var[1] != '\\0'"
+ : "xn_var[0] == '1' && xn_var[1] == '\\0'"));
+
/* End of mkoffload_setup function. */
fprintf (cfile, "}\n\n");
@@ -1116,7 +1146,8 @@ main (int argc, char **argv)
#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAM, ...) \
case ELF: XNACK; break;
#define HSACO_ATTR_UNSUPPORTED SET_XNACK_UNSET (elf_flags)
-#define HSACO_ATTR_OFF SET_XNACK_OFF (elf_flags)
+#define HSACO_ATTR_OFF \
+ if (TEST_XNACK_UNSET (elf_flags)) SET_XNACK_OFF (elf_flags)
#define HSACO_ATTR_ANY \
if (TEST_XNACK_UNSET (elf_flags)) SET_XNACK_ANY (elf_flags)
#include "gcn-devices.def"
--
2.51.0