This patch flips the switch to enable worker partitioning on AMD GCN. OK?
Thanks, Julian ChangeLog gcc/ * config/gcn/gcn.c (gcn_goacc_validate_dims): Remove no-flag_worker-partitioning assertion. (TARGET_GOACC_WORKER_PARTITIONING): Define target hook to true. * config/gcn/gcn.opt (flag_worker_partitioning): Change default to 1. libgomp/ * plugin/plugin-gcn.c (gcn_exec): Change default number of workers to 16. --- gcc/config/gcn/gcn.c | 4 ++-- gcc/config/gcn/gcn.opt | 2 +- libgomp/plugin/plugin-gcn.c | 4 +--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index 3584ac85021..6e45032ea3a 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -4692,8 +4692,6 @@ gcn_goacc_validate_dims (tree decl, int dims[], int fn_level, /* FIXME: remove -facc-experimental-workers when they're ready. */ int max_workers = flag_worker_partitioning ? 16 : 1; - gcc_assert (!flag_worker_partitioning); - /* The vector size must appear to be 64, to the user, unless this is a SEQ routine. The real, internal value is always 1, which means use autovectorization, but the user should not see that. */ @@ -6072,6 +6070,8 @@ print_operand (FILE *file, rtx x, int code) #define TARGET_GOACC_REDUCTION gcn_goacc_reduction #undef TARGET_GOACC_VALIDATE_DIMS #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims +#undef TARGET_GOACC_WORKER_PARTITIONING +#define TARGET_GOACC_WORKER_PARTITIONING true #undef TARGET_HARD_REGNO_MODE_OK #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok #undef TARGET_HARD_REGNO_NREGS diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index 402deb625bd..bdc878f35ad 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -65,7 +65,7 @@ Target Report RejectNegative Var(flag_bypass_init_error) bool flag_worker_partitioning = false macc-experimental-workers -Target Report Var(flag_worker_partitioning) Init(0) +Target Report Var(flag_worker_partitioning) Init(1) int stack_size_opt = -1 diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index c4347dfa45d..3368d7e261a 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -3097,10 +3097,8 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, problem size, so let's do a reasonable number of single-worker gangs. 64 gangs matches a typical Fiji device. */ - /* NOTE: Until support for middle-end worker partitioning is merged, use 1 - for the default number of workers. */ if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs. */ - if (dims[1] == 0) dims[1] = 1; /* Workers. */ + if (dims[1] == 0) dims[1] = 16; /* Workers. */ /* The incoming dimensions are expressed in terms of gangs, workers, and vectors. The HSA dimensions are expressed in terms of "work-items", -- 2.23.0