Hi, this patch simplifies the logic in nvptx_single.
Built x86_64 with nvptx accelerator and tested libgomp. Thanks, - Tom
[nvptx] Simplify logic in nvptx_single 2018-04-12 Tom de Vries <t...@codesourcery.com> * config/nvptx/nvptx.c (nvptx_single): Simplify init of vector variable. Add and use variable use_partitioning_p. --- gcc/config/nvptx/nvptx.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 3c48c14..547022e 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -4305,22 +4305,24 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) we should never have worker mode only. */ broadcast_data_t data; unsigned size = GET_MODE_SIZE (SImode); - bool vector = true; + bool vector = (GOMP_DIM_MASK (GOMP_DIM_VECTOR) == mask) != 0; rtx barrier = GEN_INT (0); int threads = 0; - if (GOMP_DIM_MASK (GOMP_DIM_WORKER) == mask) - vector = false; - data.base = oacc_bcast_sym; data.ptr = 0; - if (vector - && nvptx_mach_max_workers () > 1 - && cfun->machine->bcast_partition) - data.base = cfun->machine->bcast_partition; - + bool use_partitioning_p = (vector + && nvptx_mach_max_workers () > 1 + && cfun->machine->bcast_partition); + if (use_partitioning_p) + { + data.base = cfun->machine->bcast_partition; + barrier = cfun->machine->sync_bar; + threads = nvptx_mach_vector_length (); + } gcc_assert (data.base != NULL); + gcc_assert (barrier); unsigned int psize = ROUND_UP (size, oacc_bcast_align); unsigned int pnum = (nvptx_mach_vector_length () > PTX_WARP_SIZE @@ -4335,14 +4337,6 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) vector), before); - if (vector - && nvptx_mach_max_workers () > 1 - && cfun->machine->sync_bar) - { - barrier = cfun->machine->sync_bar; - threads = nvptx_mach_vector_length (); - } - /* Barrier so other workers can see the write. */ emit_insn_before (nvptx_cta_sync (barrier, threads), tail); data.offset = 0;