Re: [patch] various OpenACC reduction enhancements - ME and nvptx changes
On Tue, 4 Dec 2018 16:55:04 +0100 Tom de Vries wrote: > On 04-12-18 13:29, Jakub Jelinek wrote: > > On Fri, Jun 29, 2018 at 11:19:53AM -0700, Cesar Philippidis wrote: > >> The attached patch includes the nvptx and GCC ME reductions > >> enhancements. > >> > >> Is this patch OK for trunk? It bootstrapped / regression tested > >> cleanly for x86_64 with nvptx offloading. > > This is all OpenACC specific code not really shareable with OpenMP, > > if Thomas (for middle-end) and Tom (for NVPTX backend) are ok with > > it, it is ok for trunk. > > > > Formatting needs to be fixed: > ... > There should be exactly one space between function name and > parenthesis. 160:+ unsigned old_shift = DIM_SIZE(VECTOR); > ... > > Also, the updated patch does not address my comment about > probabilities here > ( https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00325.html ): ... > > + /* Create the loop. */ > > + post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU; > > Edges need probabilities, as in nvptx_lockless_update, > nvptx_lockfull_update and nvptx_goacc_reduction_init. > ... Something like the attached? Tested alongside other revised patches in the series: https://gcc.gnu.org/ml/gcc-patches/2018-12/msg00930.html https://gcc.gnu.org/ml/gcc-patches/2018-12/msg00931.html (except the lines adding edge probabilities, which I've smoke-tested but haven't yet gone through a full test cycle). Thanks, Julian ChangeLog gcc/ * config/nvptx/nvptx.c (nvptx_propagate_unified): New. (nvptx_split_blocks): Call it for cond_uni insn. (nvptx_expand_cond_uni): New. (enum nvptx_builtins): Add NVPTX_BUILTIN_COND_UNI. (nvptx_init_builtins): Initialize it. (nvptx_expand_builtin): (nvptx_generate_vector_shuffle): Change integral SHIFT operand to tree BITS operand. (nvptx_vector_reduction): New. (nvptx_adjust_reduction_type): New. (nvptx_goacc_reduction_setup): Use it to adjust the type of ref_to_res. (nvptx_goacc_reduction_init): Don't update LHS if it doesn't exist. 
(nvptx_goacc_reduction_fini): Call nvptx_vector_reduction for vector. Use it to adjust the type of ref_to_res. (nvptx_goacc_reduction_teardown): * config/nvptx/nvptx.md (cond_uni): New pattern. commit 401876d422c4fa7f02c1b899e81568eea6ad7531 Author: Julian Brown Date: Tue Dec 11 13:35:52 2018 -0800 Various OpenACC reduction enhancements - ME and nvptx changes gcc/ * config/nvptx/nvptx.c (nvptx_propagate_unified): New. (nvptx_split_blocks): Call it for cond_uni insn. (nvptx_expand_cond_uni): New. (enum nvptx_builtins): Add NVPTX_BUILTIN_COND_UNI. (nvptx_init_builtins): Initialize it. (nvptx_expand_builtin): (nvptx_generate_vector_shuffle): Change integral SHIFT operand to tree BITS operand. (nvptx_vector_reduction): New. (nvptx_adjust_reduction_type): New. (nvptx_goacc_reduction_setup): Use it to adjust the type of ref_to_res. (nvptx_goacc_reduction_init): Don't update LHS if it doesn't exist. (nvptx_goacc_reduction_fini): Call nvptx_vector_reduction for vector. Use it to adjust the type of ref_to_res. (nvptx_goacc_reduction_teardown): * config/nvptx/nvptx.md (cond_uni): New pattern. diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 9903a27..0023dad 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2863,6 +2863,52 @@ nvptx_reorg_uniform_simt () } } +/* UNIFIED is a cond_uni insn. Find the branch insn it affects, and + mark that as unified. We expect to be in a single block. */ + +static void +nvptx_propagate_unified (rtx_insn *unified) +{ + rtx_insn *probe = unified; + rtx cond_reg = SET_DEST (PATTERN (unified)); + rtx pat = NULL_RTX; + + /* Find the comparison. (We could skip this and simply scan to he + blocks' terminating branch, if we didn't care for self + checking.) 
*/ + for (;;) +{ + probe = next_real_insn (probe); + if (!probe) + break; + pat = PATTERN (probe); + + if (GET_CODE (pat) == SET + && GET_RTX_CLASS (GET_CODE (SET_SRC (pat))) == RTX_COMPARE + && XEXP (SET_SRC (pat), 0) == cond_reg) + break; + gcc_assert (NONJUMP_INSN_P (probe)); +} + gcc_assert (pat); + rtx pred_reg = SET_DEST (pat); + + /* Find the branch. */ + do +probe = NEXT_INSN (probe); + while (!JUMP_P (probe)); + + pat = PATTERN (probe); + rtx itec = XEXP (SET_SRC (pat), 0); + gcc_assert (XEXP (itec, 0) == pred_reg); + + /* Mark the branch's condition as unified. */ + rtx unspec = gen_rtx_UNSPEC (BImode, gen_rtvec (1, pred_reg), + UNSPEC_BR_UNIFIED); + bool ok = validate_change (probe, &XEXP (itec, 0), unspec, false); + + gcc_assert (ok); +} + /* Loop structure of the function. The entire function is described as a NULL loop. */ @@ -2964,6 +3010,9 @@ nvptx_split_blocks (bb_insn_map_t *map) continue;
Re: [patch] various OpenACC reduction enhancements - ME and nvptx changes
On 04-12-18 13:29, Jakub Jelinek wrote: > On Fri, Jun 29, 2018 at 11:19:53AM -0700, Cesar Philippidis wrote: >> The attached patch includes the nvptx and GCC ME reductions enhancements. >> >> Is this patch OK for trunk? It bootstrapped / regression tested cleanly >> for x86_64 with nvptx offloading. > This is all OpenACC specific code not really shareable with OpenMP, if > Thomas (for middle-end) and Tom (for NVPTX backend) are ok with it, it is ok > for trunk. > Formatting needs to be fixed: ... There should be exactly one space between function name and parenthesis. 160:+ unsigned old_shift = DIM_SIZE(VECTOR); ... Also, the updated patch does not address my comment about probabilities here ( https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00325.html ): ... > + /* Create the loop. */ > + post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU; Edges need probabilities, as in nvptx_lockless_update, nvptx_lockfull_update and nvptx_goacc_reduction_init. ... Thanks, - Tom
Re: [patch] various OpenACC reduction enhancements - ME and nvptx changes
On Fri, Jun 29, 2018 at 11:19:53AM -0700, Cesar Philippidis wrote: > The attached patch includes the nvptx and GCC ME reductions enhancements. > > Is this patch OK for trunk? It bootstrapped / regression tested cleanly > for x86_64 with nvptx offloading. This is all OpenACC specific code not really shareable with OpenMP, if Thomas (for middle-end) and Tom (for NVPTX backend) are ok with it, it is ok for trunk. > 2018-06-29 Cesar Philippidis > Nathan Sidwell > > gcc/ > * config/nvptx/nvptx.c (nvptx_propagate_unified): New. > (nvptx_split_blocks): Call it for cond_uni insn. > (nvptx_expand_cond_uni): New. > (enum nvptx_builtins): Add NVPTX_BUILTIN_COND_UNI. > (nvptx_init_builtins): Initialize it. > (nvptx_expand_builtin): > (nvptx_generate_vector_shuffle): Change integral SHIFT operand to > tree BITS operand. > (nvptx_vector_reduction): New. > (nvptx_adjust_reduction_type): New. > (nvptx_goacc_reduction_setup): Use it to adjust the type of ref_to_res. > (nvptx_goacc_reduction_init): Don't update LHS if it doesn't exist. > (nvptx_goacc_reduction_fini): Call nvptx_vector_reduction for vector. > Use it to adjust the type of ref_to_res. > (nvptx_goacc_reduction_teardown): > * config/nvptx/nvptx.md (cond_uni): New pattern. > * omp-general.h (enum oacc_loop_flags): Add OLF_REDUCTION enum. > * omp-low.c (lower_oacc_reductions): Handle reduction decls mapped > with GOMP_MAP_FIRSTPRIVATE_POINTER. > (lower_oacc_head_mark): Use OLF_REDUCTION to mark OpenACC reductions. > * omp-offload.c (oacc_loop_auto_partitions): Don't assign gang > level parallelism to orphan reductions. > (default_goacc_reduction): Retype ref_to_res as necessary. Jakub
Re: [patch] various OpenACC reduction enhancements - ME and nvptx changes
On 10/5/18 07:07, Tom de Vries wrote: > On 6/29/18 8:19 PM, Cesar Philippidis wrote: >> The attached patch includes the nvptx and GCC ME reductions enhancements. >> >> Is this patch OK for trunk? It bootstrapped / regression tested cleanly >> for x86_64 with nvptx offloading. >> > > These need fixing: > ... > === ERROR type #5: trailing whitespace (4 error(s)) === > gcc/config/nvptx/nvptx.c:5139:0:██ > gcc/config/nvptx/nvptx.c:5660:8: do█ > gcc/config/nvptx/nvptx.c:5702:0:██ > gcc/config/nvptx/nvptx.c:5726:0:██ > ... Sorry. The attached patch fixes that. > Otherwise, nvptx part LGTM. Tomorrow's my last day at Mentor, so either Thomas or Julian will need to commit it once the other patches get approved. Thanks, Cesar gcc/ * config/nvptx/nvptx.c (nvptx_propagate_unified): New. (nvptx_split_blocks): Call it for cond_uni insn. (nvptx_expand_cond_uni): New. (enum nvptx_builtins): Add NVPTX_BUILTIN_COND_UNI. (nvptx_init_builtins): Initialize it. (nvptx_expand_builtin): (nvptx_generate_vector_shuffle): Change integral SHIFT operand to tree BITS operand. (nvptx_vector_reduction): New. (nvptx_adjust_reduction_type): New. (nvptx_goacc_reduction_setup): Use it to adjust the type of ref_to_res. (nvptx_goacc_reduction_init): Don't update LHS if it doesn't exist. (nvptx_goacc_reduction_fini): Call nvptx_vector_reduction for vector. Use it to adjust the type of ref_to_res. (nvptx_goacc_reduction_teardown): * config/nvptx/nvptx.md (cond_uni): New pattern. diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 9903a273863..acb490a9a90 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2863,6 +2863,52 @@ nvptx_reorg_uniform_simt () } } +/* UNIFIED is a cond_uni insn. Find the branch insn it affects, and + mark that as unified. We expect to be in a single block. 
*/ + +static void +nvptx_propagate_unified (rtx_insn *unified) +{ + rtx_insn *probe = unified; + rtx cond_reg = SET_DEST (PATTERN (unified)); + rtx pat = NULL_RTX; + + /* Find the comparison. (We could skip this and simply scan to the + block's terminating branch, if we didn't care for self + checking.) */ + for (;;) +{ + probe = next_real_insn (probe); + if (!probe) + break; + pat = PATTERN (probe); + + if (GET_CODE (pat) == SET + && GET_RTX_CLASS (GET_CODE (SET_SRC (pat))) == RTX_COMPARE + && XEXP (SET_SRC (pat), 0) == cond_reg) + break; + gcc_assert (NONJUMP_INSN_P (probe)); +} + gcc_assert (pat); + rtx pred_reg = SET_DEST (pat); + + /* Find the branch. */ + do +probe = NEXT_INSN (probe); + while (!JUMP_P (probe)); + + pat = PATTERN (probe); + rtx itec = XEXP (SET_SRC (pat), 0); + gcc_assert (XEXP (itec, 0) == pred_reg); + + /* Mark the branch's condition as unified. */ + rtx unspec = gen_rtx_UNSPEC (BImode, gen_rtvec (1, pred_reg), + UNSPEC_BR_UNIFIED); + bool ok = validate_change (probe, &XEXP (itec, 0), unspec, false); + + gcc_assert (ok); +} + /* Loop structure of the function. The entire function is described as a NULL loop. */ @@ -2964,6 +3010,9 @@ nvptx_split_blocks (bb_insn_map_t *map) continue; switch (recog_memoized (insn)) { + case CODE_FOR_cond_uni: + nvptx_propagate_unified (insn); + /* FALLTHROUGH */ default: seen_insn = true; continue; @@ -5083,6 +5132,21 @@ nvptx_expand_cmp_swap (tree exp, rtx target, return target; } +/* Expander for the compare unified builtin. */ + +static rtx +nvptx_expand_cond_uni (tree exp, rtx target, machine_mode mode, int ignore) +{ + if (ignore) +return target; + + rtx src = expand_expr (CALL_EXPR_ARG (exp, 0), + NULL_RTX, mode, EXPAND_NORMAL); + + emit_insn (gen_cond_uni (target, src)); + + return target; +} /* Codes for all the NVPTX builtins. 
*/ enum nvptx_builtins @@ -5092,6 +5156,7 @@ enum nvptx_builtins NVPTX_BUILTIN_WORKER_ADDR, NVPTX_BUILTIN_CMP_SWAP, NVPTX_BUILTIN_CMP_SWAPLL, + NVPTX_BUILTIN_COND_UNI, NVPTX_BUILTIN_MAX }; @@ -5129,6 +5194,7 @@ nvptx_init_builtins (void) (PTRVOID, ST, UINT, UINT, NULL_TREE)); DEF (CMP_SWAP, "cmp_swap", (UINT, PTRVOID, UINT, UINT, NULL_TREE)); DEF (CMP_SWAPLL, "cmp_swapll", (LLUINT, PTRVOID, LLUINT, LLUINT, NULL_TREE)); + DEF (COND_UNI, "cond_uni", (integer_type_node, integer_type_node, NULL_TREE)); #undef DEF #undef ST @@ -5161,6 +5227,9 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget), case NVPTX_BUILTIN_CMP_SWAPLL: return nvptx_expand_cmp_swap (exp, target, mode, ignore); +case NVPTX_BUILTIN_COND_UNI: + return nvptx_expand_cond_uni (exp, target, mode, ignore); + default: gcc_unreachable (); } } @@ -5284,7 +5353,7 @@ nvptx_get_worker_red_addr (tree type, tree offset) static void nvptx_generate_vector_shuffle (location_t loc, - tree dest_var, tree var, unsigned shift, + tree dest_var, tree var, tree bits, gimple_seq *seq)
Re: [patch] various OpenACC reduction enhancements - ME and nvptx changes
On 6/29/18 8:19 PM, Cesar Philippidis wrote: > The attached patch includes the nvptx and GCC ME reductions enhancements. > > Is this patch OK for trunk? It bootstrapped / regression tested cleanly > for x86_64 with nvptx offloading. > These need fixing: ... === ERROR type #5: trailing whitespace (4 error(s)) === gcc/config/nvptx/nvptx.c:5139:0:██ gcc/config/nvptx/nvptx.c:5660:8: do█ gcc/config/nvptx/nvptx.c:5702:0:██ gcc/config/nvptx/nvptx.c:5726:0:██ ... > gcc/ > * config/nvptx/nvptx.c (nvptx_propagate_unified): New. > (nvptx_split_blocks): Call it for cond_uni insn. > (nvptx_expand_cond_uni): New. > (enum nvptx_builtins): Add NVPTX_BUILTIN_COND_UNI. > (nvptx_init_builtins): Initialize it. > (nvptx_expand_builtin): > (nvptx_generate_vector_shuffle): Change integral SHIFT operand to > tree BITS operand. > (nvptx_vector_reduction): New. > (nvptx_adjust_reduction_type): New. > (nvptx_goacc_reduction_setup): Use it to adjust the type of ref_to_res. > (nvptx_goacc_reduction_init): Don't update LHS if it doesn't exist. > (nvptx_goacc_reduction_fini): Call nvptx_vector_reduction for vector. > Use it to adjust the type of ref_to_res. > (nvptx_goacc_reduction_teardown): > * config/nvptx/nvptx.md (cond_uni): New pattern. > diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c > index 5608bee8a8d..33ec3db1153 100644 > --- a/gcc/config/nvptx/nvptx.c > +++ b/gcc/config/nvptx/nvptx.c > @@ -2863,6 +2863,52 @@ nvptx_reorg_uniform_simt () > } > } > > +/* UNIFIED is a cond_uni insn. Find the branch insn it affects, and > + mark that as unified. We expect to be in a single block. */ > + > +static void > +nvptx_propagate_unified (rtx_insn *unified) > +{ > + rtx_insn *probe = unified; > + rtx cond_reg = SET_DEST (PATTERN (unified)); > + rtx pat = NULL_RTX; > + > + /* Find the comparison. (We could skip this and simply scan to he > + blocks' terminating branch, if we didn't care for self > + checking.) 
*/ > + for (;;) > +{ > + probe = next_real_insn (probe); > + if (!probe) > + break; > + pat = PATTERN (probe); > + > + if (GET_CODE (pat) == SET > + && GET_RTX_CLASS (GET_CODE (SET_SRC (pat))) == RTX_COMPARE > + && XEXP (SET_SRC (pat), 0) == cond_reg) > + break; > + gcc_assert (NONJUMP_INSN_P (probe)); > +} > + gcc_assert (pat); > + rtx pred_reg = SET_DEST (pat); > + > + /* Find the branch. */ > + do > +probe = NEXT_INSN (probe); > + while (!JUMP_P (probe)); > + > + pat = PATTERN (probe); > + rtx itec = XEXP (SET_SRC (pat), 0); > + gcc_assert (XEXP (itec, 0) == pred_reg); > + > + /* Mark the branch's condition as unified. */ > + rtx unspec = gen_rtx_UNSPEC (BImode, gen_rtvec (1, pred_reg), > +UNSPEC_BR_UNIFIED); > + bool ok = validate_change (probe, &XEXP (itec, 0), unspec, false); > + > + gcc_assert (ok); > +} > + > /* Loop structure of the function. The entire function is described as > a NULL loop. */ > > @@ -2964,6 +3010,9 @@ nvptx_split_blocks (bb_insn_map_t *map) > continue; > switch (recog_memoized (insn)) > { > + case CODE_FOR_cond_uni: > + nvptx_propagate_unified (insn); > + /* FALLTHROUGH */ > default: > seen_insn = true; > continue; > @@ -5080,6 +5129,21 @@ nvptx_expand_cmp_swap (tree exp, rtx target, >return target; > } > > +/* Expander for the compare unified builtin. */ > + > +static rtx > +nvptx_expand_cond_uni (tree exp, rtx target, machine_mode mode, int ignore) > +{ > + if (ignore) > +return target; > + > + rtx src = expand_expr (CALL_EXPR_ARG (exp, 0), > + NULL_RTX, mode, EXPAND_NORMAL); > + > + emit_insn (gen_cond_uni (target, src)); > + > + return target; > +} > > /* Codes for all the NVPTX builtins. 
*/ > enum nvptx_builtins > @@ -5089,6 +5153,7 @@ enum nvptx_builtins >NVPTX_BUILTIN_WORKER_ADDR, >NVPTX_BUILTIN_CMP_SWAP, >NVPTX_BUILTIN_CMP_SWAPLL, > + NVPTX_BUILTIN_COND_UNI, >NVPTX_BUILTIN_MAX > }; > > @@ -5126,6 +5191,7 @@ nvptx_init_builtins (void) > (PTRVOID, ST, UINT, UINT, NULL_TREE)); >DEF (CMP_SWAP, "cmp_swap", (UINT, PTRVOID, UINT, UINT, NULL_TREE)); >DEF (CMP_SWAPLL, "cmp_swapll", (LLUINT, PTRVOID, LLUINT, LLUINT, > NULL_TREE)); > + DEF (COND_UNI, "cond_uni", (integer_type_node, integer_type_node, > NULL_TREE)); > > #undef DEF > #undef ST > @@ -5158,6 +5224,9 @@ nvptx_expand_builtin (tree exp, rtx target, rtx > ARG_UNUSED (subtarget), > case NVPTX_BUILTIN_CMP_SWAPLL: >return nvptx_expand_cmp_swap (exp, target, mode, ignore); > > +case NVPTX_BUILTIN_COND_UNI: > + return nvptx_expand_cond_uni (exp, target, mode, ignore); > + > default: gcc_unreachable (); > } > } > @@ -5284,7 +5353,7 @@ nvptx_get_worker_red_addr (tree type, tree offset) > >
Re: [patch] various OpenACC reduction enhancements - ME and nvptx changes
The attached patch includes the nvptx and GCC ME reductions enhancements. Is this patch OK for trunk? It bootstrapped / regression tested cleanly for x86_64 with nvptx offloading. Thanks, Cesar 2018-06-29 Cesar Philippidis Nathan Sidwell gcc/ * config/nvptx/nvptx.c (nvptx_propagate_unified): New. (nvptx_split_blocks): Call it for cond_uni insn. (nvptx_expand_cond_uni): New. (enum nvptx_builtins): Add NVPTX_BUILTIN_COND_UNI. (nvptx_init_builtins): Initialize it. (nvptx_expand_builtin): (nvptx_generate_vector_shuffle): Change integral SHIFT operand to tree BITS operand. (nvptx_vector_reduction): New. (nvptx_adjust_reduction_type): New. (nvptx_goacc_reduction_setup): Use it to adjust the type of ref_to_res. (nvptx_goacc_reduction_init): Don't update LHS if it doesn't exist. (nvptx_goacc_reduction_fini): Call nvptx_vector_reduction for vector. Use it to adjust the type of ref_to_res. (nvptx_goacc_reduction_teardown): * config/nvptx/nvptx.md (cond_uni): New pattern. * omp-general.h (enum oacc_loop_flags): Add OLF_REDUCTION enum. * omp-low.c (lower_oacc_reductions): Handle reduction decls mapped with GOMP_MAP_FIRSTPRIVATE_POINTER. (lower_oacc_head_mark): Use OLF_REDUCTION to mark OpenACC reductions. * omp-offload.c (oacc_loop_auto_partitions): Don't assign gang level parallelism to orphan reductions. (default_goacc_reduction): Retype ref_to_res as necessary. --- diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 5608bee8a8d..33ec3db1153 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2863,6 +2863,52 @@ nvptx_reorg_uniform_simt () } } +/* UNIFIED is a cond_uni insn. Find the branch insn it affects, and + mark that as unified. We expect to be in a single block. */ + +static void +nvptx_propagate_unified (rtx_insn *unified) +{ + rtx_insn *probe = unified; + rtx cond_reg = SET_DEST (PATTERN (unified)); + rtx pat = NULL_RTX; + + /* Find the comparison. 
(We could skip this and simply scan to the + block's terminating branch, if we didn't care for self + checking.) */ + for (;;) +{ + probe = next_real_insn (probe); + if (!probe) + break; + pat = PATTERN (probe); + + if (GET_CODE (pat) == SET + && GET_RTX_CLASS (GET_CODE (SET_SRC (pat))) == RTX_COMPARE + && XEXP (SET_SRC (pat), 0) == cond_reg) + break; + gcc_assert (NONJUMP_INSN_P (probe)); +} + gcc_assert (pat); + rtx pred_reg = SET_DEST (pat); + + /* Find the branch. */ + do +probe = NEXT_INSN (probe); + while (!JUMP_P (probe)); + + pat = PATTERN (probe); + rtx itec = XEXP (SET_SRC (pat), 0); + gcc_assert (XEXP (itec, 0) == pred_reg); + + /* Mark the branch's condition as unified. */ + rtx unspec = gen_rtx_UNSPEC (BImode, gen_rtvec (1, pred_reg), + UNSPEC_BR_UNIFIED); + bool ok = validate_change (probe, &XEXP (itec, 0), unspec, false); + + gcc_assert (ok); +} + /* Loop structure of the function. The entire function is described as a NULL loop. */ @@ -2964,6 +3010,9 @@ nvptx_split_blocks (bb_insn_map_t *map) continue; switch (recog_memoized (insn)) { + case CODE_FOR_cond_uni: + nvptx_propagate_unified (insn); + /* FALLTHROUGH */ default: seen_insn = true; continue; @@ -5080,6 +5129,21 @@ nvptx_expand_cmp_swap (tree exp, rtx target, return target; } +/* Expander for the compare unified builtin. */ + +static rtx +nvptx_expand_cond_uni (tree exp, rtx target, machine_mode mode, int ignore) +{ + if (ignore) +return target; + + rtx src = expand_expr (CALL_EXPR_ARG (exp, 0), + NULL_RTX, mode, EXPAND_NORMAL); + + emit_insn (gen_cond_uni (target, src)); + + return target; +} /* Codes for all the NVPTX builtins. 
*/ enum nvptx_builtins @@ -5089,6 +5153,7 @@ enum nvptx_builtins NVPTX_BUILTIN_WORKER_ADDR, NVPTX_BUILTIN_CMP_SWAP, NVPTX_BUILTIN_CMP_SWAPLL, + NVPTX_BUILTIN_COND_UNI, NVPTX_BUILTIN_MAX }; @@ -5126,6 +5191,7 @@ nvptx_init_builtins (void) (PTRVOID, ST, UINT, UINT, NULL_TREE)); DEF (CMP_SWAP, "cmp_swap", (UINT, PTRVOID, UINT, UINT, NULL_TREE)); DEF (CMP_SWAPLL, "cmp_swapll", (LLUINT, PTRVOID, LLUINT, LLUINT, NULL_TREE)); + DEF (COND_UNI, "cond_uni", (integer_type_node, integer_type_node, NULL_TREE)); #undef DEF #undef ST @@ -5158,6 +5224,9 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget), case NVPTX_BUILTIN_CMP_SWAPLL: return nvptx_expand_cmp_swap (exp, target, mode, ignore); +case NVPTX_BUILTIN_COND_UNI: + return nvptx_expand_cond_uni (exp, target, mode, ignore); + default: gcc_unreachable (); } } @@ -5284,7 +5353,7 @@ nvptx_get_worker_red_addr (tree type, tree offset) static void nvptx_generate_vector_shuffle (location_t loc, - tree dest_var, tree var, unsigned shift, + tree dest_var, tree var, tree bits, gimple_seq *seq) { unsigned fn = NVPTX_BUILTIN_SHUFFLE; @@ -5307,7 +5376,6 @@