Hi!

On Thu, 13 Nov 2014 19:09:49 +0100, Jakub Jelinek <[email protected]> wrote:
> On Thu, Nov 13, 2014 at 05:59:11PM +0100, Thomas Schwinge wrote:
> > --- gcc/builtins.c
> > +++ gcc/builtins.c
> > +/* Expand OpenACC acc_on_device.
> > +
> > + This has to happen late (that is, not in early folding; expand_builtin_*,
> > + rather than fold_builtin_*), as we have to act differently for host and
> > + acceleration device (ACCEL_COMPILER conditional). */
> > +
> > +static rtx
> > +expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED)
> > +{
> > + if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
> > + return NULL_RTX;
> > +
> > + tree arg, v1, v2, ret;
> > + location_t loc;
> > +
> > + arg = CALL_EXPR_ARG (exp, 0);
> > + arg = builtin_save_expr (arg);
> > + loc = EXPR_LOCATION (exp);
> > +
> > + /* Build: (arg == v1 || arg == v2) ? 1 : 0. */
> > +
> > +#ifdef ACCEL_COMPILER
> > + v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_not_host */ 3);
> > + v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device);
> > +#else
> > + v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_none */ 0);
> > + v2 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_host */ 2);
> > +#endif
> > +
> > + v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1);
> > + v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2);
> > +
> > + /* Can't use TRUTH_ORIF_EXPR, as that is not supported by
> > + expand_expr_real*. */
> > + ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2);
> > + ret = fold_build3_loc (loc, COND_EXPR, integer_type_node,
> > + ret, integer_one_node, integer_zero_node);
> > +
> > + return expand_normal (ret);
>
> If you can't fold it late (which is indeed a problem for -O0),
> then I'd suggest to implement this more RTL-ish.
> So, avoid the builtin_save_expr, instead
> rtx op = expand_normal (arg);
> Don't build v1/v2 as trees (and, please fix the TODOs), but rtxes,
(acc_device_* TODOs already resolved earlier on.)
> just
> rtx v1 = GEN_INT (...);
> rtx v2 = GEN_INT (...);
> machine_mode mode = TYPE_MODE (TREE_TYPE (arg));
> rtx ret = gen_reg_rtx (TYPE_MODE (integer_type_node));
> emit_move_insn (ret, const0_rtx);
> rtx_code_label *done_label = gen_label_rtx ();
> emit_cmp_and_jump_insns (op, v1, NE, NULL_RTX, mode,
> false, done_label, PROB_EVEN);
> emit_cmp_and_jump_insns (op, v2, NE, NULL_RTX, mode,
> false, done_label, PROB_EVEN);
> emit_move_insn (ret, const1_rtx);
> emit_label (done_label);
> return ret;
> or similar.
Thanks for the review/suggestion/code!
> Note, it would still be worthwhile to fold the builtin, at least
> when optimizing, after IPA. Dunno if we have some property you can check,
> and Richard B. could suggest where it would be most appropriate (if GIMPLE
> guarded match.pd entry, or what), gimple_fold, etc.
I'll make a note to have a look at that later on.
> I bet I should handle omp_is_initial_device (); similarly.
Yeah.
Committed to gomp-4_0-branch in r218858:
commit da5ad5aec1c0f9b230ecb2dc00620a5598de5066
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Thu Dec 18 10:42:30 2014 +0000
OpenACC acc_on_device: Make builtin expansion more RTXy.
gcc/
* builtins.c (expand_builtin_acc_on_device): Make more RTXy.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@218858
138bc75d-0d04-0410-961f-82ee72b054a4
---
gcc/ChangeLog.gomp | 5 +++++
gcc/builtins.c | 44 +++++++++++++++++++++-----------------------
2 files changed, 26 insertions(+), 23 deletions(-)
diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index b370616..a3650c5 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,8 @@
+2014-12-18 Thomas Schwinge <[email protected]>
+ Jakub Jelinek <[email protected]>
+
+ * builtins.c (expand_builtin_acc_on_device): Make more RTXy.
+
2014-12-17 Thomas Schwinge <[email protected]>
Bernd Schmidt <[email protected]>
diff --git gcc/builtins.c gcc/builtins.c
index fcf3f53..e946521 100644
--- gcc/builtins.c
+++ gcc/builtins.c
@@ -5889,38 +5889,36 @@ expand_stack_save (void)
acceleration device (ACCEL_COMPILER conditional). */
static rtx
-expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED)
+expand_builtin_acc_on_device (tree exp, rtx target)
{
if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
return NULL_RTX;
- tree arg, v1, v2, ret;
- location_t loc;
-
- arg = CALL_EXPR_ARG (exp, 0);
- arg = builtin_save_expr (arg);
- loc = EXPR_LOCATION (exp);
-
- /* Build: (arg == v1 || arg == v2) ? 1 : 0. */
+ tree arg = CALL_EXPR_ARG (exp, 0);
+ /* Return (arg == v1 || arg == v2) ? 1 : 0. */
+ machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
+ rtx v = expand_normal (arg), v1, v2;
#ifdef ACCEL_COMPILER
- v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NOT_HOST);
- v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device);
+ v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
+ v2 = GEN_INT (ACCEL_COMPILER_acc_device);
#else
- v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NONE);
- v2 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_HOST);
+ v1 = GEN_INT (GOMP_DEVICE_NONE);
+ v2 = GEN_INT (GOMP_DEVICE_HOST);
#endif
+ machine_mode target_mode = TYPE_MODE (integer_type_node);
+ if (!REG_P (target) || GET_MODE (target) != target_mode)
+ target = gen_reg_rtx (target_mode);
+ emit_move_insn (target, const0_rtx);
+ rtx_code_label *done_label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (v, v1, NE, NULL_RTX, v_mode,
+ false, done_label, PROB_EVEN);
+ emit_cmp_and_jump_insns (v, v2, NE, NULL_RTX, v_mode,
+ false, done_label, PROB_EVEN);
+ emit_move_insn (target, const1_rtx);
+ emit_label (done_label);
- v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1);
- v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2);
-
- /* Can't use TRUTH_ORIF_EXPR, as that is not supported by
- expand_expr_real*. */
- ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2);
- ret = fold_build3_loc (loc, COND_EXPR, integer_type_node,
- ret, integer_one_node, integer_zero_node);
-
- return expand_normal (ret);
+ return target;
}
Grüße,
Thomas
pgpTD1xP58om4.pgp
Description: PGP signature
