> -----Original Message-----
> From: Richard Sandiford <richard.sandif...@arm.com>
> Sent: Friday, January 12, 2024 4:26 AM
> To: Andrew Pinski (QUIC) <quic_apin...@quicinc.com>
> Cc: gcc-patches@gcc.gnu.org
> Subject: [PATCHv3] aarch64/expr: Use ccmp when the outer expression is
> used twice [PR100942]
> 
> Andrew Pinski <quic_apin...@quicinc.com> writes:
> > Ccmp is not used if the result of the and/ior is used by both
> > a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> > here by using ccmp in this case.
> > Two changes is required, first we need to allow the outer statement's
> > result be used more than once.
> > The second change is that during the expansion of the gimple, we need
> > to try using ccmp. This is needed because we don't use expand the ssa
> > name of the lhs but rather expand directly from the gimple.
> >
> > A small note on the ccmp_4.c testcase, we should be able to get slightly
> > better than with this patch but it is one extra instruction compared to
> > before.
> >
> > Diff from v1:
> > * v2: Split out expand_gimple_assign_ssa so the we only need to handle
> > promotion once. Add ccmp_5.c testcase which was suggested. Change
> comment
> > on ccmp_candidate_p.
> 
> I meant more that we should split out the gassign handling in
> expand_expr_real_1, since we're effectively making cfgexpand follow
> it more closely.  What do you think about the attached version?
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.
> 
> OK for the expr/cfgexpand bits?

Oh that is what I originally thought you wanted but I was not 100% sure so I 
just
moved it out in one place.  Anyways thanks for taking care of the change. 

Thanks,
Andrew Pinski

> 
> Thanks,
> Richard
> 
> ----
> 
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
> 
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
> 
>       PR target/100942
> 
> gcc/ChangeLog:
> 
>       * ccmp.cc (ccmp_candidate_p): Add outer argument.
>       Allow if the outer is true and the lhs is used more
>       than once.
>       (expand_ccmp_expr): Update call to ccmp_candidate_p.
>       * expr.h (expand_expr_real_gassign): Declare.
>       * expr.cc (expand_expr_real_gassign): New function, split out from...
>       (expand_expr_real_1): ...here.
>       * cfgexpand.cc (expand_gimple_stmt_1): Use
> expand_expr_real_gassign.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/aarch64/ccmp_3.c: New test.
>       * gcc.target/aarch64/ccmp_4.c: New test.
>       * gcc.target/aarch64/ccmp_5.c: New test.
> 
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> Co-authored-by: Richard Sandiford <richard.sandif...@arm.com>
> ---
>  gcc/ccmp.cc                               |  12 +--
>  gcc/cfgexpand.cc                          |  31 ++-----
>  gcc/expr.cc                               | 103 ++++++++++++----------
>  gcc/expr.h                                |   3 +
>  gcc/testsuite/gcc.target/aarch64/ccmp_3.c |  20 +++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_4.c |  35 ++++++++
>  gcc/testsuite/gcc.target/aarch64/ccmp_5.c |  20 +++++
>  7 files changed, 149 insertions(+), 75 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> 
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 09d6b5595a4..7cb525addf4 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -90,9 +90,10 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>     If all checks OK in expand_ccmp_expr, it emits insns in prep_seq, then
>     insns in gen_seq.  */
> 
> -/* Check whether G is a potential conditional compare candidate.  */
> +/* Check whether G is a potential conditional compare candidate; OUTER is
> true if
> +   G is the outer most AND/IOR.  */
>  static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)
>  {
>    tree lhs, op0, op1;
>    gimple *gs0, *gs1;
> @@ -109,8 +110,9 @@ ccmp_candidate_p (gimple *g)
>    lhs = gimple_assign_lhs (g);
>    op0 = gimple_assign_rhs1 (g);
>    op1 = gimple_assign_rhs2 (g);
> -  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> -      || !has_single_use (lhs))
> +  if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> +    return false;
> +  if (!outer && !has_single_use (lhs))
>      return false;
> 
>    bb = gimple_bb (g);
> @@ -284,7 +286,7 @@ expand_ccmp_expr (gimple *g, machine_mode
> mode)
>    rtx_insn *last;
>    rtx tmp;
> 
> -  if (!ccmp_candidate_p (g))
> +  if (!ccmp_candidate_p (g, true))
>      return NULL_RTX;
> 
>    last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index 1db22f0a1a3..381ed2c82d7 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -3971,37 +3971,18 @@ expand_gimple_stmt_1 (gimple *stmt)
>         {
>           rtx target, temp;
>           bool nontemporal = gimple_assign_nontemporal_move_p
> (assign_stmt);
> -         struct separate_ops ops;
>           bool promoted = false;
> 
>           target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
>           if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P
> (target))
>             promoted = true;
> 
> -         ops.code = gimple_assign_rhs_code (assign_stmt);
> -         ops.type = TREE_TYPE (lhs);
> -         switch (get_gimple_rhs_class (ops.code))
> -           {
> -             case GIMPLE_TERNARY_RHS:
> -               ops.op2 = gimple_assign_rhs3 (assign_stmt);
> -               /* Fallthru */
> -             case GIMPLE_BINARY_RHS:
> -               ops.op1 = gimple_assign_rhs2 (assign_stmt);
> -               /* Fallthru */
> -             case GIMPLE_UNARY_RHS:
> -               ops.op0 = gimple_assign_rhs1 (assign_stmt);
> -               break;
> -             default:
> -               gcc_unreachable ();
> -           }
> -         ops.location = gimple_location (stmt);
> -
> -         /* If we want to use a nontemporal store, force the value to
> -            register first.  If we store into a promoted register,
> -            don't directly expand to target.  */
> +        /* If we want to use a nontemporal store, force the value to
> +           register first.  If we store into a promoted register,
> +           don't directly expand to target.  */
>           temp = nontemporal || promoted ? NULL_RTX : target;
> -         temp = expand_expr_real_2 (&ops, temp, GET_MODE (target),
> -                                    EXPAND_NORMAL);
> +         temp = expand_expr_real_gassign (assign_stmt, temp,
> +                                          GET_MODE (target),
> EXPAND_NORMAL);
> 
>           if (temp == target)
>             ;
> @@ -4013,7 +3994,7 @@ expand_gimple_stmt_1 (gimple *stmt)
>               if (CONSTANT_P (temp) && GET_MODE (temp) == VOIDmode)
>                 {
>                   temp = convert_modes (GET_MODE (target),
> -                                       TYPE_MODE (ops.type),
> +                                       TYPE_MODE (TREE_TYPE (lhs)),
>                                         temp, unsignedp);
>                   temp = convert_modes (GET_MODE (SUBREG_REG (target)),
>                                         GET_MODE (target), temp,
> unsignedp);
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index dc816bc20fa..f9052a6ff5f 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -11035,6 +11035,62 @@ stmt_is_replaceable_p (gimple *stmt)
>    return false;
>  }
> 
> +/* A subroutine of expand_expr_real_1.  Expand gimple assignment G,
> +   which is known to set an SSA_NAME result.  The other arguments are
> +   as for expand_expr_real_1.  */
> +
> +rtx
> +expand_expr_real_gassign (gassign *g, rtx target, machine_mode tmode,
> +                       enum expand_modifier modifier, rtx *alt_rtl,
> +                       bool inner_reference_p)
> +{
> +  separate_ops ops;
> +  rtx r;
> +  location_t saved_loc = curr_insn_location ();
> +  auto loc = gimple_location (g);
> +  if (loc != UNKNOWN_LOCATION)
> +    set_curr_insn_location (loc);
> +  tree lhs = gimple_assign_lhs (g);
> +  ops.code = gimple_assign_rhs_code (g);
> +  ops.type = TREE_TYPE (lhs);
> +  switch (get_gimple_rhs_class (ops.code))
> +    {
> +    case GIMPLE_TERNARY_RHS:
> +      ops.op2 = gimple_assign_rhs3 (g);
> +      /* Fallthru */
> +    case GIMPLE_BINARY_RHS:
> +      ops.op1 = gimple_assign_rhs2 (g);
> +
> +      /* Try to expand conditonal compare.  */
> +      if (targetm.gen_ccmp_first)
> +     {
> +       gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> +       r = expand_ccmp_expr (g, TYPE_MODE (ops.type));
> +       if (r)
> +         break;
> +     }
> +      /* Fallthru */
> +    case GIMPLE_UNARY_RHS:
> +      ops.op0 = gimple_assign_rhs1 (g);
> +      ops.location = loc;
> +      r = expand_expr_real_2 (&ops, target, tmode, modifier);
> +      break;
> +    case GIMPLE_SINGLE_RHS:
> +      {
> +     r = expand_expr_real (gimple_assign_rhs1 (g), target,
> +                           tmode, modifier, alt_rtl,
> +                           inner_reference_p);
> +     break;
> +      }
> +    default:
> +      gcc_unreachable ();
> +    }
> +  set_curr_insn_location (saved_loc);
> +  if (REG_P (r) && !REG_EXPR (r))
> +    set_reg_attrs_for_decl_rtl (lhs, r);
> +  return r;
> +}
> +
>  rtx
>  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>                   enum expand_modifier modifier, rtx *alt_rtl,
> @@ -11198,51 +11254,8 @@ expand_expr_real_1 (tree exp, rtx target,
> machine_mode tmode,
>         && stmt_is_replaceable_p (SSA_NAME_DEF_STMT (exp)))
>       g = SSA_NAME_DEF_STMT (exp);
>        if (g)
> -     {
> -       rtx r;
> -       location_t saved_loc = curr_insn_location ();
> -       loc = gimple_location (g);
> -       if (loc != UNKNOWN_LOCATION)
> -         set_curr_insn_location (loc);
> -       ops.code = gimple_assign_rhs_code (g);
> -          switch (get_gimple_rhs_class (ops.code))
> -         {
> -         case GIMPLE_TERNARY_RHS:
> -           ops.op2 = gimple_assign_rhs3 (g);
> -           /* Fallthru */
> -         case GIMPLE_BINARY_RHS:
> -           ops.op1 = gimple_assign_rhs2 (g);
> -
> -           /* Try to expand conditonal compare.  */
> -           if (targetm.gen_ccmp_first)
> -             {
> -               gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> -               r = expand_ccmp_expr (g, mode);
> -               if (r)
> -                 break;
> -             }
> -           /* Fallthru */
> -         case GIMPLE_UNARY_RHS:
> -           ops.op0 = gimple_assign_rhs1 (g);
> -           ops.type = TREE_TYPE (gimple_assign_lhs (g));
> -           ops.location = loc;
> -           r = expand_expr_real_2 (&ops, target, tmode, modifier);
> -           break;
> -         case GIMPLE_SINGLE_RHS:
> -           {
> -             r = expand_expr_real (gimple_assign_rhs1 (g), target,
> -                                   tmode, modifier, alt_rtl,
> -                                   inner_reference_p);
> -             break;
> -           }
> -         default:
> -           gcc_unreachable ();
> -         }
> -       set_curr_insn_location (saved_loc);
> -       if (REG_P (r) && !REG_EXPR (r))
> -         set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (exp), r);
> -       return r;
> -     }
> +     return expand_expr_real_gassign (as_a<gassign *> (g), target, tmode,
> +                                      modifier, alt_rtl, inner_reference_p);
> 
>        ssa_name = exp;
>        decl_rtl = get_rtx_for_ssa_name (ssa_name);
> diff --git a/gcc/expr.h b/gcc/expr.h
> index f04f40da6ab..64956f63029 100644
> --- a/gcc/expr.h
> +++ b/gcc/expr.h
> @@ -302,6 +302,9 @@ extern rtx expand_expr_real_1 (tree, rtx,
> machine_mode,
>                              enum expand_modifier, rtx *, bool);
>  extern rtx expand_expr_real_2 (sepops, rtx, machine_mode,
>                              enum expand_modifier);
> +extern rtx expand_expr_real_gassign (gassign *, rtx, machine_mode,
> +                                  enum expand_modifier modifier,
> +                                  rtx * = nullptr, bool = false);
> 
>  /* Generate code for computing expression EXP.
>     An rtx for the computed value is returned.  The value is never null.
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> +  int c = a == 0 || b == 0;
> +  if (c) foo();
> +  return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> +  int c = a < 8 || b < 9;
> +  int e = d < 11 || c;
> +  if (e) foo();
> +  return c;
> +}
> +
> +/*
> +  We really should get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        ccmp    w2, 10, 4, gt
> +        ble     .L11
> +
> +  But we currently get:
> +        cmp     w0, 7
> +        ccmp    w1, 8, 4, gt
> +        cset    w0, le
> +        cmp     w0, 0
> +        ccmp    w2, 10, 4, eq
> +        ble     .L11
> +  The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently
> we get 2 cmp
> +   though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> new file mode 100644
> index 00000000000..7e52ae4f322
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_5.c
> @@ -0,0 +1,20 @@
> +
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +void f1(int a, int b, _Bool *x)
> +{
> +  x[0] = x[1] = a == 0 || b == 0;
> +}
> +
> +void f2(int a, int b, int *x)
> +{
> +  x[0] = x[1] = a == 0 || b == 0;
> +}
> +
> +
> +/* Both functions should be using ccmp rather than 2 cset/orr.  */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 2 } } */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +
> --
> 2.25.1

Reply via email to