Richard Henderson <richard.hender...@linaro.org> writes:
> Restrict the immediate range to the intersection of LT/GE and GT/LE
> so that cfglayout can invert the condition to redirect any branch.
>
> gcc:
>       * config/aarch64/aarch64.cc (aarch64_cb_rhs): Restrict the
>       range of LT/GE and GT/LE to their intersections.
>       * config/aarch64/aarch64.md (*aarch64_cb<INT_CMP><GPI>): Unexport.
>       Use cmpbr_imm_predicate instead of aarch64_cb_rhs.
>       * config/aarch64/constraints.md (Uc1): Accept 0..62.
>       (Uc2): Remove.
>       * config/aarch64/iterators.md (cmpbr_imm_predicate): New.
>       (cmpbr_imm_constraint): Update to match aarch64_cb_rhs.
>       * config/aarch64/predicates.md (aarch64_cb_reg_i63_operand): New.
>       (aarch64_cb_reg_i62_operand): New.
>
> gcc/testsuite:
>       * gcc.target/aarch64/cmpbr.c (u32_x0_ult_64): XFAIL.
>       (i32_x0_slt_64, u64_x0_ult_64, i64_x0_slt_64): XFAIL.
>       * gcc.target/aarch64/cmpbr-2.c: New.
> ---
>  gcc/config/aarch64/aarch64.cc               |  23 ++--
>  gcc/config/aarch64/aarch64.md               |  27 +++--
>  gcc/config/aarch64/constraints.md           |  10 +-
>  gcc/config/aarch64/iterators.md             |  34 +++---
>  gcc/config/aarch64/predicates.md            |  10 ++
>  gcc/testsuite/gcc.target/aarch64/cmpbr-2.c  | 110 ++++++++++++++++++++
>  gcc/testsuite/gcc.target/aarch64/cmpbr.c    |   8 +-
>  gcc/testsuite/gcc.target/aarch64/pr109078.c |   2 +-
>  8 files changed, 176 insertions(+), 48 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/cmpbr-2.c
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index e7b174a571e..88235d0f8d2 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -975,19 +975,24 @@ aarch64_cb_rhs (rtx_code op_code, rtx rhs)
>      {
>      case EQ:
>      case NE:
> -    case GT:
> -    case GTU:
>      case LT:
>      case LTU:
> +    case GE:
> +    case GEU:
> +      /* EQ/NE  range is 0 .. 63.
> +      LT/LTU range is 0 .. 63.
> +      GE/GEU range is 1 .. 64 => GT x - 1, but also supports 0 via XZR.
> +      So the intersection is 0 .. 63. */
>        return IN_RANGE (rhs_val, 0, 63);
>  
> -    case GE:  /* CBGE:   signed greater than or equal */
> -    case GEU: /* CBHS: unsigned greater than or equal */
> -      return IN_RANGE (rhs_val, 1, 64);
> -
> -    case LE:  /* CBLE:   signed less than or equal */
> -    case LEU: /* CBLS: unsigned less than or equal */
> -      return IN_RANGE (rhs_val, -1, 62);
> +    case GT:
> +    case GTU:
> +    case LE:
> +    case LEU:
> +      /* GT/GTU range is  0 .. 63
> +      LE/LEU range is -1 .. 62 => LT x + 1.
> +      So the intersection is 0 .. 62. */
> +      return IN_RANGE (rhs_val, 0, 62);
>  
>      default:
>        return false;
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 74a8912c7a9..8545c750282 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -865,21 +865,20 @@
>  
>  ;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction.
>  ;; The immediate range depends on the comparison code.
> -;; Comparisons against immediates outside this range fall back to
> -;; CMP + B<cond>.
> -(define_insn "aarch64_cb<INT_CMP:code><GPI:mode>"
> -  [(set (pc) (if_then_else (INT_CMP
> -                          (match_operand:GPI 0 "register_operand" "r")
> -                          (match_operand:GPI 1 "nonmemory_operand"
> -                            "r<INT_CMP:cmpbr_imm_constraint>"))
> -                        (label_ref (match_operand 2))
> -                        (pc)))]
> -  "TARGET_CMPBR && aarch64_cb_rhs (<INT_CMP:CODE>, operands[1])"
> +(define_insn "*aarch64_cb<code><mode>"
> +  [(set (pc) (if_then_else
> +             (INT_CMP
> +               (match_operand:GPI 0 "register_operand" "r")
> +               (match_operand:GPI 1
> +                 "<cmpbr_imm_predicate>" "r<cmpbr_imm_constraint>"))

An alternative to adding a new code attribute would be to embed
the immediate constraint in the predicate name, to ensure that
the two don't get out of sync.  We've done that for things like
aarch64_sve_cmp_<sve_imm_con>_operand.  It does make the predicate
name slightly less mnemonic though...

The patch is ok either way though.

Thanks,
Richard


> +             (label_ref (match_operand 2))
> +             (pc)))]
> +  "TARGET_CMPBR"
>    {
> -    return (get_attr_far_branch (insn) == FAR_BRANCH_NO)
> -      ? "cb<INT_CMP:cmp_op>\\t%<w>0, %<w>1, %l2"
> -      : aarch64_gen_far_branch (operands, 2, "L",
> -          "cb<INT_CMP:inv_cmp_op>\\t%<w>0, %<w>1, ");
> +    if (get_attr_length (insn) == 4)
> +      return "cb<cmp_op>\t%<w>0, %<w>1, %l2";
> +    return aarch64_gen_far_branch (operands, 2, "L",
> +             "cb<inv_cmp_op>\t%<w>0, %<w>1, ");
>    }
>    [(set_attr "type" "branch")
>     (set (attr "length")
> diff --git a/gcc/config/aarch64/constraints.md 
> b/gcc/config/aarch64/constraints.md
> index dc1925dfb6c..7b9e5583bc7 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -312,15 +312,9 @@
>  
>  (define_constraint "Uc1"
>    "@internal
> -  A constraint that matches the integers 1...64."
> +  A constraint that matches the integers 0...62."
>    (and (match_code "const_int")
> -       (match_test "IN_RANGE (ival, 1, 64)")))
> -
> -(define_constraint "Uc2"
> -  "@internal
> -  A constraint that matches the integers -1...62."
> -  (and (match_code "const_int")
> -       (match_test "IN_RANGE (ival, -1, 62)")))
> +       (match_test "IN_RANGE (ival, 0, 62)")))
>  
>  (define_constraint "Up3"
>    "@internal
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 8f8237edf6c..3fff94fc2e6 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -2980,19 +2980,29 @@
>  
>  (define_code_iterator INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
>  
> +;; Inverse comparisons must have the same constraint so that
> +;; branches can be redirected during late compilation.
>  (define_code_attr cmpbr_imm_constraint [
> -    (eq "Uc0")
> -    (ne "Uc0")
> -    (gt "Uc0")
> -    (gtu "Uc0")
> -    (lt "Uc0")
> -    (ltu "Uc0")
> -
> -    (ge "Uc1")
> -    (geu "Uc1")
> -
> -    (le "Uc2")
> -    (leu "Uc2")
> +    (eq "Uc0") (ne "Uc0")
> +    (lt "Uc0") (ge "Uc0")
> +    (ltu "Uc0") (geu "Uc0")
> +
> +    (gt "Uc1") (le "Uc1")
> +    (gtu "Uc1") (leu "Uc1")
> +])
> +
> +(define_code_attr cmpbr_imm_predicate [
> +    (eq "aarch64_cb_reg_i63_operand")
> +    (ne "aarch64_cb_reg_i63_operand")
> +    (lt "aarch64_cb_reg_i63_operand")
> +    (ge "aarch64_cb_reg_i63_operand")
> +    (ltu "aarch64_cb_reg_i63_operand")
> +    (geu "aarch64_cb_reg_i63_operand")
> +
> +    (gt "aarch64_cb_reg_i62_operand")
> +    (le "aarch64_cb_reg_i62_operand")
> +    (gtu "aarch64_cb_reg_i62_operand")
> +    (leu "aarch64_cb_reg_i62_operand")
>  ])
>  
>  (define_code_attr fix_trunc_optab [(fix "fix_trunc")
> diff --git a/gcc/config/aarch64/predicates.md 
> b/gcc/config/aarch64/predicates.md
> index af6f00e7353..f9a094d4190 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -1089,3 +1089,13 @@
>  (define_special_predicate "aarch64_ptrue_all_operand"
>    (and (match_code "const_vector")
>         (match_test "aarch64_ptrue_all_mode (op) == mode")))
> +
> +(define_predicate "aarch64_cb_reg_i63_operand"
> +  (ior (match_operand 0 "register_operand")
> +       (and (match_code "const_int")
> +         (match_test "IN_RANGE (INTVAL (op), 0, 63)"))))
> +
> +(define_predicate "aarch64_cb_reg_i62_operand"
> +  (ior (match_operand 0 "register_operand")
> +       (and (match_code "const_int")
> +         (match_test "IN_RANGE (INTVAL (op), 0, 62)"))))
> diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr-2.c 
> b/gcc/testsuite/gcc.target/aarch64/cmpbr-2.c
> new file mode 100644
> index 00000000000..2c2764f131e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr-2.c
> @@ -0,0 +1,110 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* PR target/121388 */
> +
> +#pragma GCC target "+cmpbr"
> +
> +extern int a, b, s;
> +typedef struct {
> +  unsigned long d[2];
> +} g;
> +typedef struct {
> +  long d[4];
> +} h;
> +typedef struct {
> +  unsigned long d[];
> +} j;
> +typedef struct {
> +  long d[];
> +} k;
> +typedef union {
> +  struct {
> +    short l;
> +    unsigned m;
> +  } i;
> +  long double f;
> +} n;
> +g o[] = {{}};
> +const g aa[1];
> +h ab, t;
> +h ac[1];
> +long p, r, ae, af, ag, ah, aj, ak, al, an, ao, aq, ar, aw, ax, ba, bb, bc, 
> bd;
> +unsigned long q, am, ay, w;
> +g ad;
> +unsigned ai, ap, at, au, av, az;
> +short as, v, be;
> +long double u;
> +double f() {
> +  long bf;
> +  g c;
> +  unsigned long bg;
> +  int e, bh;
> +  j x;
> +  if (q << 61 == 3ull << 61) {
> +    if (q & 58) {
> +      return u;
> +    }
> +    as = 8;
> +    return a;
> +  }
> +  e = c.d[1] = q & ((1ull << 49) - 1);
> +  bg = p;
> +  if (101086242752 < c.d[1] ||
> +      (101086242752 == c.d[1] && 4003012203950112767 < p))
> +    c.d[1] = p;
> +  if (c.d[1] && p == 0) {
> +    n bi;
> +    bi.i.l = be;
> +    return bi.f;
> +  }
> +  s = c.d[1] == 0 ? p ?: p : __builtin_clzll(c.d[1]);
> +  s == 0 ? c.d[1] : s >= 64 ? c.d[1] : (c.d[1] = s, bg = 0);
> +  if (e >= 3) {
> +    if (a) {
> +      n bi;
> +      bi.i.m = 0;
> +      return bi.f;
> +    }
> +    return ar;
> +  }
> +  if (e <= -4985)
> +    e = 4985;
> +  ad = (aa + 5)[e];
> +  bh = s;
> +  if (r && (bg = 0))
> +    t = ab;
> +  t = ac[e];
> +  k bj, bk;
> +  ao = bg;
> +  az = bg;
> +  al = az;
> +  ay = aw = bg >> 2;
> +  b = ay + (aq > 2) < bg * ap;
> +  ay = az;
> +  bd = av = w;
> +  ah = bb;
> +  bj.d[4] = am;
> +  an = c.d[1] >> an;
> +  ax = bg * az;
> +  aj = t.d[1];
> +  az = w = ax;
> +  bb = az;
> +  ag = aj;
> +  long bl = af = w + az < w;
> +  au = aw;
> +  am = au < w || w < ak + am;
> +  bk.d[3] = bl + az;
> +  ai = a < aj;
> +  at = aj + b < ai;
> +  x.d[3] = ba;
> +  bc = bk.d[3] + bc + bj.d[4];
> +  bf = ae;
> +  if (x.d[3] || o[bf & 1].d[0]) {
> +    if (bf == 0)
> +      ;
> +    else if (bf == 3 && bh)
> +      if ((a & 3 && x.d[3] < 3ull << 62) || (q && x.d[3]))
> +        b = 6;
> +  }
> +  return a;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c 
> b/gcc/testsuite/gcc.target/aarch64/cmpbr.c
> index e19ca899138..fe002512995 100644
> --- a/gcc/testsuite/gcc.target/aarch64/cmpbr.c
> +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c
> @@ -1274,7 +1274,7 @@ FAR_BRANCH(u64, 42);
>  */
>  
>  /*
> -** u32_x0_ult_64:
> +** u32_x0_ult_64: { xfail *-*-* }
>  **   cbhi    w0, 63, .L([0-9]+)
>  **   b       taken
>  ** .L\1:
> @@ -1309,7 +1309,7 @@ FAR_BRANCH(u64, 42);
>  */
>  
>  /*
> -** i32_x0_slt_64:
> +** i32_x0_slt_64: { xfail *-*-* }
>  **   cbgt    w0, 63, .L([0-9]+)
>  **   b       taken
>  ** .L\1:
> @@ -1362,7 +1362,7 @@ FAR_BRANCH(u64, 42);
>  */
>  
>  /*
> -** u64_x0_ult_64:
> +** u64_x0_ult_64: { xfail *-*-* }
>  **   cbhi    x0, 63, .L([0-9]+)
>  **   b       taken
>  ** .L\1:
> @@ -1397,7 +1397,7 @@ FAR_BRANCH(u64, 42);
>  */
>  
>  /*
> -** i64_x0_slt_64:
> +** i64_x0_slt_64: { xfail *-*-* }
>  **   cbgt    x0, 63, .L([0-9]+)
>  **   b       taken
>  ** .L\1:
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr109078.c 
> b/gcc/testsuite/gcc.target/aarch64/pr109078.c
> index 0d756722239..ad30bd56208 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pr109078.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pr109078.c
> @@ -8,7 +8,7 @@
>  /*
>  ** simple_gemm:
>  **   ...
> -**   tbnz    [^\n]+
> +**   [tc]bnz [^\n]+
>  **   ld1     [^\n]+
>  **   fadd    [^\n]+
>  **   fadd    [^\n]+

Reply via email to