This is attacking case 3 of PR 94174.

However, I'm no longer using ccmp for most of the TImode comparisons.
Thanks to Wilco Dijkstra for pulling off my blinders and reminding me
that we can use subs+sbcs for (almost) all compares.

The first 5 patches clean up existing patterns or add new ones, so that
the expansion is supported without generating extraneous constant loads.

The aarch64_expand_addsubti patch tidies up the existing TImode
arithmetic expansions.

EXAMPLE __subvti3 (context diff is easier to read):

*** 12,27 ****
    10: b7f800a3        tbnz    x3, #63, 24 <__subvti3+0x24>
!   14: eb02003f        cmp     x1, x2
!   18: 5400010c        b.gt    38 <__subvti3+0x38>
!   1c: 54000140        b.eq    44 <__subvti3+0x44>  // b.none
    20: d65f03c0        ret
!   24: eb01005f        cmp     x2, x1
!   28: 5400008c        b.gt    38 <__subvti3+0x38>
!   2c: 54ffffa1        b.ne    20 <__subvti3+0x20>  // b.any
!   30: eb00009f        cmp     x4, x0
!   34: 54ffff69        b.ls    20 <__subvti3+0x20>  // b.plast
!   38: a9bf7bfd        stp     x29, x30, [sp, #-16]!
!   3c: 910003fd        mov     x29, sp
!   40: 94000000        bl      0 <abort>
!   44: eb04001f        cmp     x0, x4
!   48: 54ffff88        b.hi    38 <__subvti3+0x38>  // b.pmore
!   4c: d65f03c0        ret
--- 12,22 ----
    10: b7f800a3        tbnz    x3, #63, 24 <__subvti3+0x24>
!   14: eb00009f        cmp     x4, x0
!   18: fa01005f        sbcs    xzr, x2, x1
!   1c: 540000ab        b.lt    30 <__subvti3+0x30>  // b.tstop
    20: d65f03c0        ret
!   24: eb04001f        cmp     x0, x4
!   28: fa02003f        sbcs    xzr, x1, x2
!   2c: 54ffffaa        b.ge    20 <__subvti3+0x20>  // b.tcont
!   30: a9bf7bfd        stp     x29, x30, [sp, #-16]!
!   34: 910003fd        mov     x29, sp
!   38: 94000000        bl      0 <abort>

EXAMPLE from the PR:

void test3(__int128 a, uint64_t l)
{
        if ((__int128_t)a - l <= 1)
                doit();
}

*** 11,23 ****
        subs    x0, x0, x2
        sbc     x1, x1, xzr
!       cmp     x1, 0
!       ble     .L6
! .L1:
        ret
        .p2align 2,,3
- .L6:
-       bne     .L4
-       cmp     x0, 1
-       bhi     .L1
  .L4:
        b       doit
--- 11,19 ----
        subs    x0, x0, x2
        sbc     x1, x1, xzr
!       cmp     x0, 2
!       sbcs    xzr, x1, xzr
!       blt     .L4
        ret
        .p2align 2,,3
  .L4:
        b       doit


r~


Richard Henderson (9):
  aarch64: Accept 0 as first argument to compares
  aarch64: Accept zeros in add<GPI>3_carryin
  aarch64: Add <su>cmp_*_carryinC patterns
  aarch64: Add <su>cmp<GPI>_carryinC_m2
  aarch64: Provide expander for sub<GPI>3_compare1
  aarch64: Introduce aarch64_expand_addsubti
  aarch64: Adjust result of aarch64_gen_compare_reg
  aarch64: Implement TImode comparisons
  aarch64: Implement absti2

 gcc/config/aarch64/aarch64-protos.h       |  10 +-
 gcc/config/aarch64/aarch64.c              | 292 +++++++++-------
 gcc/config/aarch64/aarch64-simd.md        |  18 +-
 gcc/config/aarch64/aarch64-speculation.cc |   5 +-
 gcc/config/aarch64/aarch64.md             | 389 +++++++++++++---------
 5 files changed, 402 insertions(+), 312 deletions(-)

-- 
2.20.1

Reply via email to