On 19/10/2019 17:31, Segher Boessenkool wrote:
> Hi Richard,
> On Fri, Oct 18, 2019 at 08:48:31PM +0100, Richard Earnshaw wrote:
>> This series of patches rewrites all the DImode arithmetic patterns for
>> the Arm backend when compiling for Arm or Thumb2 to split the
>> operations during expand (the thumb1 code is unchanged and cannot
>> benefit from early splitting as we are unable to expose the carry
>> flag).
> Very nice :-)
> I have a bunch of testcases from when I did something similar for PowerPC
> that I wanted to test...  But I cannot get your series to apply.  Do you
> have a git repo I can pull from?

Perhaps because it's already committed to trunk?

> Here is one test case (it's a bit geared towards what our ISA can do):
> ===
> typedef unsigned int u32;
> typedef unsigned long long u64;
> u64 add(u64 a, u64 b) { return a + b; }
> u64 add1(u64 a) { return a + 1; }
> u64 add42(u64 a) { return a + 42; }
> u64 addm1(u64 a) { return a - 1; }
> u64 addff(u64 a) { return a + 0xffffffffULL; }
> u64 addH(u64 a) { return a + 0x123400005678ULL; }
> u64 addH0(u64 a) { return a + 0x123400000000ULL; }
> u64 addS(u64 a, u32 b) { return a + b; }
> u64 addSH(u64 a, u32 b) { return a + ((u64)b << 32); }
> u64 addB1(u64 a) { return a + 0x100000000ULL; }
> u64 addB8(u64 a) { return a + 0x800000000ULL; }
> u64 addSH42(u64 a, u32 b) { return a + ((u64)b << 32) + 42; }
> u64 addSHm1(u64 a, u32 b) { return a + ((u64)b << 32) - 1; }
> u64 addSHff(u64 a, u32 b) { return a + ((u64)b << 32) + 0xffffffffULL; }
> ===
> rs6000 -m32 currently has non-optimal code for addm1, addSHm1; trunk arm
> has non-optimal code for addH0, addSH, addB1, addB8, addSH42, addSHm1, and
> addSHff if I understand well enough.  So I'd love to see what it does with
> your series applied :-)
> Segher

We do pretty well on this.  Only addSHm1 needs three insns (except where
the constant isn't valid for arm), and I think that's the minimum for
this case anyway.  Several of the tests only need one insn.

        @ Assembler output for lltest.c: 32-bit Arm code, GNU as syntax
        @ ('@' starts a comment).  The .eabi_attribute lines record build
        @ attributes for the linker; the tag names given below are taken
        @ from the Arm ABI build-attributes addenda, not from this file.
        .arch armv8-a
        .eabi_attribute 20, 1           @ Tag_ABI_FP_denormal
        .eabi_attribute 21, 1           @ Tag_ABI_FP_exceptions
        .eabi_attribute 23, 3           @ Tag_ABI_FP_number_model
        .eabi_attribute 24, 1           @ Tag_ABI_align_needed
        .eabi_attribute 25, 1           @ Tag_ABI_align_preserved
        .eabi_attribute 26, 1           @ Tag_ABI_enum_size
        .eabi_attribute 30, 2           @ Tag_ABI_optimization_goals
        .eabi_attribute 34, 1           @ Tag_CPU_unaligned_access
        .eabi_attribute 18, 4           @ Tag_ABI_PCS_wchar_t
        .file   "lltest.c"
        .align  2
        .global add
        .syntax unified
        .fpu softvfp
        .type   add, %function
        @ u64 add(u64 a, u64 b) { return a + b; }
        @ In:    r1:r0 = a, r3:r2 = b (low word in the lower-numbered register)
        @ Out:   r1:r0 = a + b
        @ Clobb: flags
add:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        adds    r0, r0, r2              @ low words; carry out goes to C
        adc     r1, r1, r3              @ high words plus that carry
        bx      lr
        .size   add, .-add
        .align  2
        .global add1
        .syntax unified
        .fpu softvfp
        .type   add1, %function
        @ u64 add1(u64 a) { return a + 1; }
        @ In:    r1:r0 = a (low word in r0)
        @ Out:   r1:r0 = a + 1
        @ Clobb: flags
add1:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        adds    r0, r0, #1              @ bump the low word, setting C on wrap
        adc     r1, r1, #0              @ propagate the carry into the high word
        bx      lr
        .size   add1, .-add1
        .align  2
        .global add42
        .syntax unified
        .fpu softvfp
        .type   add42, %function
        @ u64 add42(u64 a) { return a + 42; }
        @ In:    r1:r0 = a (low word in r0)
        @ Out:   r1:r0 = a + 42
        @ Clobb: flags
add42:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        adds    r0, r0, #42             @ low word + 42, carry out in C
        adc     r1, r1, #0              @ fold the carry into the high word
        bx      lr
        .size   add42, .-add42
        .align  2
        .global addm1
        .syntax unified
        .fpu softvfp
        .type   addm1, %function
        @ u64 addm1(u64 a) { return a - 1; }
        @ In:    r1:r0 = a (low word in r0)
        @ Out:   r1:r0 = a - 1
        @ Clobb: flags
addm1:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        subs    r0, r0, #1              @ low word - 1; C is cleared on borrow
        sbc     r1, r1, #0              @ high word minus the borrow (if any)
        bx      lr
        .size   addm1, .-addm1
        .align  2
        .global addff
        .syntax unified
        .fpu softvfp
        .type   addff, %function
        @ u64 addff(u64 a) { return a + 0xffffffffULL; }
        @ In:    r1:r0 = a (low word in r0)
        @ Out:   r1:r0 = a + 0xffffffff
        @ Clobb: flags
        @
        @ Adding 0xffffffff to the low word gives the same 32-bit result as
        @ subtracting 1, and the carry out of that addition is set exactly
        @ when SUBS does not borrow (low word != 0), so SUBS + ADC suffices.
addff:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        subs    r0, r0, #1              @ low word + 0xffffffff (mod 2^32)
        adc     r1, r1, #0              @ high word + carry
        bx      lr
        .size   addff, .-addff
        .align  2
        .global addH
        .syntax unified
        .fpu softvfp
        .type   addH, %function
        @ u64 addH(u64 a) { return a + 0x123400005678ULL; }
        @ In:    r1:r0 = a (low word in r0)
        @ Out:   r1:r0 = a + 0x123400005678
        @ Clobb: r3, flags
addH:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        movw    r3, #22136              @ r3 = 0x5678, low half of the constant
        adds    r0, r0, r3              @ low words; carry out in C
        movw    r3, #4660               @ r3 = 0x1234; movw leaves C intact
        adc     r1, r3, r1              @ high words plus the carry
        bx      lr
        .size   addH, .-addH
        .align  2
        .global addH0
        .syntax unified
        .fpu softvfp
        .type   addH0, %function
        @ u64 addH0(u64 a) { return a + 0x123400000000ULL; }
        @ In:    r1:r0 = a (low word in r0)
        @ Out:   r1:r0 = a + 0x123400000000
        @
        @ The low word of the constant is zero, so r0 is untouched and no
        @ carry is needed.  0x1234 is added to the high word in two steps:
        @ 4608 (0x1200) and 52 (0x34).
addH0:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        add     r1, r1, #4608           @ high word += 0x1200
        add     r1, r1, #52             @ high word += 0x34
        bx      lr
        .size   addH0, .-addH0
        .align  2
        .global addS
        .syntax unified
        .fpu softvfp
        .type   addS, %function
        @ u64 addS(u64 a, u32 b) { return a + b; }
        @ In:    r1:r0 = a (low word in r0), r2 = b
        @ Out:   r1:r0 = a + (u64)b
        @ Clobb: flags
addS:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        adds    r0, r2, r0              @ low word + b; carry out in C
        adc     r1, r1, #0              @ b's high word is zero: add carry only
        bx      lr
        .size   addS, .-addS
        .align  2
        .global addSH
        .syntax unified
        .fpu softvfp
        .type   addSH, %function
        @ u64 addSH(u64 a, u32 b) { return a + ((u64)b << 32); }
        @ In:    r1:r0 = a (low word in r0), r2 = b
        @ Out:   r1:r0 = a + ((u64)b << 32)
        @
        @ The shifted operand has a zero low word, so only the high word
        @ changes and no carry is involved.
addSH:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        add     r1, r2, r1              @ high word += b
        bx      lr
        .size   addSH, .-addSH
        .align  2
        .global addB1
        .syntax unified
        .fpu softvfp
        .type   addB1, %function
        @ u64 addB1(u64 a) { return a + 0x100000000ULL; }
        @ In:    r1:r0 = a (low word in r0)
        @ Out:   r1:r0 = a + 0x100000000
addB1:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        add     r1, r1, #1              @ constant is 1 << 32: high word += 1
        bx      lr
        .size   addB1, .-addB1
        .align  2
        .global addB8
        .syntax unified
        .fpu softvfp
        .type   addB8, %function
        @ u64 addB8(u64 a) { return a + 0x800000000ULL; }
        @ In:    r1:r0 = a (low word in r0)
        @ Out:   r1:r0 = a + 0x800000000
addB8:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        add     r1, r1, #8              @ constant is 8 << 32: high word += 8
        bx      lr
        .size   addB8, .-addB8
        .align  2
        .global addSH42
        .syntax unified
        .fpu softvfp
        .type   addSH42, %function
        @ u64 addSH42(u64 a, u32 b) { return a + ((u64)b << 32) + 42; }
        @ In:    r1:r0 = a (low word in r0), r2 = b
        @ Out:   r1:r0 = a + ((u64)b << 32) + 42
        @ Clobb: flags
addSH42:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        adds    r0, r0, #42             @ low word + 42; carry out in C
        adc     r1, r1, r2              @ high word + b + carry
        bx      lr
        .size   addSH42, .-addSH42
        .align  2
        .global addSHm1
        .syntax unified
        .fpu softvfp
        .type   addSHm1, %function
        @ u64 addSHm1(u64 a, u32 b) { return a + ((u64)b << 32) - 1; }
        @ In:    r1:r0 = a (low word in r0), r2 = b
        @ Out:   r1:r0 = a + ((u64)b << 32) - 1
        @ Clobb: flags
addSHm1:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        subs    r0, r0, #1              @ 64-bit decrement, low half
        sbc     r1, r1, #0              @ 64-bit decrement, high half (borrow)
        add     r1, r2, r1              @ then add b into the high word
        bx      lr
        .size   addSHm1, .-addSHm1
        .align  2
        .global addSHff
        .syntax unified
        .fpu softvfp
        .type   addSHff, %function
        @ u64 addSHff(u64 a, u32 b)
        @     { return a + ((u64)b << 32) + 0xffffffffULL; }
        @ In:    r1:r0 = a (low word in r0), r2 = b
        @ Out:   r1:r0 = a + ((u64)b << 32) + 0xffffffff
        @ Clobb: flags
        @
        @ Low word + 0xffffffff equals low word - 1 (mod 2^32), and SUBS
        @ leaves C set exactly when that addition would carry, so the
        @ constant needs no register.
addSHff:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        subs    r0, r0, #1              @ low word + 0xffffffff (mod 2^32)
        adc     r1, r1, r2              @ high word + b + carry
        bx      lr
        .size   addSHff, .-addSHff
        .ident  "GCC: (trunk) 10.0.0 20191018 (experimental) [master revision 

Reply via email to