On 19/10/2019 17:31, Segher Boessenkool wrote: > Hi Richard, > > On Fri, Oct 18, 2019 at 08:48:31PM +0100, Richard Earnshaw wrote: >> >> This series of patches rewrites all the DImode arithmetic patterns for >> the Arm backend when compiling for Arm or Thumb2 to split the >> operations during expand (the thumb1 code is unchanged and cannot >> benefit from early splitting as we are unable to expose the carry >> flag). > > Very nice :-) > > I have a bunch of testcases from when I did something similar for PowerPC > that I wanted to test... But I cannot get your series to apply. Do you > have a git repo I can pull from? >
Perhaps because it's already committed to trunk? > Here is one test case (it's a bit geared towards what our ISA can do): > > === > typedef unsigned int u32; > typedef unsigned long long u64; > > u64 add(u64 a, u64 b) { return a + b; } > u64 add1(u64 a) { return a + 1; } > u64 add42(u64 a) { return a + 42; } > u64 addm1(u64 a) { return a - 1; } > u64 addff(u64 a) { return a + 0xffffffffULL; } > u64 addH(u64 a) { return a + 0x123400005678ULL; } > u64 addH0(u64 a) { return a + 0x123400000000ULL; } > u64 addS(u64 a, u32 b) { return a + b; } > u64 addSH(u64 a, u32 b) { return a + ((u64)b << 32); } > u64 addB1(u64 a) { return a + 0x100000000ULL; } > u64 addB8(u64 a) { return a + 0x800000000ULL; } > > u64 addSH42(u64 a, u32 b) { return a + ((u64)b << 32) + 42; } > u64 addSHm1(u64 a, u32 b) { return a + ((u64)b << 32) - 1; } > u64 addSHff(u64 a, u32 b) { return a + ((u64)b << 32) + 0xffffffffULL; } > === > > rs6000 -m32 currently has non-optimal code for addm1, addSHm1; trunk arm > has non-optimal code for addH0, addSH, addB1, addB8, addSH42, addSHm1, and > addSHff if I understand well enough. So I'd love to see what it does with > your series applied :-) > > > Segher > We do pretty well on this. Only addSHm1 needs three insns (except where the constant isn't valid for arm), and I think that's the minimum for this case anyway. Several of the tests only need one insn. R.
@ GCC-generated ARM (A32) output for the DImode-add test case "lltest.c"
@ quoted above.  ABI: AAPCS, little-endian; a u64 argument/return value
@ lives in a register pair with the LOW word in the even register
@ (a = r1:r0, b = r3:r2; result in r1:r0).
@ The point of interest is how few instructions each 64-bit add needs,
@ using the carry flag (adds/adc, subs/sbc) set by the low-word operation.
	.arch armv8-a
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 1
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 4
	.file	"lltest.c"
	.text
	.align	2
	.global	add
	.syntax unified
	.arm
	.fpu softvfp
	.type	add, %function
@ u64 add(u64 a, u64 b) { return a + b; }
add:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	adds	r0, r0, r2		@ low word; sets C on carry-out
	adc	r1, r1, r3		@ high word + carry
	bx	lr
	.size	add, .-add
	.align	2
	.global	add1
	.syntax unified
	.arm
	.fpu softvfp
	.type	add1, %function
@ u64 add1(u64 a) { return a + 1; }
add1:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	adds	r0, r0, #1		@ low word + 1; carry out if it wrapped
	adc	r1, r1, #0		@ propagate carry into high word
	bx	lr
	.size	add1, .-add1
	.align	2
	.global	add42
	.syntax unified
	.arm
	.fpu softvfp
	.type	add42, %function
@ u64 add42(u64 a) { return a + 42; }
add42:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	adds	r0, r0, #42
	adc	r1, r1, #0		@ propagate carry into high word
	bx	lr
	.size	add42, .-add42
	.align	2
	.global	addm1
	.syntax unified
	.arm
	.fpu softvfp
	.type	addm1, %function
@ u64 addm1(u64 a) { return a - 1; }
addm1:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	subs	r0, r0, #1		@ low word - 1; C clear only on borrow
	sbc	r1, r1, #0		@ subtract the borrow from the high word
	bx	lr
	.size	addm1, .-addm1
	.align	2
	.global	addff
	.syntax unified
	.arm
	.fpu softvfp
	.type	addff, %function
@ u64 addff(u64 a) { return a + 0xffffffffULL; }
@ Trick: r0 + 0xffffffff == r0 - 1 (mod 2^32), with carry-out exactly
@ when r0 != 0 -- which is the same C flag that subs r0, r0, #1 produces
@ (C set iff no borrow).  So the 0xffffffff add needs no constant at all.
addff:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	subs	r0, r0, #1		@ low = low + 0xffffffff; C = (old low != 0)
	adc	r1, r1, #0		@ high + carry (note adc, NOT sbc)
	bx	lr
	.size	addff, .-addff
	.align	2
	.global	addH
	.syntax unified
	.arm
	.fpu softvfp
	.type	addH, %function
@ u64 addH(u64 a) { return a + 0x123400005678ULL; }
@ 22136 = 0x5678 (low half), 4660 = 0x1234 (high half); r3 is reused
@ as the scratch register for both halves of the constant.
addH:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	movw	r3, #22136		@ r3 = 0x5678
	adds	r0, r0, r3		@ low word; sets carry
	movw	r3, #4660		@ r3 = 0x1234 (movw leaves flags alone)
	adc	r1, r3, r1		@ high word + 0x1234 + carry
	bx	lr
	.size	addH, .-addH
	.align	2
	.global	addH0
	.syntax unified
	.arm
	.fpu softvfp
	.type	addH0, %function
@ u64 addH0(u64 a) { return a + 0x123400000000ULL; }
@ Low 32 bits of the constant are zero, so only the high word changes
@ and no carry handling is needed.  0x1234 is not a valid single ARM
@ immediate, so it is split as 4608 + 52 (0x1200 + 0x34), each of which
@ is an 8-bit value rotated into place.
addH0:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	add	r1, r1, #4608		@ high += 0x1200
	add	r1, r1, #52		@ high += 0x34
	bx	lr
	.size	addH0, .-addH0
	.align	2
	.global	addS
	.syntax unified
	.arm
	.fpu softvfp
	.type	addS, %function
@ u64 addS(u64 a, u32 b) { return a + b; }
@ b is zero-extended: its high word is 0, so the high-word add reduces
@ to adding just the carry.
addS:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	adds	r0, r2, r0		@ low word + b; sets carry
	adc	r1, r1, #0		@ high word + carry only
	bx	lr
	.size	addS, .-addS
	.align	2
	.global	addSH
	.syntax unified
	.arm
	.fpu softvfp
	.type	addSH, %function
@ u64 addSH(u64 a, u32 b) { return a + ((u64)b << 32); }
@ The addend's low word is zero: a single high-word add, no flags needed.
addSH:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	add	r1, r2, r1		@ high += b; r0 already holds the low result
	bx	lr
	.size	addSH, .-addSH
	.align	2
	.global	addB1
	.syntax unified
	.arm
	.fpu softvfp
	.type	addB1, %function
@ u64 addB1(u64 a) { return a + 0x100000000ULL; }
@ Constant is a pure high-word value: one add, no carry logic.
addB1:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	add	r1, r1, #1		@ high word + 1
	bx	lr
	.size	addB1, .-addB1
	.align	2
	.global	addB8
	.syntax unified
	.arm
	.fpu softvfp
	.type	addB8, %function
@ u64 addB8(u64 a) { return a + 0x800000000ULL; }
addB8:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	add	r1, r1, #8		@ high word + 8
	bx	lr
	.size	addB8, .-addB8
	.align	2
	.global	addSH42
	.syntax unified
	.arm
	.fpu softvfp
	.type	addSH42, %function
@ u64 addSH42(u64 a, u32 b) { return a + ((u64)b << 32) + 42; }
@ The two addends are merged: #42 goes in the low word, b in the high
@ word, and one adds/adc pair handles both.
addSH42:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	adds	r0, r0, #42		@ low + 42; sets carry
	adc	r1, r1, r2		@ high + b + carry
	bx	lr
	.size	addSH42, .-addSH42
	.align	2
	.global	addSHm1
	.syntax unified
	.arm
	.fpu softvfp
	.type	addSHm1, %function
@ u64 addSHm1(u64 a, u32 b) { return a + ((u64)b << 32) - 1; }
@ The only case here needing three instructions (noted in the thread as
@ likely the minimum): a full 64-bit decrement, then the high-word add.
addSHm1:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	subs	r0, r0, #1		@ 64-bit a - 1, low half
	sbc	r1, r1, #0		@ ... high half with borrow
	add	r1, r2, r1		@ then high += b (no carry interaction)
	bx	lr
	.size	addSHm1, .-addSHm1
	.align	2
	.global	addSHff
	.syntax unified
	.arm
	.fpu softvfp
	.type	addSHff, %function
@ u64 addSHff(u64 a, u32 b) { return a + ((u64)b << 32) + 0xffffffffULL; }
@ Same subs-for-0xffffffff trick as addff, combined with the high-word
@ b addend in a single adc.
addSHff:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	subs	r0, r0, #1		@ low = low + 0xffffffff; C = (old low != 0)
	adc	r1, r1, r2		@ high + b + carry
	bx	lr
	.size	addSHff, .-addSHff
	.ident	"GCC: (trunk) 10.0.0 20191018 (experimental) [master revision 54f1e150a38:1ddbabe127b:e55e9d95a5ab8397197a5e358ba0185f9471f043]"