https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122871

Torbjorn SVENSSON <azoff at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |azoff at gcc dot gnu.org

--- Comment #11 from Torbjorn SVENSSON <azoff at gcc dot gnu.org> ---
(In reply to GCC Commits from comment #10)
> The master branch has been updated by Roger Sayle <[email protected]>:
> 
> https://gcc.gnu.org/g:1a06a37611e3b27889c595a17df13f6d27202a95
> 
> commit r17-383-g1a06a37611e3b27889c595a17df13f6d27202a95
> Author: Roger Sayle <[email protected]>
> Date:   Thu May 7 18:46:37 2026 +0100
> 
>     PR middle-end/122871: Doubleword multiplication improvements
>     
>     This patch resolves PR middle-end/122871 by improving RTL expansion of
>     doubleword multiplications.  The main change is to synth_mult adding
>     support for the case where the constant being multiplied has BITS_PER_WORD
>     or more trailing zeros.  The shift_cost tables in expmed are only
>     parameterized for shifts less than BITS_PER_WORD, so doubleword shifts
>     by more than this can't use the usual code path. This patch teaches
>     synth_mult that for scalar doubleword multiplications, a doubleword shift
>     by more than BITS_PER_WORD typically requires two instructions; one to
>     set the result lowpart to zero, and the other a wordmode shift to
>     calculate the result highpart.
>     
>     For the testcase given in the PR:
>     
>     long long ashll_fn (long long a)
>     {
>       long long c;
>     
>       c = a << 33;
>       c += a;
>       return c;
>     }
>     
>     GCC for arm-linux-gnueabihf currently generates with -O2:
>     
>     ashll_fn:
>             lsl     r2, r1, #11
>             lsl     ip, r0, #11
>             subs    ip, ip, r0
>             orr     r2, r2, r0, lsr #21
>             sbc     r2, r2, r1
>             lsl     r3, ip, #11
>             lsl     r2, r2, #11
>             adds    r3, r3, r0
>             orr     r2, r2, ip, lsr #21
>             adc     r1, r1, r2
>             lsl     r2, r1, #11
>             lsl     r0, r3, #11
>             adds    r0, r3, r0
>             orr     r2, r2, r3, lsr #21
>             adc     r1, r1, r2
>             bx      lr
>     
>     with this patch, we instead generate:
>     
>     ashll_fn:
>             add     r1, r1, r0, lsl #1
>             bx      lr
>     
>     Additionally, this patch includes a clean-up (identified by A. Pinski)
>     to prevent RTL expansion of doubleword multiplications from
>     initially emitting multiply instructions by immediate constants 0, 1
>     or 2.  These dubious multiplications eventually get tidied up by later
>     RTL optimization passes, but being sensible during RTL expansion
>     both speeds up the compiler and reduces unnecessary memory usage.
>     
>     2026-05-07  Roger Sayle  <[email protected]>
>     
>     gcc/ChangeLog
>             PR middle-end/122871
>             * expmed.cc (synth_mult): Handle doubleword left shifts by
>             BITS_PER_WORD bits or more, for scalar modes.
>             * optabs.cc (expand_doubleword_mult): Avoid generating multiply
>             instructions by immediate constants 0, 1 or 2.
>     
>     gcc/testsuite/ChangeLog
>             PR middle-end/122871
>             * gcc.target/arm/muldi-1.c: New test case.

The new test case fails for Cortex-M0 and Cortex-M23. Is this a thumb2-only
improvement?

For Cortex-M0, I get:

$ /build/r17-409-g8376a674e3564f/bin/arm-none-eabi-gcc
/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c -mthumb -march=armv6s-m
-mtune=cortex-m0 -mfloat-abi=soft -mfpu=auto -fdiagnostics-plain-output -O2
-ffat-lto-objects -fno-ident -S -o - -dP
        .arch armv6s-m
        .fpu softvfp
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 1
        .eabi_attribute 30, 2
        .eabi_attribute 34, 0
        .eabi_attribute 18, 4
        .file   "muldi-1.c"
        .text
        .align  1
        .p2align 2,,3
        .global ashll_fn
        .syntax unified
        .code   16
        .thumb_func
        .type   ashll_fn, %function
ashll_fn:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
@(insn 23 3 38 (set (reg:SI 3 r3 [ a+4 ])
@        (reg:SI 0 r0 [orig:107 a ] [107]))
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 743
{*thumb1_movsi_insn}
@     (nil))
@ 0x0000
        movs    r3, r0  @ 23    [c=4 l=2]  *thumb1_movsi_insn/0
@(insn 32 38 22 (unspec:SI [
@            (reg/f:SI 13 sp)
@        ] UNSPEC_REGISTER_USE)
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":11:1 403
{force_register_use}
@     (nil))
@ 0x0002
        @ sp needed     @ 32    [c=8 l=0]  force_register_use
@(insn 22 32 11 (set (reg:SI 2 r2 [orig:99 a ] [99])
@        (const_int 0 [0]))
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 743
{*thumb1_movsi_insn}
@     (nil))
@ 0x0002
        movs    r2, #0  @ 22    [c=4 l=2]  *thumb1_movsi_insn/1
@(insn 11 22 12 (parallel [
@            (set (reg:DI 2 r2 [102])
@                (plus:DI (reg:DI 2 r2 [orig:99 a ] [99])
@                    (reg:DI 0 r0 [orig:107 a ] [107])))
@            (clobber (reg:CC 80 cc))
@        ]) "/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 718
{*thumb1_adddi3}
@     (expr_list:REG_UNUSED (reg:CC 80 cc)
@        (expr_list:REG_EQUAL (mult:DI (reg:DI 0 r0 [orig:107 a ] [107])
@                (const_int 4294967297 [0x100000001]))
@            (nil))))
@ 0x0004
        adds    r2, r2, r0      @ 11    [c=4 l=4]  *thumb1_adddi3
        adcs    r3, r3, r1
@(insn 12 11 13 (parallel [
@            (set (reg:DI 2 r2 [103])
@                (plus:DI (reg:DI 2 r2 [102])
@                    (reg:DI 2 r2 [102])))
@            (clobber (reg:CC 80 cc))
@        ]) "/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 718
{*thumb1_adddi3}
@     (expr_list:REG_UNUSED (reg:CC 80 cc)
@        (nil)))
@ 0x0008
        adds    r2, r2, r2      @ 12    [c=4 l=4]  *thumb1_adddi3
        adcs    r3, r3, r3
@(insn 13 12 28 (parallel [
@            (set (reg:DI 2 r2 [103])
@                (minus:DI (reg:DI 2 r2 [103])
@                    (reg:DI 0 r0 [orig:107 a ] [107])))
@            (clobber (reg:CC 80 cc))
@        ]) "/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 720
{*thumb_subdi3}
@     (expr_list:REG_DEAD (reg:DI 0 r0 [orig:107 a ] [107])
@        (expr_list:REG_DEAD (reg:DI 0 r0 [orig:107 a ] [107])
@            (expr_list:REG_UNUSED (reg:CC 80 cc)
@                (expr_list:REG_EQUAL (mult:DI (reg:DI 0 r0 [orig:107 a ] [107])
@                        (const_int 8589934593 [0x200000001]))
@                    (nil))))))
@ 0x000c
        subs    r2, r2, r0      @ 13    [c=4 l=4]  *thumb_subdi3
        sbcs    r3, r3, r1
@(insn 28 13 29 (set (reg:SI 0 r0 [orig:104 _5 ] [104])
@        (reg:SI 2 r2 [103]))
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 743
{*thumb1_movsi_insn}
@     (expr_list:REG_DEAD (reg:SI 2 r2 [103])
@        (nil)))
@ 0x0010
        movs    r0, r2  @ 28    [c=4 l=2]  *thumb1_movsi_insn/0
@(insn 29 28 19 (set (reg:SI 1 r1 [ _5+4 ])
@        (reg:SI 3 r3 [+4 ]))
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 743
{*thumb1_movsi_insn}
@     (expr_list:REG_DEAD (reg:SI 3 r3 [+4 ])
@        (nil)))
@ 0x0012
        movs    r1, r3  @ 29    [c=4 l=2]  *thumb1_movsi_insn/0
@(jump_insn 34 33 35 (unspec_volatile [
@            (return)
@        ] VUNSPEC_EPILOGUE)
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":11:1 779
{*epilogue_insns}
@     (nil)
@ -> return)
@ 0x0014
        bx      lr
        .size   ashll_fn, .-ashll_fn

Reply via email to