https://gcc.gnu.org/g:bf6f77edd625cfe2f2f164e90437df318b96527f
commit r15-5938-gbf6f77edd625cfe2f2f164e90437df318b96527f Author: Georg-Johann Lay <a...@gjlay.de> Date: Thu Dec 5 11:24:30 2024 +0100 AVR: target/107957 - Propagate zero_reg to store sources. When -msplit-ldst is on, it may be possible to propagate __zero_reg__ to the sources of the new stores. For example, without this patch, unsigned long lx; void store_lsr17 (void) { lx >>= 17; } compiles to: store_lsr17: lds r26,lx+2 ; movqi_insn lds r27,lx+3 ; movqi_insn movw r24,r26 ; *movhi lsr r25 ; *lshrhi3_const ror r24 ldi r26,0 ; movqi_insn ldi r27,0 ; movqi_insn sts lx,r24 ; movqi_insn sts lx+1,r25 ; movqi_insn sts lx+2,r26 ; movqi_insn sts lx+3,r27 ; movqi_insn ret but with this patch it becomes: store_lsr17: lds r26,lx+2 ; movqi_insn lds r27,lx+3 ; movqi_insn movw r24,r26 ; *movhi lsr r25 ; *lshrhi3_const ror r24 sts lx,r24 ; movqi_insn sts lx+1,r25 ; movqi_insn sts lx+2,__zero_reg__ ; movqi_insn sts lx+3,__zero_reg__ ; movqi_insn ret gcc/ PR target/107957 * config/avr/avr-passes-fuse-move.h (bbinfo_t) <try_mem0_p>: Add static property. * config/avr/avr-passes.cc (bbinfo_t::try_mem0_p): Define it. (optimize_data_t::try_mem0): New method. (bbinfo_t::optimize_one_block) [bbinfo_t::try_mem0_p]: Run try_mem0. (bbinfo_t::optimize_one_function): Set bbinfo_t::try_mem0_p. * config/avr/avr.md (pushhi1_insn): Also allow zero as source. (define_split) [avropt_split_ldst]: Only run avr_split_ldst() when avr-fuse-add has been run at least once. * doc/invoke.texi (AVR Options) <-msplit-ldst>: Document it.
Diff: --- gcc/config/avr/avr-passes-fuse-move.h | 1 + gcc/config/avr/avr-passes.cc | 49 ++++++++++++++++++++++++++++++++++- gcc/config/avr/avr.md | 9 +++++-- gcc/doc/invoke.texi | 9 +++++-- 4 files changed, 63 insertions(+), 5 deletions(-) diff --git a/gcc/config/avr/avr-passes-fuse-move.h b/gcc/config/avr/avr-passes-fuse-move.h index dbed1a636f3d..432f9ca4670f 100644 --- a/gcc/config/avr/avr-passes-fuse-move.h +++ b/gcc/config/avr/avr-passes-fuse-move.h @@ -1172,6 +1172,7 @@ struct bbinfo_t static find_plies_data_t *fpd; static bool try_fuse_p; + static bool try_mem0_p; static bool try_bin_arg1_p; static bool try_simplify_p; static bool try_split_ldi_p; diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc index de8de1cd2e8a..fad64b1b3454 100644 --- a/gcc/config/avr/avr-passes.cc +++ b/gcc/config/avr/avr-passes.cc @@ -434,6 +434,11 @@ static machine_mode size_to_mode (int size) Split all insns where the operation can be performed on individual bytes, like andsi3. In example (4) the andhi3 can be optimized to an andqi3. + + bbinfo_t::try_mem0_p + Try to fuse a mem = reg insn to mem = __zero_reg__. + This should only occur when -msplit-ldst is on, but may + also occur with pushes since push<mode>1 splits them. */ @@ -514,6 +519,7 @@ bool bbinfo_t::try_split_any_p; bool bbinfo_t::try_simplify_p; bool bbinfo_t::use_arith_p; bool bbinfo_t::use_set_some_p; +bool bbinfo_t::try_mem0_p; // Abstract Interpretation of expressions. @@ -1087,6 +1093,7 @@ struct optimize_data_t {} bool try_fuse (bbinfo_t *); + bool try_mem0 (bbinfo_t *); bool try_bin_arg1 (bbinfo_t *); bool try_simplify (bbinfo_t *); bool try_split_ldi (bbinfo_t *); @@ -2509,6 +2516,44 @@ bbinfo_t::run_find_plies (const insninfo_t &ii, const memento_t &memo) const } +// Try to propagate __zero_reg__ to a mem = reg insn's source. +// Returns true on success and sets .n_new_insns. 
+bool +optimize_data_t::try_mem0 (bbinfo_t *) +{ + rtx_insn *insn = curr.ii.m_insn; + rtx set, mem, reg; + machine_mode mode; + + if (insn + && (set = single_set (insn)) + && MEM_P (mem = SET_DEST (set)) + && REG_P (reg = SET_SRC (set)) + && GET_MODE_SIZE (mode = GET_MODE (mem)) <= 4 + && END_REGNO (reg) <= REG_32 + && ! (regmask (reg) & memento_t::fixed_regs_mask) + && curr.regs.have_value (REGNO (reg), GET_MODE_SIZE (mode), 0x0)) + { + avr_dump (";; Found insn %d: mem:%m = 0 = r%d\n", INSN_UID (insn), + mode, REGNO (reg)); + + // Some insns like PUSHes don't clobber REG_CC. + bool clobbers_cc = GET_CODE (PATTERN (insn)) == PARALLEL; + + if (clobbers_cc) + emit_valid_move_clobbercc (mem, CONST0_RTX (mode)); + else + emit_valid_insn (gen_rtx_SET (mem, CONST0_RTX (mode))); + + n_new_insns = 1; + + return true; + } + + return false; +} + + // Try to fuse two 1-byte insns .prev and .curr to one 2-byte insn (MOVW). // Returns true on success, and sets .n_new_insns, .ignore_mask etc. bool @@ -3108,7 +3153,8 @@ bbinfo_t::optimize_one_block (bool &changed) || (bbinfo_t::try_bin_arg1_p && od.try_bin_arg1 (this)) || (bbinfo_t::try_simplify_p && od.try_simplify (this)) || (bbinfo_t::try_split_ldi_p && od.try_split_ldi (this)) - || (bbinfo_t::try_split_any_p && od.try_split_any (this))); + || (bbinfo_t::try_split_any_p && od.try_split_any (this)) + || (bbinfo_t::try_mem0_p && od.try_mem0 (this))); rtx_insn *new_insns = get_insns (); end_sequence (); @@ -3193,6 +3239,7 @@ bbinfo_t::optimize_one_function (function *func) // Which optimization(s) to perform. bbinfo_t::try_fuse_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1]. + bbinfo_t::try_mem0_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1]. bbinfo_t::try_bin_arg1_p = avropt_fuse_move & 0x2; // Digit 1 in [0, 1]. bbinfo_t::try_split_any_p = avropt_fuse_move & 0x4; // Digit 2 in [0, 1]. bbinfo_t::try_split_ldi_p = avropt_fuse_move >> 3; // Digit 3 in [0, 2]. 
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 3683001200da..9c348be7c48a 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -450,9 +450,11 @@ (define_insn "pushhi1_insn" [(set (mem:HI (post_dec:HI (reg:HI REG_SP))) - (match_operand:HI 0 "register_operand" "r"))] + (match_operand:HI 0 "reg_or_0_operand" "r,Y00"))] "" - "push %B0\;push %A0" + "@ + push %B0\;push %A0 + push __zero_reg__\;push __zero_reg__" [(set_attr "length" "2")]) ;; All modes for a multi-byte push. We must include complex modes here too, @@ -1029,6 +1031,9 @@ // provided non-volatile, addr-space = generic, no reg-overlap // and the resulting addressings are natively supported. if (avropt_split_ldst + // Splitting too early may obfuscate some PRE_DEC / POST_INC + // opportunities, thus only split after avr-fuse-add. + && n_avr_fuse_add_executed > 0 && GET_MODE_SIZE (<MODE>mode) > 1 && avr_split_ldst (operands)) DONE; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4b1acf9b79c1..78ead0e494e1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -904,9 +904,9 @@ Objective-C and Objective-C++ Dialects}. -mbranch-cost=@var{cost} -mfuse-add=@var{level} -mfuse-move=@var{level} -mcall-prologues -mgas-isr-prologues -mint8 -mflmap -mdouble=@var{bits} -mlong-double=@var{bits} --mn_flash=@var{size} -mno-interrupts +-mn_flash=@var{size} -mfract-convert-truncate -mno-interrupts -mmain-is-OS_task -mrelax -mrmw -mstrict-X -mtiny-stack --mrodata-in-ram -mfract-convert-truncate -msplit-bit-shift +-mrodata-in-ram -msplit-bit-shift -msplit-ldst -mshort-calls -mskip-bug -nodevicelib -nodevicespecs -Waddr-space-convert -Wmisspelled-isr} @@ -24374,6 +24374,11 @@ This optimization is turned on per default for @option{-O2} and higher, including @option{-Os} but excluding @option{-Oz}. Splitting of shifts with a constant offset that is a multiple of 8 is controlled by @option{-mfuse-move}. 
+@opindex msplit-ldst + +@item -msplit-ldst +Split multi-byte loads and stores into several byte loads and stores. +This optimization is turned on per default for @option{-O2} and higher. @opindex mtiny-stack @item -mtiny-stack