https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90257
--- Comment #15 from Jakub Jelinek <jakub at gcc dot gnu.org> --- I have tried: --- gcc/cfgrtl.c (revision 270605) +++ gcc/cfgrtl.c (working copy) @@ -557,7 +557,8 @@ flow_active_insn_p (const rtx_insn *insn keep the return value from being live across the entire function. If we allow it to be skipped, we introduce the possibility for register lifetime confusion. */ - if (GET_CODE (PATTERN (insn)) == CLOBBER + if ((GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == USE) && REG_P (XEXP (PATTERN (insn), 0)) && REG_FUNCTION_VALUE_P (XEXP (PATTERN (insn), 0))) return true; --- gcc/lra-spills.c (revision 270605) +++ gcc/lra-spills.c (working copy) @@ -740,6 +740,7 @@ lra_final_code_change (void) int i, hard_regno; basic_block bb; rtx_insn *insn, *curr; + rtx set; int max_regno = max_reg_num (); for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) @@ -818,5 +819,19 @@ lra_final_code_change (void) } if (insn_change_p) lra_update_operator_dups (id); + + if ((set = single_set (insn)) != NULL + && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)) + && REGNO (SET_SRC (set)) == REGNO (SET_DEST (set))) + { + /* Remove an useless move insn. IRA can generate move + insns involving pseudos. It is better remove them + earlier to speed up compiler a bit. It is also + better to do it here as they might not pass final RTL + check in LRA, (e.g. insn moving a control register + into itself). */ + lra_invalidate_insn_data (insn); + delete_insn (insn); + } } } on trunk, passed bootstrap/regtest and compared bootstrap with just the lra-spills.c change. 
Besides cfgrtl.o (expected) there were changes in dwarf2asm.o gimple-ssa-evrp.o ipa-utils.o reload1.o reload.o tree-data-ref.o tree.o tree-scalar-evolution.o between the 2 stage3 gcc/ directories, but in some cases changes in debug info only, in other cases mostly changes like: --- tree-scalar-evolution.s_ 2019-04-29 15:24:02.029836995 +0200 +++ tree-scalar-evolution.s 2019-04-29 15:26:22.216589907 +0200 @@ -6537,7 +6537,30 @@ _ZL18find_var_scev_infoP15basic_block_de jmp .L1172 .p2align 4,,10 .p2align 3 +.L1187: + movl %r9d, 36(%rbp) +.L1176: + addq $24, %rsp + .cfi_remember_state + .cfi_def_cfa_offset 56 + addq $8, %rax + popq %rbx + .cfi_def_cfa_offset 48 + popq %rbp + .cfi_def_cfa_offset 40 + popq %r12 + .cfi_def_cfa_offset 32 + popq %r13 + .cfi_def_cfa_offset 24 + popq %r14 + .cfi_def_cfa_offset 16 + popq %r15 + .cfi_def_cfa_offset 8 + ret + .p2align 4,,10 + .p2align 3 .L1177: + .cfi_restore_state movq %rcx, %r12 .p2align 4,,10 .p2align 3 @@ -6560,28 +6583,10 @@ _ZL18find_var_scev_infoP15basic_block_de movl 84(%r8), %edx movl %edx, 4(%rax) movq %rax, (%r12) -.L1176: - addq $24, %rsp - .cfi_remember_state - .cfi_def_cfa_offset 56 - addq $8, %rax - popq %rbx - .cfi_def_cfa_offset 48 - popq %rbp - .cfi_def_cfa_offset 40 - popq %r12 - .cfi_def_cfa_offset 32 - popq %r13 - .cfi_def_cfa_offset 24 - popq %r14 - .cfi_def_cfa_offset 16 - popq %r15 - .cfi_def_cfa_offset 8 - ret + jmp .L1176 .p2align 4,,10 .p2align 3 .L1170: - .cfi_restore_state movl %r9d, 36(%rbp) testq %r12, %r12 je .L1177 @@ -6597,11 +6602,6 @@ _ZL18find_var_scev_infoP15basic_block_de movq 8(%rbp), %rsi movq 8(%rsp), %r8 jmp .L1163 - .p2align 4,,10 - .p2align 3 -.L1187: - movl %r9d, 36(%rbp) - jmp .L1176 .cfi_endproc .section .text.unlikely .cfi_startproc where just some bbs are moved around, but the *.s length is the same. In gimple-ssa-evrp.s there is a one byte difference in *.s size, guess just different label numbers plus reordering.
The only larger change is in ipa-utils.s: without the cfgrtl.c hunk it is 96603 bytes, and with that change 96479 bytes of assembly, but just 18038 vs. 18022 bytes of .text.