https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88714
--- Comment #16 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Some more progress.
I've used
--- gcc/combine.c.jj 2019-01-10 11:43:17.050333949 +0100
+++ gcc/combine.c 2019-01-15 14:47:28.009094300 +0100
@@ -2319,6 +2319,9 @@ contains_muldiv (rtx x)
}
}
+int cxcnt = -1;
+int cxcurcnt = 0;
+
/* Determine whether INSN can be used in a combination. Return nonzero if
not. This is used in try_combine to detect early some cases where we
can't perform combinations. */
@@ -2361,7 +2364,8 @@ cant_combine_insn_p (rtx_insn *insn)
#endif
|| (HARD_REGISTER_P (dest)
&& ! TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dest))
- && targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dest))))))
+ && (targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dest)))
+ || (getenv ("COMBINE_FIRST") && cxcurcnt == cxcnt)))))
return 1;
return 0;
@@ -14993,6 +14997,12 @@ make_more_copies (void)
{
basic_block bb;
+ if (cxcnt == -1 && getenv ("COMBINE_CNT"))
+ cxcnt = atoi (getenv ("COMBINE_CNT"));
+ ++cxcurcnt;
+ if (getenv ("COMBINE_SECOND") && cxcurcnt == cxcnt)
+ return;
+
FOR_EACH_BB_FN (bb, cfun)
{
rtx_insn *insn;
hack to undo both, or either one, of the two changes r265398 made, on the
function of my choice (initially, for the binary search, I was using
cxcurcnt >= cxcnt instead of cxcurcnt == cxcnt in the two spots), and found that with
COMBINE_CNT=74 COMBINE_FIRST=1 COMBINE_SECOND=1
sort.i works as in stage1, so it is
_ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv that actually matters.
COMBINE_CNT=74 COMBINE_SECOND=1 generates the same (good) assembly as
COMBINE_CNT=74 COMBINE_FIRST=1 COMBINE_SECOND=1, while
COMBINE_CNT=74 COMBINE_FIRST=1 doesn't work, the same as COMBINE_CNT=200.
The "bad" to "good" assembly difference is:
.type _ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv, %function
_ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv:
.fnstart
@ args = 0, pretend = 0, frame = 8
@ frame_needed = 0, uses_anonymous_args = 0
movw r0, #:lower16:global_options
- mov ip, r1
- movt r0, #:upper16:global_options
push {r4, r5, r6, lr}
.save {r4, r5, r6, lr}
- ldr r0, [r0, #88]
+ movt r0, #:upper16:global_options
+ mov r5, r3
.pad #8
sub sp, sp, #8
- str r3, [sp]
- ldr r1, [r0, #540]
- cmp r1, r2
+ ldr r3, [r0, #88]
+ str r5, [sp]
+ ldr r3, [r3, #540]
+ cmp r3, r2
bcc .L2103
- movw r5, #:lower16:.LANCHOR1
- mov r4, r3
- movt r5, #:upper16:.LANCHOR1
- ldr r3, [r5, #176]
+ movw r4, #:lower16:.LANCHOR1
+ mov ip, r1
+ movt r4, #:upper16:.LANCHOR1
+ ldr r3, [r4, #176]
cmp r3, #0
- strne ip, [r3]
- ldr r3, [r4, #12]
+ strne r1, [r3]
+ ldr r3, [r5, #12]
cmp r3, #0
ldrne r2, [r3, #4]
- ldrne r3, [r4, #8]
+ ldrne r3, [r5, #8]
subne r3, r3, r2
- strne r3, [r4, #8]
- cmp ip, #0
+ strne r3, [r5, #8]
+ cmp r1, #0
beq .L2104
- ldr r6, [r5, #12]
+ ldr r6, [r4, #12]
b .L2101
.L2127:
- ldr ip, [r2, #4]
+ ldr ip, [r3, #4]
.L2099:
- ldr r3, [r5, #8]
+ ldr r3, [r4, #8]
cmp r3, ip
beq .L2125
ldrb r3, [ip, #3] @ zero_extendqisi2
tst r3, #2
beq .L2126
.L2101:
ldr r2, [ip, #4]
add r1, sp, #4
mov r0, r6
str ip, [sp, #4]
bl _ZN10hash_tableI17vn_ssa_aux_hasher11xcallocatorE14find_with_hashERKP9tree_nodej
- ldr r2, [r0]
- cmp r2, #0
+ ldr r3, [r0]
+ cmp r3, #0
beq .L2098
- ldrb r3, [r2, #16] @ zero_extendqisi2
- tst r3, #1
+ ldrb r2, [r3, #16] @ zero_extendqisi2
+ tst r2, #1
bne .L2127
.L2098:
ldr ip, [sp, #4]
b .L2099
.L2126:
- ldr r1, [sp]
+ ldr r3, [sp]
.L2097:
- ldrd r2, [r1, #8]
- str ip, [r4, #12]
- ldr r0, [r5, #28]
- cmp r3, #0
- ldrne r3, [r3, #4]
+ str ip, [r5, #12]
+ ldr r1, [r3, #12]
+ ldr r2, [r3, #8]
+ ldr r0, [r4, #28]
+ cmp r1, #0
+ ldrne r1, [r1, #4]
ldr r0, [r0, #8]
- addne r2, r2, r3
- mov r3, #0
- strne r2, [r1, #8]
+ addne r2, r2, r1
mov r1, sp
+ strne r2, [r3, #8]
+ mov r3, #0
bl _ZN10hash_tableI19vn_reference_hasher11xcallocatorE19find_slot_with_hashERKP14vn_reference_sj13insert_option
cmp r0, #0
ldrne r0, [r0]
.L2093:
add sp, sp, #8
@ sp needed
pop {r4, r5, r6, pc}
.L2103:
mvn r0, #0
add sp, sp, #8
@ sp needed
pop {r4, r5, r6, pc}
.L2104:
- mov r1, r4
+ mov r3, r5
b .L2097
.L2125:
movw r2, #:lower16:.LC42
movw r0, #:lower16:.LC3
movt r2, #:upper16:.LC42
movt r0, #:upper16:.LC3
movw r1, #481
bl _Z11fancy_abortPKciS0_
.fnend
.size _ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv, .-_ZL21vn_reference_lookup_2P6ao_refP9tree_nodejPv