On Fri, Apr 17, 2020 at 02:12:19PM +0200, Uros Bizjak wrote: > Is it possible to perform widening only for !TARGET_PARTIAL_REG_STALL? > I'm worried that highpart may not be cleared, so the effects of > partial reg stall can be quite noticeable.
So, like this then? All I've added was another condition, interdiff: diff -u gcc/config/i386/i386.md gcc/config/i386/i386.md --- gcc/config/i386/i386.md 2020-04-16 14:21:41.686972979 +0200 +++ gcc/config/i386/i386.md 2020-04-17 14:29:33.882690098 +0200 @@ -8750,6 +8750,18 @@ == GET_MODE_PRECISION (GET_MODE (operands[2])) && !register_operand (operands[2], GET_MODE (operands[2]))) + /* And we shouldn't widen if + TARGET_PARTIAL_REG_STALL. */ + || (TARGET_PARTIAL_REG_STALL + && (INTVAL (operands[3]) + INTVAL (operands[4]) + >= (paradoxical_subreg_p (operands[2]) + && (GET_MODE_CLASS + (GET_MODE (SUBREG_REG (operands[2]))) + == MODE_INT) + ? GET_MODE_PRECISION + (GET_MODE (SUBREG_REG (operands[2]))) + : GET_MODE_PRECISION + (GET_MODE (operands[2]))))) ? CCZmode : CCNOmode)" "#" "&& 1" and nothing in the splitter would need to change because the condition would ensure that for TARGET_PARTIAL_REG_STALL we require in all problematic cases CCZmode. The only exception would be the pos + len == 8 optimization which we wouldn't perform (of course with CCZmode we'd still do), but that isn't a partial reg stall thing, that is an optimization to use shorter testb instead of testw. 2020-04-17 Jakub Jelinek <ja...@redhat.com> Jeff Law <l...@redhat.com> PR target/94567 * config/i386/i386.md (*testqi_ext_3): Use CCZmode rather than CCNOmode in ix86_match_ccmode if len is equal to <MODE>mode precision, or pos + len >= 32, or pos + len is equal to operands[2] precision and operands[2] is not a register operand. During splitting perform SImode AND if operands[0] doesn't have CCZmode and pos + len is equal to mode precision. * gcc.c-torture/execute/pr94567.c: New test. --- gcc/config/i386/i386.md.jj 2020-04-17 08:49:33.236921107 +0200 +++ gcc/config/i386/i386.md 2020-04-17 14:29:33.882690098 +0200 @@ -8730,10 +8730,38 @@ (define_insn_and_split "*testqi_ext_3" && INTVAL (operands[3]) > 32 && INTVAL (operands[3]) + INTVAL (operands[4]) == 64)) && ix86_match_ccmode (insn, - /* *testdi_1 requires CCZmode if the mask has bit + /* If zero_extract mode precision is the same + as len, the SF of the zero_extract + comparison will be the most significant + extracted bit, but this could be matched + after splitting only for pos 0 len all bits + trivial extractions. Require CCZmode. */ + (GET_MODE_PRECISION (<MODE>mode) + == INTVAL (operands[3])) + /* Otherwise, require CCZmode if we'd use a mask + with the most significant bit set and can't + widen it to wider mode. *testdi_1 also + requires CCZmode if the mask has bit 31 set and all bits above it clear. */ - GET_MODE (operands[2]) == DImode - && INTVAL (operands[3]) + INTVAL (operands[4]) == 32 + || (INTVAL (operands[3]) + INTVAL (operands[4]) + >= 32) + /* We can't widen also if val is not a REG. */ + || (INTVAL (operands[3]) + INTVAL (operands[4]) + == GET_MODE_PRECISION (GET_MODE (operands[2])) + && !register_operand (operands[2], + GET_MODE (operands[2]))) + /* And we shouldn't widen if + TARGET_PARTIAL_REG_STALL. */ + || (TARGET_PARTIAL_REG_STALL + && (INTVAL (operands[3]) + INTVAL (operands[4]) + >= (paradoxical_subreg_p (operands[2]) + && (GET_MODE_CLASS + (GET_MODE (SUBREG_REG (operands[2]))) + == MODE_INT) + ? GET_MODE_PRECISION + (GET_MODE (SUBREG_REG (operands[2]))) + : GET_MODE_PRECISION + (GET_MODE (operands[2]))))) ? CCZmode : CCNOmode)" "#" "&& 1" @@ -8750,7 +8778,10 @@ (define_insn_and_split "*testqi_ext_3" /* Narrow paradoxical subregs to prevent partial register stalls. */ if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode) - && GET_MODE_CLASS (submode) == MODE_INT) + && GET_MODE_CLASS (submode) == MODE_INT + && (GET_MODE (operands[0]) == CCZmode + || pos + len < GET_MODE_PRECISION (submode) + || REG_P (SUBREG_REG (val)))) { val = SUBREG_REG (val); mode = submode; @@ -8758,14 +8789,32 @@ (define_insn_and_split "*testqi_ext_3" } /* Small HImode tests can be converted to QImode. */ - if (register_operand (val, HImode) && pos + len <= 8) + if (pos + len <= 8 + && register_operand (val, HImode)) { - val = gen_lowpart (QImode, val); - mode = QImode; + rtx nval = gen_lowpart (QImode, val); + if (!MEM_P (nval) + || GET_MODE (operands[0]) == CCZmode + || pos + len < 8) + { + val = nval; + mode = QImode; + } } gcc_assert (pos + len <= GET_MODE_PRECISION (mode)); + /* If the mask is going to have the sign bit set in the mode + we want to do the comparison in and user isn't interested just + in the zero flag, then we must widen the target mode. */ + if (pos + len == GET_MODE_PRECISION (mode) + && GET_MODE (operands[0]) != CCZmode) + { + gcc_assert (pos + len < 32 && !MEM_P (val)); + mode = SImode; + val = gen_lowpart (mode, val); + } + wide_int mask = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode)); --- gcc/testsuite/gcc.c-torture/execute/pr94567.c.jj 2020-04-17 14:19:34.610683856 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr94567.c 2020-04-17 14:19:34.610683856 +0200 @@ -0,0 +1,26 @@ +/* PR target/94567 */ + +volatile int a = 1, b; +short c, d = 4, f = 2, g; +unsigned short e = 53736; + +int +foo (int i, int j) +{ + return i && j ? 0 : i + j; +} + +int +main () +{ + for (; a; a = 0) + { + unsigned short k = e; + g = k >> 3; + if (foo (g < (f || c), b)) + d = 0; + } + if (d != 4) + __builtin_abort (); + return 0; +} Jakub