On 2022/06/11 9:12, Max Filippov wrote:
Hi Suwa-san,
Hi!
This change results in a bunch of ICEs in tests that look like this: gcc/gcc/testsuite/gcc.c-torture/compile/memtst.c: In function 'main': gcc/gcc/testsuite/gcc.c-torture/compile/memtst.c:28:1: error: unrecognizable insn: (insn 7 6 8 2 (set (reg:SI 45) (plus:SI (reg:SI 44) (const_int 262144 [0x40000])))
Oh, that was my mistake... it's so RISCy! int array[65535]; void test(void) { __builtin_memset(array, 0, sizeof(array)); } .literal_position .literal .LC0, array .literal .LC2, 65535 test: l32r a3, .LC0 l32r a2, .LC2 movi.n a4, 0 loop a2, .L2_LEND .L2: s32i.n a4, a3, 0 addi.n a3, a3, 4 .L2_LEND: ret.n --- gcc/config/xtensa/xtensa.cc | 71 ++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 21 deletions(-) diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index c7b54babc37..bc3330f836f 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -1483,7 +1483,7 @@ xtensa_expand_block_set_unrolled_loop (rtx *operands) int xtensa_expand_block_set_small_loop (rtx *operands) { - HOST_WIDE_INT bytes, value, align; + HOST_WIDE_INT bytes, value, align, count; int expand_len, funccall_len; rtx x, dst, end, reg; machine_mode unit_mode; @@ -1503,17 +1503,25 @@ xtensa_expand_block_set_small_loop (rtx *operands) /* Totally-aligned block only. */ if (bytes % align != 0) return 0; + count = bytes / align; - /* If 4-byte aligned, small loop substitution is almost optimal, thus - limited to only offset to the end address for ADDI/ADDMI instruction. */ - if (align == 4 - && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) - return 0; + /* If the Loop Option (zero-overhead looping) is configured and active, + almost no restrictions about the length of the block. */ + if (! (TARGET_LOOPS && optimize)) + { + /* If 4-byte aligned, small loop substitution is almost optimal, + thus limited to only offset to the end address for ADDI/ADDMI + instruction. */ + if (align == 4 + && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) + return 0; - /* If no 4-byte aligned, loop count should be treated as the constraint. */ - if (align != 4 - && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) - return 0; + /* If no 4-byte aligned, loop count should be treated as the + constraint. 
*/ + if (align != 4 + && count > ((optimize > 1 && !optimize_size) ? 8 : 15)) + return 0; + } /* Insn expansion: holding the init value. Either MOV(.N) or L32R w/litpool. */ @@ -1523,16 +1531,33 @@ xtensa_expand_block_set_small_loop (rtx *operands) expand_len = TARGET_DENSITY ? 2 : 3; else expand_len = 3 + 4; - /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ - expand_len += bytes > 127 ? 3 - : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; - - /* Insn expansion: the loop body and branch instruction. - For store, one of S8I, S16I or S32I(.N). - For advance, ADDI(.N). - For branch, BNE. */ - expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) - + (TARGET_DENSITY ? 2 : 3) + 3; + if (TARGET_LOOPS && optimize) /* zero-overhead looping */ + { + /* Insn translation: Either MOV(.N) or L32R w/litpool for the + loop count. */ + expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count) + : 3 + 4; + /* Insn translation: LOOP, the zero-overhead looping setup + instruction. */ + expand_len += 3; + /* Insn expansion: the loop body instructions. + For store, one of S8I, S16I or S32I(.N). + For advance, ADDI(.N). */ + expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) + + (TARGET_DENSITY ? 2 : 3); + } + else /* NO zero-overhead looping */ + { + /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ + expand_len += bytes > 127 ? 3 + : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; + /* Insn expansion: the loop body and branch instruction. + For store, one of S8I, S16I or S32I(.N). + For advance, ADDI(.N). + For branch, BNE. */ + expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) + + (TARGET_DENSITY ? 2 : 3) + 3; + } /* Function call: preparing two arguments. 
*/ funccall_len = xtensa_sizeof_MOVI (value); @@ -1555,7 +1580,11 @@ xtensa_expand_block_set_small_loop (rtx *operands) dst = gen_reg_rtx (SImode); emit_move_insn (dst, x); end = gen_reg_rtx (SImode); - emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); + if (TARGET_LOOPS && optimize) + x = force_reg (SImode, operands[1] /* the length */); + else + x = operands[1]; + emit_insn (gen_addsi3 (end, dst, x)); switch (align) { case 1: -- 2.20.1