https://gcc.gnu.org/g:aa29654b1128a572c97fcaba94095f493662a0db
commit r16-276-gaa29654b1128a572c97fcaba94095f493662a0db Author: Uros Bizjak <ubiz...@gmail.com> Date: Tue Apr 29 10:25:45 2025 +0200 i386: Allow string instructions from non-default address space [PR111657] MOVS instructions allow segment override of their source operand, e.g.: rep movsq %gs:(%rsi), (%rdi) where %rsi is the address of the source location (with %gs segment override) and %rdi is the address of the destination location. The testcase improves from (-O2 -mno-sse -mtune=generic): xorl %eax, %eax .L2: movl %eax, %edx addl $8, %eax movq %gs:m(%rdx), %rcx movq %rcx, (%rdi,%rdx) cmpl $240, %eax jb .L2 ret to: movl $m, %esi movl $30, %ecx rep movsq %gs:(%rsi), (%rdi) ret PR target/111657 gcc/ChangeLog: * config/i386/i386-expand.cc (alg_usable_p): Remove have_as bool argument and add dst_as and src_as address space arguments. Reject libcall algorithm with dst_as or src_as in a non-default address space. Reject rep_prefix_{1,4,8}_byte algorithms with dst_as in the non-default address space. (decide_alg): Remove have_as bool argument and add dst_as and src_as address space arguments. Update calls to alg_usable_p. (ix86_expand_set_or_cpymem): Update call to decide_alg. * config/i386/i386.md (strmov): Do not fail if operand[3] (source) is in the non-default address space. Expand with gen_strmov_singleop only when operand[1] (destination) is in the default address space. (*strmovdi_rex_1): Determine memory operands from insn pattern. Allow only when destination is in the default address space. Rewrite asm template to use explicit operands. (*strmovsi_1): Ditto. (*strmovhi_1): Ditto. (*strmovqi_1): Ditto. (*rep_movdi_rex64): Ditto. (*rep_movsi): Ditto. (*rep_movqi): Ditto. (*strsetdi_rex_1): Determine memory operands from insn pattern. Allow only when destination is in the default address space. (*strsetsi_1): Ditto. (*strsethi_1): Ditto. (*strsetqi_1): Ditto. (*rep_stosdi_rex64): Ditto. (*rep_stossi): Ditto. (*rep_stosqi): Ditto. 
gcc/testsuite/ChangeLog: * gcc.target/i386/pr111657-1.c: New test. Diff: --- gcc/config/i386/i386-expand.cc | 56 +++++++------- gcc/config/i386/i386.md | 116 ++++++++++++++++++++++------- gcc/testsuite/gcc.target/i386/pr111657-1.c | 11 +++ 3 files changed, 131 insertions(+), 52 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 388e65192e48..f1cc85b4531c 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -8907,31 +8907,33 @@ expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, /* Return true if ALG can be used in current context. Assume we expand memset if MEMSET is true. */ static bool -alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) +alg_usable_p (enum stringop_alg alg, bool memset, + addr_space_t dst_as, addr_space_t src_as) { if (alg == no_stringop) return false; /* It is not possible to use a library call if we have non-default address space. We can do better than the generic byte-at-a-time loop, used as a fallback. */ - if (alg == libcall && have_as) + if (alg == libcall && + !(ADDR_SPACE_GENERIC_P (dst_as) && ADDR_SPACE_GENERIC_P (src_as))) return false; if (alg == vector_loop) return TARGET_SSE || TARGET_AVX; /* Algorithms using the rep prefix want at least edi and ecx; additionally, memset wants eax and memcpy wants esi. Don't consider such algorithms if the user has appropriated those - registers for their own purposes, or if we have a non-default - address space, since some string insns cannot override the segment. */ + registers for their own purposes, or if we have the destination + in the non-default address space, since string insns cannot + override the destination segment. */ if (alg == rep_prefix_1_byte || alg == rep_prefix_4_byte || alg == rep_prefix_8_byte) { - if (have_as) - return false; if (fixed_regs[CX_REG] || fixed_regs[DI_REG] - || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) + || (memset ? 
fixed_regs[AX_REG] : fixed_regs[SI_REG]) + || !ADDR_SPACE_GENERIC_P (dst_as)) return false; } return true; @@ -8941,8 +8943,8 @@ alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) static enum stringop_alg decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, - bool memset, bool zero_memset, bool have_as, - int *dynamic_check, bool *noalign, bool recur) + bool memset, bool zero_memset, addr_space_t dst_as, + addr_space_t src_as, int *dynamic_check, bool *noalign, bool recur) { const struct stringop_algs *algs; bool optimize_for_speed; @@ -8974,7 +8976,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, for (i = 0; i < MAX_STRINGOP_ALGS; i++) { enum stringop_alg candidate = algs->size[i].alg; - bool usable = alg_usable_p (candidate, memset, have_as); + bool usable = alg_usable_p (candidate, memset, dst_as, src_as); any_alg_usable_p |= usable; if (candidate != libcall && candidate && usable) @@ -8990,17 +8992,17 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* If user specified the algorithm, honor it if possible. */ if (ix86_stringop_alg != no_stringop - && alg_usable_p (ix86_stringop_alg, memset, have_as)) + && alg_usable_p (ix86_stringop_alg, memset, dst_as, src_as)) return ix86_stringop_alg; /* rep; movq or rep; movl is the smallest variant. */ else if (!optimize_for_speed) { *noalign = true; if (!count || (count & 3) || (memset && !zero_memset)) - return alg_usable_p (rep_prefix_1_byte, memset, have_as) + return alg_usable_p (rep_prefix_1_byte, memset, dst_as, src_as) ? rep_prefix_1_byte : loop_1_byte; else - return alg_usable_p (rep_prefix_4_byte, memset, have_as) + return alg_usable_p (rep_prefix_4_byte, memset, dst_as, src_as) ? 
rep_prefix_4_byte : loop; } /* Very tiny blocks are best handled via the loop, REP is expensive to @@ -9024,7 +9026,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, enum stringop_alg candidate = algs->size[i].alg; if (candidate != libcall - && alg_usable_p (candidate, memset, have_as)) + && alg_usable_p (candidate, memset, dst_as, src_as)) { alg = candidate; alg_noalign = algs->size[i].noalign; @@ -9044,7 +9046,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, else if (!any_alg_usable_p) break; } - else if (alg_usable_p (candidate, memset, have_as) + else if (alg_usable_p (candidate, memset, dst_as, src_as) && !(TARGET_PREFER_KNOWN_REP_MOVSB_STOSB && candidate == rep_prefix_1_byte /* NB: If min_size != max_size, size is @@ -9066,7 +9068,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, choice in ix86_costs. */ if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) && (algs->unknown_size == libcall - || !alg_usable_p (algs->unknown_size, memset, have_as))) + || !alg_usable_p (algs->unknown_size, memset, dst_as, src_as))) { enum stringop_alg alg; HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2; @@ -9081,8 +9083,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, *dynamic_check = 128; return loop_1_byte; } - alg = decide_alg (count, new_expected_size, min_size, max_size, memset, - zero_memset, have_as, dynamic_check, noalign, true); + alg = decide_alg (count, new_expected_size, min_size, max_size, + memset, zero_memset, dst_as, src_as, + dynamic_check, noalign, true); gcc_assert (*dynamic_check == -1); if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) *dynamic_check = max; @@ -9094,7 +9097,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, /* Try to use some reasonable fallback algorithm. Note that for non-default address spaces we default to a loop instead of a libcall. 
*/ - return (alg_usable_p (algs->unknown_size, memset, have_as) + + bool have_as = !(ADDR_SPACE_GENERIC_P (dst_as) + && ADDR_SPACE_GENERIC_P (src_as)); + + return (alg_usable_p (algs->unknown_size, memset, dst_as, src_as) ? algs->unknown_size : have_as ? loop : libcall); } @@ -9320,7 +9327,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, unsigned HOST_WIDE_INT max_size = -1; unsigned HOST_WIDE_INT probable_max_size = -1; bool misaligned_prologue_used = false; - bool have_as; + addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC; if (CONST_INT_P (align_exp)) align = INTVAL (align_exp); @@ -9358,16 +9365,15 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, if (count > (HOST_WIDE_INT_1U << 30)) return false; - have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); + dst_as = MEM_ADDR_SPACE (dst); if (!issetmem) - have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); + src_as = MEM_ADDR_SPACE (src); /* Step 0: Decide on preferred algorithm, desired alignment and size of chunks to be copied by main loop. */ alg = decide_alg (count, expected_size, min_size, probable_max_size, - issetmem, - issetmem && val_exp == const0_rtx, have_as, - &dynamic_check, &noalign, false); + issetmem, issetmem && val_exp == const0_rtx, + dst_as, src_as, &dynamic_check, &noalign, false); if (dump_file) fprintf (dump_file, "Selected stringop expansion strategy: %s\n", diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e170da3b0e64..962e7ab92aa8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -25587,10 +25587,6 @@ (clobber (reg:CC FLAGS_REG))])] "" { - /* Can't use this for non-default address spaces. 
*/ - if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3]))) - FAIL; - int piece_size = GET_MODE_SIZE (GET_MODE (operands[1])); /* If .md ever supports :P for Pmode, these can be directly @@ -25598,9 +25594,12 @@ operands[5] = plus_constant (Pmode, operands[0], piece_size); operands[6] = plus_constant (Pmode, operands[2], piece_size); - /* Can't use this if the user has appropriated esi or edi. */ + /* Can't use this if the user has appropriated esi or edi, + * or if we have the destination in the non-default address space, + * since string insns cannot override the destination segment. */ if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) - && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) + && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1]))) { emit_insn (gen_strmov_singleop (operands[0], operands[1], operands[2], operands[3], @@ -25635,8 +25634,16 @@ (const_int 8)))] "TARGET_64BIT && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsq" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^movsq\t{%1, %0|%0, %1}"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "DI")]) @@ -25651,8 +25658,16 @@ (plus:P (match_dup 3) (const_int 4)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movs{l|d}" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^movs{l|d}\t{%1, %0|%0, %1}"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "SI")]) @@ -25667,8 +25682,16 @@ (plus:P (match_dup 3) (const_int 2)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && 
ix86_check_no_addr_space (insn)" - "%^movsw" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^movsw\t{%1, %0|%0, %1}"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set_attr "mode" "HI")]) @@ -25683,8 +25706,16 @@ (plus:P (match_dup 3) (const_int 1)))] "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^movsb" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 0); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^movsb\t{%1, %0|%0, %1}"; +} [(set_attr "type" "str") (set_attr "memory" "both") (set (attr "prefix_rex") @@ -25723,8 +25754,16 @@ (use (match_dup 5))] "TARGET_64BIT && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movsq" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 3))))" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^rep{%;} movsq\t{%1, %0|%0, %1}"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25743,8 +25782,16 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movs{l|d}" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 3))))" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^rep{%;} movs{l|d}\t{%1, %0|%0, %1}"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25761,8 +25808,16 @@ (mem:BLK (match_dup 4))) (use (match_dup 5))] "!(fixed_regs[CX_REG] || 
fixed_regs[SI_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" - "%^rep{%;} movsb" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 3))))" +{ + rtx exp = XVECEXP (PATTERN (insn), 0, 3); + + operands[0] = SET_DEST (exp); + operands[1] = SET_SRC (exp); + + return "%^rep{%;} movsb\t{%1, %0|%0, %1}"; +} [(set_attr "type" "str") (set_attr "prefix_rep" "1") (set_attr "memory" "both") @@ -25844,7 +25899,8 @@ (unspec [(const_int 0)] UNSPEC_STOS)] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosq" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25858,7 +25914,8 @@ (const_int 4))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25872,7 +25929,8 @@ (const_int 2))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosw" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25886,7 +25944,8 @@ (const_int 1))) (unspec [(const_int 0)] UNSPEC_STOS)] "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))" "%^stosb" [(set_attr "type" "str") (set_attr "memory" "store") @@ -25922,7 +25981,8 @@ (use (match_dup 4))] "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stosq" [(set_attr "type" "str") 
(set_attr "prefix_rep" "1") @@ -25940,7 +26000,8 @@ (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -25957,7 +26018,8 @@ (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) - && ix86_check_no_addr_space (insn)" + && ADDR_SPACE_GENERIC_P + (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))" "%^rep{%;} stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") diff --git a/gcc/testsuite/gcc.target/i386/pr111657-1.c b/gcc/testsuite/gcc.target/i386/pr111657-1.c new file mode 100644 index 000000000000..69117b72cdf1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr111657-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-sse -mtune=generic -masm=att" } */ + +typedef unsigned long uword __attribute__ ((mode (word))); + +struct a { uword arr[30]; }; + +__seg_gs struct a m; +void bar (struct a *dst) { *dst = m; } + +/* { dg-final { scan-assembler "rep\[; \t\]+movs(l|q)\[ \t\]+%gs:" } } */