XMM broadcast instructions broadcast a value from a general register to all elements of the vector. This is not allowed for TARGET_MMX_WITH_SSE, where the bits outside the lower 64 bits are expected to be loaded with, or to retain, a zero value. The following testcases expect a broadcast and are thus invalid:

FAIL: gcc.target/i386/sse2-mmx-18b.c scan-assembler-not movd
FAIL: gcc.target/i386/sse2-mmx-18b.c scan-assembler-times pbroadcastd 1
FAIL: gcc.target/i386/sse2-mmx-19b.c scan-assembler-not movd
FAIL: gcc.target/i386/sse2-mmx-19b.c scan-assembler-times pbroadcastw 1
FAIL: gcc.target/i386/sse2-mmx-19d.c scan-assembler-times pbroadcastw 1
FAIL: gcc.target/i386/sse2-mmx-19e.c scan-assembler-times pbroadcastw 1

These testcases will be fixed or removed entirely.

(This patch is a prerequisite for implementing support for generic v2sf/v2si/v4hi shuffles.)

2020-05-24  Uroš Bizjak  <ubiz...@gmail.com>

gcc/ChangeLog:

	* config/i386/mmx.md (*vec_dupv2sf): Redefine as define_insn.
	(mmx_pshufw_1): Change Yv constraint to xYw.  Correct type attribute.
	(*vec_dupv4hi): Redefine as define_insn.  Remove alternative with
	general register input.
	(*vec_dupv2si): Ditto.

Uros.

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 5deef683b0b..b5564711aa4 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -947,27 +947,22 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "V2SF")]) -(define_insn_and_split "*vec_dupv2sf" +(define_insn "*vec_dupv2sf" [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv") (vec_duplicate:V2SF (match_operand:SF 1 "register_operand" "0,0,Yv")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ punpckldq\t%0, %0 - # - #" - "TARGET_SSE && reload_completed - && SSE_REGNO_P (REGNO (operands[0]))" - [(set (match_dup 0) - (vec_duplicate:V4SF (match_dup 1)))] -{ - operands[0] = lowpart_subreg (V4SFmode, operands[0], - GET_MODE (operands[0])); -} - [(set_attr "isa" "*,sse_noavx,avx") + shufps\t{$0xe0, %0, %0|%0, %0, 0xe0} + %vmovsldup\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,sse_noavx,sse3") (set_attr "mmx_isa" "native,*,*") - (set_attr "type" "mmxcvt,ssemov,ssemov") - (set_attr "mode" "DI,TI,TI")]) + (set_attr "type" "mmxcvt,sseshuf1,sse") + (set_attr "length_immediate" "*,1,*") + (set_attr "prefix_rep" "*,*,1") + (set_attr "prefix" "*,orig,maybe_vex") + (set_attr "mode" "DI,V4SF,V4SF")]) (define_insn "*mmx_concatv2sf" [(set (match_operand:V2SF 0 "register_operand" "=y,y") @@ -1960,9 +1955,9 @@ }) (define_insn "mmx_pshufw_1" - [(set (match_operand:V4HI 0 "register_operand" "=y,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,xYw") (vec_select:V4HI - (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yv") + (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -1989,7 +1984,7 @@ } [(set_attr "isa" "*,sse2") (set_attr "mmx_isa" "native,*") - (set_attr "type" "mmxcvt,sselog") + (set_attr "type" "mmxcvt,sselog1") (set_attr "length_immediate" "1") (set_attr "mode" "DI,TI")]) @@ -2004,77 +1999,37 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) 
-(define_insn_and_split "*vec_dupv4hi" - [(set (match_operand:V4HI 0 "register_operand" "=y,xYw,Yw") +(define_insn "*vec_dupv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=y,xYw") (vec_duplicate:V4HI (truncate:HI - (match_operand:SI 1 "register_operand" "0,xYw,r"))))] + (match_operand:SI 1 "register_operand" "0,xYw"))))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" "@ pshufw\t{$0, %0, %0|%0, %0, 0} - # - #" - "TARGET_SSE2 && reload_completed - && SSE_REGNO_P (REGNO (operands[0]))" - [(const_int 0)] -{ - rtx op; - operands[0] = lowpart_subreg (V8HImode, operands[0], - GET_MODE (operands[0])); - if (TARGET_AVX2) - { - operands[1] = lowpart_subreg (HImode, operands[1], - GET_MODE (operands[1])); - op = gen_rtx_VEC_DUPLICATE (V8HImode, operands[1]); - } - else - { - operands[1] = lowpart_subreg (V8HImode, operands[1], - GET_MODE (operands[1])); - rtx mask = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (8, - GEN_INT (0), - GEN_INT (0), - GEN_INT (0), - GEN_INT (0), - GEN_INT (4), - GEN_INT (5), - GEN_INT (6), - GEN_INT (7))); - - op = gen_rtx_VEC_SELECT (V8HImode, operands[1], mask); - } - emit_insn (gen_rtx_SET (operands[0], op)); - DONE; -} - [(set_attr "mmx_isa" "native,sse,avx") - (set_attr "type" "mmxcvt,sselog1,ssemov") - (set_attr "length_immediate" "1,1,0") - (set_attr "mode" "DI,TI,TI")]) + %vpshuflw\t{$0, %1, %0|%0, %1, 0}" + [(set_attr "isa" "*,sse2") + (set_attr "mmx_isa" "native,*") + (set_attr "type" "mmxcvt,sselog1") + (set_attr "length_immediate" "1") + (set_attr "mode" "DI,TI")]) + -(define_insn_and_split "*vec_dupv2si" - [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw") +(define_insn "*vec_dupv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y,Yv") (vec_duplicate:V2SI - (match_operand:SI 1 "register_operand" "0,0,Yv,r")))] + (match_operand:SI 1 "register_operand" "0,Yv")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ punpckldq\t%0, %0 - # - # - #" - "TARGET_SSE && reload_completed - && SSE_REGNO_P (REGNO 
(operands[0]))" - [(set (match_dup 0) - (vec_duplicate:V4SI (match_dup 1)))] -{ - operands[0] = lowpart_subreg (V4SImode, operands[0], - GET_MODE (operands[0])); -} - [(set_attr "isa" "*,sse_noavx,avx,avx") - (set_attr "mmx_isa" "native,*,*,*") - (set_attr "type" "mmxcvt,ssemov,ssemov,ssemov") - (set_attr "mode" "DI,TI,TI,TI")]) + %vpshufd\t{$0xe0, %1, %0|%0, %1, 0xe0}" + [(set_attr "isa" "*,sse2") + (set_attr "mmx_isa" "native,*") + (set_attr "type" "mmxcvt,sselog1") + (set_attr "prefix_data16" "*,1") + (set_attr "length_immediate" "*,1") + (set_attr "mode" "DI,TI")]) (define_insn "*mmx_concatv2si" [(set (match_operand:V2SI 0 "register_operand" "=y,y")