Hi! The same SCCVN change also introduced: FAIL: gcc.target/i386/avx512vl-vcvttpd2dq-2.c execution test FAIL: gcc.target/i386/avx512vl-vcvttpd2udq-2.c execution test failures, the reason in the generic code is similar, more constants propagated to builtins, but the actual bug is very different. The 128-bit instructions like vcvtpd2dqx when using non-{z} masking don't behave like their RTL pattern describes. The instructions only use low 2 bits of the mask register and the upper 2 elements of the 128-bit destination register are always cleared regardless of what the bit 2 and 3 in the mask register say and what is in the destination before, so it is incorrect to use: (set (match_operand:V4SI 0 "register_operand" "=v") (vec_merge:V4SI (vec_concat:V4SI (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm")) (const_vector:V2SI [ (const_int 0 [0]) (const_int 0 [0]) ])) (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") (match_operand:QI 3 "register_operand" "Yk"))) which is correct for the low 2 elements, but for the upper 2 elements says that if the mask bit 2 (or 3) is set, then the element will be zero (correct), but if it is clear, then it will be operands[2] element (correct if it is const0_operand, but if it is some nonimmediate and doesn't have zeros there, it is incorrect). The RTL has been correct already for similar instructions, e.g. *float<floatunssuffix>v2div2sf2{,_mask{,_1}} or some narrowing movs, this patch just uses similar RTL for those. The _mask_1 patterns are added as PR69671 will trigger on these new patterns as well, for {z} masking RTL simplification can fold that second vec_merge operand into CONST0_RTX.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-07-11 Jakub Jelinek <ja...@redhat.com> PR target/91124 * config/i386/sse.md (sse2_cvtpd2dq<mask_name>): Change into ... (sse2_cvtpd2dq): ... this. Remove mask substitution macros. (sse2_cvtpd2dq_mask, sse2_cvtpd2dq_mask_1): New define_insns. (ufix_notruncv2dfv2si2<mask_name>): Change into ... (ufix_notruncv2dfv2si2): ... this. Remove mask substitution macros. (ufix_notruncv2dfv2si2_mask, ufix_notruncv2dfv2si2_mask_1): New define_insns. (ufix_truncv2dfv2si2<mask_name>): Change into ... (ufix_truncv2dfv2si2): ... this. Remove mask substitution macros. (ufix_truncv2dfv2si2_mask, ufix_truncv2dfv2si2_mask_1): New define_insns. (sse2_cvttpd2dq<mask_name>): Change into ... (sse2_cvttpd2dq): ... this. Remove mask substitution macros. (sse2_cvttpd2dq_mask, sse2_cvttpd2dq_mask_1): New define_insns. (*sse2_cvtpd2dq<mask_name>): Change into ... (*sse2_cvtpd2dq): ... this. Remove mask substitution macros. Add "C" constraint to const0_operand. (*sse2_cvtpd2dq_mask, *sse2_cvtpd2dq_mask_1): New define_insns. (sse2_cvtpd2ps_mask): Adjust expand to match *sse2_cvtpd2ps_mask changes. --- gcc/config/i386/sse.md.jj 2019-07-08 23:52:40.239757801 +0200 +++ gcc/config/i386/sse.md 2019-07-10 22:50:15.021381492 +0200 @@ -5927,16 +5927,16 @@ (define_insn "*avx_cvtpd2dq256_2" (set_attr "btver2_decode" "vector") (set_attr "mode" "OI")]) -(define_insn "sse2_cvtpd2dq<mask_name>" +(define_insn "sse2_cvtpd2dq" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")] UNSPEC_FIX_NOTRUNC) (const_vector:V2SI [(const_int 0) (const_int 0)])))] - "TARGET_SSE2 && <mask_avx512vl_condition>" + "TARGET_SSE2" { if (TARGET_AVX) - return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; + return "vcvtpd2dq{x}\t{%1, %0|%0, %1}"; else return "cvtpd2dq\t{%1, %0|%0, %1}"; } @@ -5949,6 +5949,38 @@ (define_insn "sse2_cvtpd2dq<mask_name>" (set_attr "athlon_decode" "vector") (set_attr "bdver1_decode" "double")]) +(define_insn "sse2_cvtpd2dq_mask" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (vec_merge:V2SI + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] + UNSPEC_FIX_NOTRUNC) + (vec_select:V2SI + (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" + "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*sse2_cvtpd2dq_mask_1" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (vec_merge:V2SI + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] + UNSPEC_FIX_NOTRUNC) + (const_vector:V2SI [(const_int 0) (const_int 0)]) + (match_operand:QI 2 "register_operand" "Yk")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" + "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + ;; For ufix_notrunc* insn patterns (define_mode_attr pd2udqsuff [(V8DF "") (V4DF "{y}")]) @@ -5964,15 +5996,49 @@ (define_insn "ufix_notrunc<mode><si2dfmo (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "ufix_notruncv2dfv2si2<mask_name>" +(define_insn "ufix_notruncv2dfv2si2" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] - UNSPEC_UNSIGNED_FIX_NOTRUNC) + UNSPEC_UNSIGNED_FIX_NOTRUNC) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" + "vcvtpd2udq{x}\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "ufix_notruncv2dfv2si2_mask" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (vec_merge:V2SI + (unspec:V2SI + [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] + UNSPEC_UNSIGNED_FIX_NOTRUNC) + (vec_select:V2SI + (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" + "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*ufix_notruncv2dfv2si2_mask_1" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (vec_merge:V2SI + (unspec:V2SI + [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] + UNSPEC_UNSIGNED_FIX_NOTRUNC) + (const_vector:V2SI [(const_int 0) (const_int 0)]) + (match_operand:QI 2 "register_operand" "Yk")) (const_vector:V2SI [(const_int 0) (const_int 0)])))] "TARGET_AVX512VL" - "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) @@ -5987,13 +6053,43 @@ (define_insn "fix<fixunssuffix>_truncv8d (set_attr "prefix" "evex") (set_attr "mode" "OI")]) -(define_insn "ufix_truncv2dfv2si2<mask_name>" +(define_insn "ufix_truncv2dfv2si2" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) (const_vector:V2SI [(const_int 0) (const_int 0)])))] "TARGET_AVX512VL" - "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvttpd2udq{x}\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "ufix_truncv2dfv2si2_mask" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (vec_merge:V2SI + (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) + (vec_select:V2SI + (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" + "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*ufix_truncv2dfv2si2_mask_1" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (vec_merge:V2SI + (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) + (const_vector:V2SI [(const_int 0) (const_int 0)]) + (match_operand:QI 2 "register_operand" "Yk")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" + "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) @@ -6138,15 +6234,15 @@ (define_expand "avx_cvttpd2dq256_2" "TARGET_AVX" "operands[2] = CONST0_RTX (V4SImode);") -(define_insn "sse2_cvttpd2dq<mask_name>" +(define_insn "sse2_cvttpd2dq" [(set (match_operand:V4SI 0 "register_operand" "=v") (vec_concat:V4SI (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm")) (const_vector:V2SI [(const_int 0) (const_int 0)])))] - "TARGET_SSE2 && <mask_avx512vl_condition>" + "TARGET_SSE2" { if (TARGET_AVX) - return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"; + return "vcvttpd2dq{x}\t{%1, %0|%0, %1}"; else return "cvttpd2dq\t{%1, %0|%0, %1}"; } @@ -6157,6 +6253,36 @@ (define_insn "sse2_cvttpd2dq<mask_name>" (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) +(define_insn "sse2_cvttpd2dq_mask" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (vec_merge:V2SI + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) + (vec_select:V2SI + (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" + "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + +(define_insn "*sse2_cvttpd2dq_mask_1" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (vec_merge:V2SI + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) + (const_vector:V2SI [(const_int 0) (const_int 0)]) + (match_operand:QI 2 "register_operand" "Yk")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" + "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + (define_insn "sse2_cvtsd2ss<round_name>" [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF @@ -6276,26 +6402,28 @@ (define_expand "sse2_cvtpd2ps" (define_expand "sse2_cvtpd2ps_mask" [(set (match_operand:V4SF 0 "register_operand") - (vec_merge:V4SF - (vec_concat:V4SF + (vec_concat:V4SF + (vec_merge:V2SF (float_truncate:V2SF (match_operand:V2DF 1 "vector_operand")) - (match_dup 4)) - (match_operand:V4SF 2 "register_operand") - (match_operand:QI 3 "register_operand")))] + (vec_select:V2SF + (match_operand:V4SF 2 "nonimm_or_0_operand") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand")) + (match_dup 4)))] "TARGET_SSE2" "operands[4] = CONST0_RTX (V2SFmode);") -(define_insn "*sse2_cvtpd2ps<mask_name>" +(define_insn "*sse2_cvtpd2ps" [(set (match_operand:V4SF 0 "register_operand" "=v") (vec_concat:V4SF (float_truncate:V2SF (match_operand:V2DF 1 "vector_operand" "vBm")) - (match_operand:V2SF 2 "const0_operand")))] - "TARGET_SSE2 && <mask_avx512vl_condition>" + (match_operand:V2SF 2 "const0_operand" "C")))] + "TARGET_SSE2" { if (TARGET_AVX) - return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"; + return "vcvtpd2ps{x}\t{%1, %0|%0, %1}"; else return "cvtpd2ps\t{%1, %0|%0, %1}"; } @@ -6307,6 +6435,38 @@ (define_insn "*sse2_cvtpd2ps<mask_name>" (set_attr "prefix" "maybe_vex") (set_attr "mode" "V4SF")]) +(define_insn "*sse2_cvtpd2ps_mask" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (vec_concat:V4SF + (vec_merge:V2SF + (float_truncate:V2SF + (match_operand:V2DF 1 "nonimmediate_operand" "vm")) + (vec_select:V2SF + (match_operand:V4SF 2 "nonimm_or_0_operand" "0C") + (parallel [(const_int 0) (const_int 1)])) + (match_operand:QI 3 "register_operand" "Yk")) + (match_operand:V2SF 4 "const0_operand" "C")))] + "TARGET_AVX512VL" + "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "V4SF")]) + +(define_insn "*sse2_cvtpd2ps_mask_1" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (vec_concat:V4SF + (vec_merge:V2SF + (float_truncate:V2SF + (match_operand:V2DF 1 "nonimmediate_operand" "vm")) + (match_operand:V2SF 3 "const0_operand" "C") + (match_operand:QI 2 "register_operand" "Yk")) + (match_operand:V2SF 4 "const0_operand" "C")))] + "TARGET_AVX512VL" + "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "V4SF")]) + ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern (define_mode_attr sf2dfmode [(V8DF "V8SF") (V4DF "V4SF")]) Jakub