At the RTL level, we cannot guarantee that the maskstore is not optimized to other full-memory accesses, since the current implementations are equivalent in terms of pattern. To solve this potential problem, this patch refines the patterns of the maskstore and the intrinsics with unspec.
One thing I'm not sure about is VCOND_EXPR: should VCOND_EXPR also expect fault suppression for masked-out elements? Currently we're still using vec_merge for both AVX2 and AVX512 target. ------------------------ Similar to r14-2070-gc79476da46728e If mem_addr points to a memory region with less than whole vector size bytes of accessible memory and k is a mask that would prevent reading the inaccessible bytes from mem_addr, add UNSPEC_MASKMOV to prevent it from being transformed to any other whole memory access instructions. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ready to push to trunk. gcc/ChangeLog: PR rtl-optimization/110237 * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with UNSPEC_MASKMOV. (maskstore<mode><avx512fmaskmodelower>): Ditto. (*<avx512>_store<mode>_mask): New define_insn, it's renamed from original <avx512>_store<mode>_mask. --- gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 3b50c7117f8..812cfca4b92 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1608,7 +1608,7 @@ (define_insn "<avx512>_blendm<mode>" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx512>_store<mode>_mask" +(define_insn "*<avx512>_store<mode>_mask" [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") (vec_merge:V48_AVX512VL (match_operand:V48_AVX512VL 1 "register_operand" "v") @@ -1636,7 +1636,7 @@ (define_insn "<avx512>_store<mode>_mask" (set_attr "memory" "store") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx512>_store<mode>_mask" +(define_insn "*<avx512>_store<mode>_mask" [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m") (vec_merge:VI12HFBF_AVX512VL (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v") @@ -27008,21 +27008,66 @@ (define_expand "maskstore<mode><sseintvecmodelower>" "TARGET_AVX") (define_expand 
"maskstore<mode><avx512fmaskmodelower>" - [(set (match_operand:V48H_AVX512VL 0 "memory_operand") - (vec_merge:V48H_AVX512VL - (match_operand:V48H_AVX512VL 1 "register_operand") - (match_dup 0) - (match_operand:<avx512fmaskmode> 2 "register_operand")))] + [(set (match_operand:V48_AVX512VL 0 "memory_operand") + (unspec:V48_AVX512VL + [(match_operand:V48_AVX512VL 1 "register_operand") + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand")] + UNSPEC_MASKMOV))] "TARGET_AVX512F") (define_expand "maskstore<mode><avx512fmaskmodelower>" - [(set (match_operand:VI12_AVX512VL 0 "memory_operand") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "register_operand") - (match_dup 0) - (match_operand:<avx512fmaskmode> 2 "register_operand")))] + [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand") + (unspec:VI12HFBF_AVX512VL + [(match_operand:VI12HFBF_AVX512VL 1 "register_operand") + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand")] + UNSPEC_MASKMOV))] "TARGET_AVX512BW") +(define_insn "<avx512>_store<mode>_mask" + [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m") + (unspec:V48_AVX512VL + [(match_operand:V48_AVX512VL 1 "register_operand" "v") + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")] + UNSPEC_MASKMOV))] + "TARGET_AVX512F" +{ + if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode))) + { + if (misaligned_operand (operands[0], <MODE>mode)) + return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + else + return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + } + else + { + if (misaligned_operand (operands[0], <MODE>mode)) + return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + else + return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; + } +} + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "store") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "<avx512>_store<mode>_mask" + [(set (match_operand:VI12HFBF_AVX512VL 0 
"memory_operand" "=m") + (unspec:VI12HFBF_AVX512VL + [(match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v") + (match_dup 0) + (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")] + UNSPEC_MASKMOV))] + "TARGET_AVX512BW" + "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix" "evex") + (set_attr "memory" "store") + (set_attr "mode" "<sseinsnmode>")]) + (define_expand "cbranch<mode>4" [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:VI48_AVX 1 "register_operand") -- 2.39.1.388.g2fc9e9ca3c