At the RTL level, we cannot guarantee that a maskstore will not be
optimized into other full-memory accesses, since the current
vec_merge-based pattern is semantically equivalent to one. To close
this potential hole, this patch refines the patterns of the maskstore
and the intrinsics with an unspec.
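
To make the hazard concrete, here is a hypothetical C sketch (not the
testcase from the PR; the intrinsic and the mask value are just
illustrative):

  /* Hypothetical: buf points at the last 12 accessible bytes of a
     page, so lane 3 of a 128-bit store would touch an unmapped byte.
     The 0x7 mask suppresses lane 3; if RTL optimizations rewrote this
     masked store as a plain full-vector store, it would fault.  */
  #include <immintrin.h>

  void
  store_three_lanes (int *buf, __m128i v)
  {
    _mm_mask_storeu_epi32 (buf, 0x7, v);  /* Store lanes 0..2 only.  */
  }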

One thing I'm not sure about is VCOND_EXPR: should VCOND_EXPR also
guarantee fault suppression for masked-out elements?

Currently we're still using vec_merge for both AVX2 and AVX512 targets.
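
For context, the kind of source pattern where VEC_COND_EXPR enters the
picture looks something like this (illustrative only; actual codegen
depends on optimization level and target flags):

  /* The vectorizer may turn this conditional update into a
     VEC_COND_EXPR feeding a vector store; the open question is whether
     its masked-out elements must also be fault-suppressed.  */
  void
  cond_update (int *a, int n)
  {
    for (int i = 0; i < n; i++)
      a[i] = a[i] > 0 ? 0 : a[i];
  }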

------------------------
Similar to r14-2070-gc79476da46728e.

If mem_addr points to a memory region with fewer than whole-vector-size
bytes of accessible memory, and k is a mask that would prevent the
inaccessible bytes from being accessed through mem_addr, add
UNSPEC_MASKMOV to prevent the store from being transformed into any
other whole-memory-access instruction.
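
As a byte-granular sketch (hypothetical, mirroring the TARGET_AVX512BW
pattern below), only the bytes selected by the mask may touch memory:

  /* Hypothetical: write only the first n bytes of a 16-byte vector.
     With UNSPEC_MASKMOV the pattern cannot be rewritten as a full
     16-byte access, so the bytes beyond the mask stay untouched.  */
  #include <immintrin.h>

  void
  store_prefix_bytes (char *dst, __m128i v, unsigned n)  /* n <= 16.  */
  {
    __mmask16 k = (__mmask16) ((1u << n) - 1);
    _mm_mask_storeu_epi8 (dst, k, v);
  }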

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

        PR rtl-optimization/110237
        * config/i386/sse.md (<avx512>_store<mode>_mask): Refine with
        UNSPEC_MASKMOV.
        (maskstore<mode><avx512fmaskmodelower>): Ditto.
        (*<avx512>_store<mode>_mask): New define_insn, renamed from
        the original <avx512>_store<mode>_mask.
---
 gcc/config/i386/sse.md | 69 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 57 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3b50c7117f8..812cfca4b92 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1608,7 +1608,7 @@ (define_insn "<avx512>_blendm<mode>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<avx512>_store<mode>_mask"
+(define_insn "*<avx512>_store<mode>_mask"
   [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:V48_AVX512VL
          (match_operand:V48_AVX512VL 1 "register_operand" "v")
@@ -1636,7 +1636,7 @@ (define_insn "<avx512>_store<mode>_mask"
    (set_attr "memory" "store")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<avx512>_store<mode>_mask"
+(define_insn "*<avx512>_store<mode>_mask"
   [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
        (vec_merge:VI12HFBF_AVX512VL
          (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
@@ -27008,21 +27008,66 @@ (define_expand "maskstore<mode><sseintvecmodelower>"
   "TARGET_AVX")
 
 (define_expand "maskstore<mode><avx512fmaskmodelower>"
-  [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
-       (vec_merge:V48H_AVX512VL
-         (match_operand:V48H_AVX512VL 1 "register_operand")
-         (match_dup 0)
-         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
+       (unspec:V48_AVX512VL
+         [(match_operand:V48_AVX512VL 1 "register_operand")
+          (match_dup 0)
+          (match_operand:<avx512fmaskmode> 2 "register_operand")]
+         UNSPEC_MASKMOV))]
   "TARGET_AVX512F")
 
 (define_expand "maskstore<mode><avx512fmaskmodelower>"
-  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
-       (vec_merge:VI12_AVX512VL
-         (match_operand:VI12_AVX512VL 1 "register_operand")
-         (match_dup 0)
-         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand")
+       (unspec:VI12HFBF_AVX512VL
+         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand")
+          (match_dup 0)
+          (match_operand:<avx512fmaskmode> 2 "register_operand")]
+         UNSPEC_MASKMOV))]
   "TARGET_AVX512BW")
 
+(define_insn "<avx512>_store<mode>_mask"
+  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
+       (unspec:V48_AVX512VL
+         [(match_operand:V48_AVX512VL 1 "register_operand" "v")
+          (match_dup 0)
+          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
+         UNSPEC_MASKMOV))]
+  "TARGET_AVX512F"
+{
+  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
+    {
+      if (misaligned_operand (operands[0], <MODE>mode))
+       return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+      else
+       return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+    }
+  else
+    {
+      if (misaligned_operand (operands[0], <MODE>mode))
+       return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+      else
+       return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+    }
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "memory" "store")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_store<mode>_mask"
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
+       (unspec:VI12HFBF_AVX512VL
+         [(match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
+          (match_dup 0)
+          (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
+          UNSPEC_MASKMOV))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "memory" "store")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_expand "cbranch<mode>4"
   [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:VI48_AVX 1 "register_operand")
-- 
2.39.1.388.g2fc9e9ca3c
