If mem_addr points to a memory region with fewer accessible bytes than
a whole vector, and k is a mask that would prevent reading the
inaccessible bytes from mem_addr, the masked load must not be
transformed into vpblendd.  Add UNSPEC_MASKLOAD to the patterns to
prevent that transformation.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to master.

gcc/ChangeLog:

        PR target/110309
        * config/i386/sse.md (maskload<mode><avx512fmaskmodelower>):
        Refine pattern with UNSPEC_MASKLOAD.
        (maskload<mode><avx512fmaskmodelower>): Ditto.
        (*<avx512>_load<mode>_mask): Extend mode iterator to
        VI12HFBF_AVX512VL.
        (*<avx512>_load<mode>): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr110309.c: New test.
---
 gcc/config/i386/sse.md                   | 32 +++++++++++++-----------
 gcc/testsuite/gcc.target/i386/pr110309.c | 10 ++++++++
 2 files changed, 28 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110309.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 87570357db6..4d1f7ac8d7e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1465,12 +1465,12 @@ (define_expand "<avx512>_load<mode>_mask"
 })
 
 (define_insn "*<avx512>_load<mode>_mask"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
-       (vec_merge:VI12_AVX512VL
-         (unspec:VI12_AVX512VL
-           [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+       (vec_merge:VI12HFBF_AVX512VL
+         (unspec:VI12HFBF_AVX512VL
+           [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
            UNSPEC_MASKLOAD)
-         (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
+         (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C")
          (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
   "TARGET_AVX512BW"
   "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
@@ -1479,9 +1479,9 @@ (define_insn "*<avx512>_load<mode>_mask"
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn_and_split "*<avx512>_load<mode>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
-       (unspec:VI12_AVX512VL
-         [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+       (unspec:VI12HFBF_AVX512VL
+         [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
          UNSPEC_MASKLOAD))]
   "TARGET_AVX512BW"
   "#"
@@ -26883,17 +26883,21 @@ (define_expand "maskload<mode><sseintvecmodelower>"
   "TARGET_AVX")
 
 (define_expand "maskload<mode><avx512fmaskmodelower>"
-  [(set (match_operand:V48H_AVX512VL 0 "register_operand")
-       (vec_merge:V48H_AVX512VL
-         (match_operand:V48H_AVX512VL 1 "memory_operand")
+  [(set (match_operand:V48_AVX512VL 0 "register_operand")
+       (vec_merge:V48_AVX512VL
+         (unspec:V48_AVX512VL
+           [(match_operand:V48_AVX512VL 1 "memory_operand")]
+           UNSPEC_MASKLOAD)
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand")))]
   "TARGET_AVX512F")
 
 (define_expand "maskload<mode><avx512fmaskmodelower>"
-  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
-       (vec_merge:VI12_AVX512VL
-         (match_operand:VI12_AVX512VL 1 "memory_operand")
+  [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand")
+       (vec_merge:VI12HFBF_AVX512VL
+         (unspec:VI12HFBF_AVX512VL
+           [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
+           UNSPEC_MASKLOAD)
          (match_dup 0)
          (match_operand:<avx512fmaskmode> 2 "register_operand")))]
   "TARGET_AVX512BW")
diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c
new file mode 100644
index 00000000000..f6e9e9c3c61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110309.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
+/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
+
+
+void foo (int * __restrict a, int *b)
+{
+  for (int i = 0; i < 6; ++i)
+    a[i] = b[i] + 42;
+}
-- 
2.39.1.388.g2fc9e9ca3c

Reply via email to