Hello,
This patch introduces missing AVX-512PF intrinsics and tests.
It also renames store/load intrinsics according to EAS.
gcc/
* config/i386/avx512fintrin.h (_mm512_loadu_si512): Rename.
(_mm512_storeu_si512): Ditto.
* config/i386/avx512pfintrin.h (_mm512_mask_prefetch_i32gather_pd): New.
(_mm512_mask_prefetch_i64gather_pd): Ditto.
(_mm512_prefetch_i32scatter_pd): Ditto.
(_mm512_mask_prefetch_i32scatter_pd): Ditto.
(_mm512_prefetch_i64scatter_pd): Ditto.
(_mm512_mask_prefetch_i64scatter_pd): Ditto.
(_mm512_mask_prefetch_i32gather_ps): Fix operand type.
(_mm512_mask_prefetch_i64gather_ps): Ditto.
(_mm512_prefetch_i32scatter_ps): Ditto.
(_mm512_mask_prefetch_i32scatter_ps): Ditto.
(_mm512_prefetch_i64scatter_ps): Ditto.
(_mm512_mask_prefetch_i64scatter_ps): Ditto.
* config/i386/i386-builtin-types.def: Define
VOID_FTYPE_QI_V8SI_PCINT64_INT_INT and
VOID_FTYPE_QI_V8DI_PCINT64_INT_INT.
* config/i386/i386.c (ix86_builtins): Define IX86_BUILTIN_GATHERPFQPD,
IX86_BUILTIN_GATHERPFDPD, IX86_BUILTIN_SCATTERPFDPD,
IX86_BUILTIN_SCATTERPFQPD.
(ix86_init_mmx_sse_builtins): Define __builtin_ia32_gatherpfdpd,
__builtin_ia32_gatherpfdps, __builtin_ia32_gatherpfqpd,
__builtin_ia32_gatherpfqps, __builtin_ia32_scatterpfdpd,
__builtin_ia32_scatterpfdps, __builtin_ia32_scatterpfqpd,
__builtin_ia32_scatterpfqps.
(ix86_expand_builtin): Expand new built-ins.
* config/i386/sse.md (avx512pf_gatherpf<mode>): Add SF suffix,
fix memory access data type.
(*avx512pf_gatherpf<mode>_mask): Ditto.
(*avx512pf_gatherpf<mode>): Ditto.
(avx512pf_scatterpf<mode>): Ditto.
(*avx512pf_scatterpf<mode>_mask): Ditto.
(*avx512pf_scatterpf<mode>): Ditto.
(avx512pf_gatherpf<mode>df): New.
(*avx512pf_gatherpf<mode>df_mask): Ditto.
(*avx512pf_gatherpf<mode>df): Ditto.
(avx512pf_scatterpf<mode>df): Ditto.
(*avx512pf_scatterpf<mode>df_mask): Ditto.
(*avx512pf_scatterpf<mode>df): Ditto.
testsuite/
* gcc.target/i386/avx512f-vmovdqu32-1.c: Fix intrinsic name.
* gcc.target/i386/avx512f-vmovdqu32-2.c: Ditto.
* gcc.target/i386/avx512f-vpcmpd-2.c: Ditto.
* gcc.target/i386/avx512f-vpcmpq-2.c: Ditto.
* gcc.target/i386/avx512f-vpcmpud-2.c: Ditto.
* gcc.target/i386/avx512f-vpcmpuq-2.c: Ditto.
* gcc.target/i386/avx512pf-vgatherpf0dpd-1.c: Ditto.
* gcc.target/i386/avx512pf-vgatherpf0qpd-1.c: Ditto.
* gcc.target/i386/avx512pf-vgatherpf1dpd-1.c: Ditto.
* gcc.target/i386/avx512pf-vgatherpf1qpd-1.c: Ditto.
* gcc.target/i386/avx512pf-vscatterpf0dpd-1.c: Ditto.
* gcc.target/i386/avx512pf-vscatterpf0qpd-1.c: Ditto.
* gcc.target/i386/avx512pf-vscatterpf1dpd-1.c: Ditto.
* gcc.target/i386/avx512pf-vscatterpf1qpd-1.c: Ditto.
* gcc.target/i386/sse-14.c: Add new built-ins, fix AVX-512ER
built-ins roudning immediate.
* gcc.target/i386/sse-22.c: Add new built-ins.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/avx-1.c: Ditto.
I have a doubts about changes to sse.md.
I've splitted existing (SF-only) patterns into 2: DF and SF.
As far as insn operands and final instruction have no such data
type discrimination I set this data type to (mem:..) part.
Having this (for SF):
(define_expand "avx512pf_scatterpf<mode>sf"
[(unspec
[(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
(mem:SF
...
instead of this:
(define_expand "avx512pf_scatterpf<mode>"
[(unspec
[(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
(mem:<ssescalarmode>
...
Not sure if this (DI/SI) mode for mem is needed. Moreover, not sure what
that data type represents.
Patch in the bottom. AVX* and SSE* tests pass.
Comments or it is ok for trunk?
--
Thanks, K
---
gcc/config/i386/avx512fintrin.h | 4 +-
gcc/config/i386/avx512pfintrin.h | 113 ++++++++++++--
gcc/config/i386/i386-builtin-types.def | 2 +
gcc/config/i386/i386.c | 37 ++++-
gcc/config/i386/sse.md | 171 +++++++++++++++++++--
gcc/testsuite/gcc.target/i386/avx-1.c | 4 +
.../gcc.target/i386/avx512f-vmovdqu32-1.c | 4 +-
.../gcc.target/i386/avx512f-vmovdqu32-2.c | 4 +-
gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c | 4 +-
gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c | 4 +-
gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c | 4 +-
gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c | 4 +-
.../gcc.target/i386/avx512pf-vgatherpf0dpd-1.c | 15 ++
.../gcc.target/i386/avx512pf-vgatherpf0qpd-1.c | 15 ++
.../gcc.target/i386/avx512pf-vgatherpf1dpd-1.c | 15 ++
.../gcc.target/i386/avx512pf-vgatherpf1qpd-1.c | 15 ++
.../gcc.target/i386/avx512pf-vscatterpf0dpd-1.c | 17 ++
.../gcc.target/i386/avx512pf-vscatterpf0qpd-1.c | 17 ++
.../gcc.target/i386/avx512pf-vscatterpf1dpd-1.c | 17 ++
.../gcc.target/i386/avx512pf-vscatterpf1qpd-1.c | 17 ++
gcc/testsuite/gcc.target/i386/sse-14.c | 40 ++---
gcc/testsuite/gcc.target/i386/sse-22.c | 5 +
gcc/testsuite/gcc.target/i386/sse-23.c | 4 +
23 files changed, 469 insertions(+), 63 deletions(-)
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 26f8cb6..4e94174 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -5570,7 +5570,7 @@ _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U,
__m512i __A)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_loadu_si512 (void const *__P)
+_mm512_loadu_epi32 (void const *__P)
{
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
(__v16si)
@@ -5599,7 +5599,7 @@ _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_storeu_si512 (void *__P, __m512i __A)
+_mm512_storeu_epi32 (void *__P, __m512i __A)
{
__builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
(__mmask16) -1);
diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h
index b8c0110..bc7598e 100644
--- a/gcc/config/i386/avx512pfintrin.h
+++ b/gcc/config/i386/avx512pfintrin.h
@@ -48,74 +48,157 @@ typedef unsigned short __mmask16;
#ifdef __OPTIMIZE__
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i index, __mmask8 mask,
+ void *addr, int scale, int hint)
+{
+ __builtin_ia32_gatherpfdpd (mask, (__v8si) index, (long long const *) addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask,
- int const *addr, int scale, int hint)
+ void *addr, int scale, int hint)
+{
+ __builtin_ia32_gatherpfdps (mask, (__v16si) index, (int const *) addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64gather_pd (__m512i index, __mmask8 mask,
+ void *addr, int scale, int hint)
{
- __builtin_ia32_gatherpfdps (mask, (__v16si) index, addr, scale, hint);
+ __builtin_ia32_gatherpfqpd (mask, (__v8di) index, (long long const *) addr,
+ scale, hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask,
- int const *addr, int scale, int hint)
+ void *addr, int scale, int hint)
{
- __builtin_ia32_gatherpfqps (mask, (__v8di) index, addr, scale, hint);
+ __builtin_ia32_gatherpfqps (mask, (__v8di) index, (int const *) addr,
+ scale, hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i32scatter_ps (int const *addr, __m512i index, int scale,
+_mm512_prefetch_i32scatter_pd (void *addr, __m256i index, int scale,
int hint)
{
- __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, addr,
scale,
- hint);
+ __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) index,
+ (long long const *)addr, scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32scatter_ps (void *addr, __m512i index, int scale,
+ int hint)
+{
+ __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, (int const
*) addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32scatter_pd (void *addr, __mmask8 mask,
+ __m256i index, int scale, int hint)
+{
+ __builtin_ia32_scatterpfdpd (mask, (__v8si) index, (long long const *) addr,
+ scale, hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32scatter_ps (int const *addr, __mmask16 mask,
+_mm512_mask_prefetch_i32scatter_ps (void *addr, __mmask16 mask,
__m512i index, int scale, int hint)
{
- __builtin_ia32_scatterpfdps (mask, (__v16si) index, addr, scale, hint);
+ __builtin_ia32_scatterpfdps (mask, (__v16si) index, (int const *) addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i64scatter_pd (void *addr, __m512i index, int scale,
+ int hint)
+{
+ __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) index, (long long
const *) addr,
+ scale, hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i64scatter_ps (int const *addr, __m512i index, int scale,
+_mm512_prefetch_i64scatter_ps (void *addr, __m512i index, int scale,
int hint)
{
- __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, addr, scale,
- hint);
+ __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, (int const *)
addr,
+ scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64scatter_pd (void *addr, __mmask16 mask,
+ __m512i index, int scale, int hint)
+{
+ __builtin_ia32_scatterpfqpd (mask, (__v8di) index, (long long const *) addr,
+ scale, hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64scatter_ps (int const *addr, __mmask16 mask,
+_mm512_mask_prefetch_i64scatter_ps (void *addr, __mmask16 mask,
__m512i index, int scale, int hint)
{
- __builtin_ia32_scatterpfqps (mask, (__v8di) index, addr, scale, hint);
+ __builtin_ia32_scatterpfqps (mask, (__v8di) index, (int const *) addr,
+ scale, hint);
}
+
#else
+#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
+ __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX, \
+ (long long const *)ADDR, (int)SCALE, (int)HINT)
+
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
- __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,
\
+ __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
(int const *)ADDR, (int)SCALE, (int)HINT)
+#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
+ __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
+ (long long const *)ADDR, (int)SCALE, (int)HINT)
+
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
__builtin_ia32_gatherpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
(int const *)ADDR, (int)SCALE, (int)HINT)
+#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \
+ __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \
+ (long long const *)ADDR, (int)SCALE, (int)HINT)
+
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \
(int const *)ADDR, (int)SCALE, (int)HINT)
+#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
+ __builtin_ia32_scatterpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX, \
+ (long long const *)ADDR, (int)SCALE, (int)HINT)
+
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
(int const *)ADDR, (int)SCALE, (int)HINT)
+#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \
+ __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
+ (long long const *)ADDR, (int)SCALE, (int)HINT)
+
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
(int const *)ADDR, (int)SCALE, (int)HINT)
+#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
+ __builtin_ia32_scatterpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
+ (long long const *)ADDR, (int)SCALE, (int)HINT)
+
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
(int const *)ADDR, (int)SCALE, (int)HINT)
diff --git a/gcc/config/i386/i386-builtin-types.def
b/gcc/config/i386/i386-builtin-types.def
index acf2f32..f3c658b 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -733,7 +733,9 @@ DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V8DI, INT)
DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT)
+DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT)
DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCINT, INT, INT)
+DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT64, INT, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT)
DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3cda147..547c1f1 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28323,9 +28323,13 @@ enum ix86_builtins
IX86_BUILTIN_SCATTERSIV8DI,
/* AVX512PF */
+ IX86_BUILTIN_GATHERPFQPD,
IX86_BUILTIN_GATHERPFDPS,
+ IX86_BUILTIN_GATHERPFDPD,
IX86_BUILTIN_GATHERPFQPS,
+ IX86_BUILTIN_SCATTERPFDPD,
IX86_BUILTIN_SCATTERPFDPS,
+ IX86_BUILTIN_SCATTERPFQPD,
IX86_BUILTIN_SCATTERPFQPS,
/* AVX-512ER */
@@ -30855,15 +30859,27 @@ ix86_init_mmx_sse_builtins (void)
IX86_BUILTIN_SCATTERDIV8DI);
/* AVX512PF */
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
+ VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
+ IX86_BUILTIN_GATHERPFDPD);
def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
IX86_BUILTIN_GATHERPFDPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
+ VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
+ IX86_BUILTIN_GATHERPFQPD);
def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
IX86_BUILTIN_GATHERPFQPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
+ VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
+ IX86_BUILTIN_SCATTERPFDPD);
def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
IX86_BUILTIN_SCATTERPFDPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
+ VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
+ IX86_BUILTIN_SCATTERPFQPD);
def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
IX86_BUILTIN_SCATTERPFQPS);
@@ -35509,17 +35525,30 @@ addcarryx:
case IX86_BUILTIN_SCATTERDIV8DI:
icode = CODE_FOR_avx512f_scatterdiv8di;
goto scatter_gen;
+
+ case IX86_BUILTIN_GATHERPFDPD:
+ icode = CODE_FOR_avx512pf_gatherpfv8sidf;
+ goto vec_prefetch_gen;
case IX86_BUILTIN_GATHERPFDPS:
- icode = CODE_FOR_avx512pf_gatherpfv16si;
+ icode = CODE_FOR_avx512pf_gatherpfv16sisf;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_GATHERPFQPD:
+ icode = CODE_FOR_avx512pf_gatherpfv8didf;
goto vec_prefetch_gen;
case IX86_BUILTIN_GATHERPFQPS:
- icode = CODE_FOR_avx512pf_gatherpfv8di;
+ icode = CODE_FOR_avx512pf_gatherpfv8disf;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_SCATTERPFDPD:
+ icode = CODE_FOR_avx512pf_scatterpfv8sidf;
goto vec_prefetch_gen;
case IX86_BUILTIN_SCATTERPFDPS:
- icode = CODE_FOR_avx512pf_scatterpfv16si;
+ icode = CODE_FOR_avx512pf_scatterpfv16sisf;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_SCATTERPFQPD:
+ icode = CODE_FOR_avx512pf_scatterpfv8didf;
goto vec_prefetch_gen;
case IX86_BUILTIN_SCATTERPFQPS:
- icode = CODE_FOR_avx512pf_scatterpfv8di;
+ icode = CODE_FOR_avx512pf_scatterpfv8disf;
goto vec_prefetch_gen;
gather_gen:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 31e94fe..419c33a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -417,6 +417,7 @@
[V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
(define_mode_iterator VI48_256 [V8SI V4DI])
(define_mode_iterator VI48_512 [V16SI V8DI])
+(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
@@ -12492,10 +12493,11 @@
(set_attr "btver2_decode" "vector,vector,vector,vector")
(set_attr "mode" "TI")])
-(define_expand "avx512pf_gatherpf<mode>"
+;; Packed float variants
+(define_expand "avx512pf_gatherpf<mode>sf"
[(unspec
[(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
- (mem:<ssescalarmode>
+ (mem:SF
(match_par_dup 5
[(match_operand 2 "vsib_address_operand")
(match_operand:VI48_512 1 "register_operand")
@@ -12509,10 +12511,10 @@
operands[3]), UNSPEC_VSIBADDR);
})
-(define_insn "*avx512pf_gatherpf<mode>_mask"
+(define_insn "*avx512pf_gatherpf<mode>sf_mask"
[(unspec
[(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
- (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+ (match_operator:SF 5 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 2 "vsib_address_operand" "Tv")
(match_operand:VI48_512 1 "register_operand" "v")
@@ -12536,10 +12538,10 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "*avx512pf_gatherpf<mode>"
+(define_insn "*avx512pf_gatherpf<mode>sf"
[(unspec
[(const_int -1)
- (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
+ (match_operator:SF 4 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 1 "vsib_address_operand" "Tv")
(match_operand:VI48_512 0 "register_operand" "v")
@@ -12563,10 +12565,83 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_expand "avx512pf_scatterpf<mode>"
+;; Packed double variants
+(define_expand "avx512pf_gatherpf<mode>df"
[(unspec
[(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
- (mem:<ssescalarmode>
+ (mem:DF
+ (match_par_dup 5
+ [(match_operand 2 "vsib_address_operand")
+ (match_operand:VI4_256_8_512 1 "register_operand")
+ (match_operand:SI 3 "const1248_operand")]))
+ (match_operand:SI 4 "const_0_to_1_operand")]
+ UNSPEC_GATHER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ operands[5]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
+ operands[3]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512pf_gatherpf<mode>df_mask"
+ [(unspec
+ [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
+ (match_operator:DF 5 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "Tv")
+ (match_operand:VI4_256_8_512 1 "register_operand" "v")
+ (match_operand:SI 3 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (match_operand:SI 4 "const_0_to_1_operand" "n")]
+ UNSPEC_GATHER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ switch (INTVAL (operands[4]))
+ {
+ case 0:
+ return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ case 1:
+ return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "*avx512pf_gatherpf<mode>df"
+ [(unspec
+ [(const_int -1)
+ (match_operator:DF 4 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 1 "vsib_address_operand" "Tv")
+ (match_operand:VI4_256_8_512 0 "register_operand" "v")
+ (match_operand:SI 2 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (match_operand:SI 3 "const_0_to_1_operand" "n")]
+ UNSPEC_GATHER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
+ case 1:
+ return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+;; Packed float variants
+(define_expand "avx512pf_scatterpf<mode>sf"
+ [(unspec
+ [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
+ (mem:SF
(match_par_dup 5
[(match_operand 2 "vsib_address_operand")
(match_operand:VI48_512 1 "register_operand")
@@ -12580,10 +12655,10 @@
operands[3]), UNSPEC_VSIBADDR);
})
-(define_insn "*avx512pf_scatterpf<mode>_mask"
+(define_insn "*avx512pf_scatterpf<mode>sf_mask"
[(unspec
[(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
- (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+ (match_operator:SF 5 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 2 "vsib_address_operand" "Tv")
(match_operand:VI48_512 1 "register_operand" "v")
@@ -12607,10 +12682,10 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "*avx512pf_scatterpf<mode>"
+(define_insn "*avx512pf_scatterpf<mode>sf"
[(unspec
[(const_int -1)
- (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
+ (match_operator:SF 4 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 1 "vsib_address_operand" "Tv")
(match_operand:VI48_512 0 "register_operand" "v")
@@ -12634,6 +12709,78 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
+;; Packed double variants
+(define_expand "avx512pf_scatterpf<mode>df"
+ [(unspec
+ [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
+ (mem:DF
+ (match_par_dup 5
+ [(match_operand 2 "vsib_address_operand")
+ (match_operand:VI4_256_8_512 1 "register_operand")
+ (match_operand:SI 3 "const1248_operand")]))
+ (match_operand:SI 4 "const_0_to_1_operand")]
+ UNSPEC_SCATTER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ operands[5]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
+ operands[3]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512pf_scatterpf<mode>df_mask"
+ [(unspec
+ [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
+ (match_operator:DF 5 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "Tv")
+ (match_operand:VI4_256_8_512 1 "register_operand" "v")
+ (match_operand:SI 3 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (match_operand:SI 4 "const_0_to_1_operand" "n")]
+ UNSPEC_SCATTER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ switch (INTVAL (operands[4]))
+ {
+ case 0:
+ return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ case 1:
+ return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "*avx512pf_scatterpf<mode>df"
+ [(unspec
+ [(const_int -1)
+ (match_operator:DF 4 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 1 "vsib_address_operand" "Tv")
+ (match_operand:VI4_256_8_512 0 "register_operand" "v")
+ (match_operand:SI 2 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (match_operand:SI 3 "const_0_to_1_operand" "n")]
+ UNSPEC_SCATTER_PREFETCH)]
+ "TARGET_AVX512PF"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
+ case 1:
+ return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(unspec:VF_512
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c
b/gcc/testsuite/gcc.target/i386/avx-1.c
index 12674ad..8fb6fb88 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -362,6 +362,10 @@
#define __builtin_ia32_gatherpfqps(A, B, C, D, E)
__builtin_ia32_gatherpfqps(A, B, C, 1, 1)
#define __builtin_ia32_scatterpfdps(A, B, C, D, E)
__builtin_ia32_scatterpfdps(A, B, C, 1, 1)
#define __builtin_ia32_scatterpfqps(A, B, C, D, E)
__builtin_ia32_scatterpfqps(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfdpd(A, B, C, D, E)
__builtin_ia32_gatherpfdpd(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfqpd(A, B, C, D, E)
__builtin_ia32_gatherpfqpd(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfdpd(A, B, C, D, E)
__builtin_ia32_scatterpfdpd(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfqpd(A, B, C, D, E)
__builtin_ia32_scatterpfqpd(A, B, C, 1, 1)
/* shaintrin.h */
#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c
b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c
index 79dbf9d..66e358a 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-1.c
@@ -15,10 +15,10 @@ volatile __mmask16 m;
void extern
avx512f_test (void)
{
- x = _mm512_loadu_si512 (p);
+ x = _mm512_loadu_epi32 (p);
x = _mm512_mask_loadu_epi32 (x, m, p);
x = _mm512_maskz_loadu_epi32 (m, p);
- _mm512_storeu_si512 (p, x);
+ _mm512_storeu_epi32 (p, x);
_mm512_mask_storeu_epi32 (p, m, x);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c
b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c
index f1ae73c..0333d31 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vmovdqu32-2.c
@@ -33,8 +33,8 @@ TEST (void)
}
#if AVX512F_LEN == 512
- res1.x = _mm512_loadu_si512 (s1.a);
- _mm512_storeu_si512 (res2.a, s2.x);
+ res1.x = _mm512_loadu_epi32 (s1.a);
+ _mm512_storeu_epi32 (res2.a, s2.x);
#endif
res3.x = INTRINSIC (_mask_loadu_epi32) (res3.x, mask, s1.a);
res4.x = INTRINSIC (_maskz_loadu_epi32) (mask, s1.a);
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
index 600dfd2..c044f42 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
@@ -17,8 +17,8 @@
{ \
dst_ref = ((rel) << i) | dst_ref; \
} \
- source1.x = _mm512_loadu_si512 (s1); \
- source2.x = _mm512_loadu_si512 (s2); \
+ source1.x = _mm512_loadu_epi32 (s1); \
+ source2.x = _mm512_loadu_epi32 (s2); \
dst1 = _mm512_cmp_epi32_mask (source1.x, source2.x, imm);\
dst2 = _mm512_mask_cmp_epi32_mask (mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
index 2a9ceb6..e3a90d8 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
@@ -18,8 +18,8 @@ __mmask8 dst_ref;
{ \
dst_ref = ((rel) << i) | dst_ref; \
} \
- source1.x = _mm512_loadu_si512 (s1); \
- source2.x = _mm512_loadu_si512 (s2); \
+ source1.x = _mm512_loadu_epi32 (s1); \
+ source2.x = _mm512_loadu_epi32 (s2); \
dst1 = _mm512_cmp_epi64_mask (source1.x, source2.x, imm);\
dst2 = _mm512_mask_cmp_epi64_mask (mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
index c0bb978..a90baf9 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
@@ -17,8 +17,8 @@
{ \
dst_ref = ((rel) << i) | dst_ref; \
} \
- source1.x = _mm512_loadu_si512 (s1); \
- source2.x = _mm512_loadu_si512 (s2); \
+ source1.x = _mm512_loadu_epi32 (s1); \
+ source2.x = _mm512_loadu_epi32 (s2); \
dst1 = _mm512_cmp_epu32_mask (source1.x, source2.x, imm);\
dst2 = _mm512_mask_cmp_epu32_mask (mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
index 3bd1b86..c49f5e4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
@@ -17,8 +17,8 @@
{ \
dst_ref = ((rel) << i) | dst_ref; \
} \
- source1.x = _mm512_loadu_si512 (s1); \
- source2.x = _mm512_loadu_si512 (s2); \
+ source1.x = _mm512_loadu_epi32 (s1); \
+ source2.x = _mm512_loadu_epi32 (s2); \
dst1 = _mm512_cmp_epu64_mask (source1.x, source2.x, imm);\
dst2 = _mm512_mask_cmp_epu64_mask (mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c
b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c
new file mode 100644
index 0000000..1368b7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf0dpd\[
\\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c
b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c
new file mode 100644
index 0000000..61a81bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf0qpd\[
\\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c
b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c
new file mode 100644
index 0000000..5bc7599
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf1dpd\[
\\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c
b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c
new file mode 100644
index 0000000..96610db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf1qpd\[
\\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c
b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c
new file mode 100644
index 0000000..83c31cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf0dpd\[
\\t\]+\[^\n\]*%ymm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf0dpd\[
\\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_prefetch_i32scatter_pd (base, idx, 8, 0);
+ _mm512_mask_prefetch_i32scatter_pd (base, m8, idx, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c
b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c
new file mode 100644
index 0000000..31172f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf0qpd\[
\\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf0qpd\[
\\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_prefetch_i64scatter_pd (base, idx, 8, 0);
+ _mm512_mask_prefetch_i64scatter_pd (base, m8, idx, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c
b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c
new file mode 100644
index 0000000..205505b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf1dpd\[
\\t\]+\[^\n\]*%ymm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf1dpd\[
\\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_prefetch_i32scatter_pd (base, idx, 8, 1);
+ _mm512_mask_prefetch_i32scatter_pd (base, m8, idx, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c
b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c
new file mode 100644
index 0000000..64d7dfa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf1qpd\[
\\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf1qpd\[
\\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+ _mm512_prefetch_i64scatter_pd (base, idx, 8, 1);
+ _mm512_mask_prefetch_i64scatter_pd (base, m8, idx, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c
b/gcc/testsuite/gcc.target/i386/sse-14.c
index c5d8876..643eb99 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -523,26 +523,30 @@ test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i,
__mmask16, void const *, 1
test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16,
__m512i, 1, 1)
test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *,
1, 1)
test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i,
1, 1)
+test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *,
1, 1)
+test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i,
1, 1)
+test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, void const *,
1, 1)
+test_3vx (_mm512_mask_prefetch_i64scatter_pd, void const *, __mmask8, __m512i,
1, 1)
/* avx512erintrin.h */
-test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 1)
-test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 1)
-test_1 (_mm512_rcp28_round_pd, __m512d, __m512d, 1)
-test_1 (_mm512_rcp28_round_ps, __m512, __m512, 1)
-test_1 (_mm512_rsqrt28_round_pd, __m512d, __m512d, 1)
-test_1 (_mm512_rsqrt28_round_ps, __m512, __m512, 1)
-test_2 (_mm512_maskz_exp2a23_round_pd, __m512d, __mmask8, __m512d, 1)
-test_2 (_mm512_maskz_exp2a23_round_ps, __m512, __mmask16, __m512, 1)
-test_2 (_mm512_maskz_rcp28_round_pd, __m512d, __mmask8, __m512d, 1)
-test_2 (_mm512_maskz_rcp28_round_ps, __m512, __mmask16, __m512, 1)
-test_2 (_mm512_maskz_rsqrt28_round_pd, __m512d, __mmask8, __m512d, 1)
-test_2 (_mm512_maskz_rsqrt28_round_ps, __m512, __mmask16, __m512, 1)
-test_3 (_mm512_mask_exp2a23_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
-test_3 (_mm512_mask_exp2a23_round_ps, __m512, __m512, __mmask16, __m512, 1)
-test_3 (_mm512_mask_rcp28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
-test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 1)
-test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 1)
-test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 1)
+test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 5)
+test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 5)
+test_1 (_mm512_rcp28_round_pd, __m512d, __m512d, 5)
+test_1 (_mm512_rcp28_round_ps, __m512, __m512, 5)
+test_1 (_mm512_rsqrt28_round_pd, __m512d, __m512d, 5)
+test_1 (_mm512_rsqrt28_round_ps, __m512, __m512, 5)
+test_2 (_mm512_maskz_exp2a23_round_pd, __m512d, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_exp2a23_round_ps, __m512, __mmask16, __m512, 5)
+test_2 (_mm512_maskz_rcp28_round_pd, __m512d, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_rcp28_round_ps, __m512, __mmask16, __m512, 5)
+test_2 (_mm512_maskz_rsqrt28_round_pd, __m512d, __mmask8, __m512d, 5)
+test_2 (_mm512_maskz_rsqrt28_round_ps, __m512, __mmask16, __m512, 5)
+test_3 (_mm512_mask_exp2a23_round_pd, __m512d, __m512d, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_exp2a23_round_ps, __m512, __m512, __mmask16, __m512, 5)
+test_3 (_mm512_mask_rcp28_round_pd, __m512d, __m512d, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_rcp28_round_ps, __m512, __m512, __mmask16, __m512, 5)
+test_3 (_mm512_mask_rsqrt28_round_pd, __m512d, __m512d, __mmask8, __m512d, 5)
+test_3 (_mm512_mask_rsqrt28_round_ps, __m512, __m512, __mmask16, __m512, 5)
/* shaintrin.h */
test_2 (_mm_sha1rnds4_epu32, __m128i, __m128i, __m128i, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c
b/gcc/testsuite/gcc.target/i386/sse-22.c
index 630c952..7d68be1 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -646,6 +646,11 @@ test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const
*, __mmask16, __m512i,
test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *,
1, 1)
test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i,
1, 1)
+test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *,
1, 1)
+test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i,
1, 1)
+test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, long long *,
1, 1)
+test_3vx (_mm512_mask_prefetch_i64scatter_pd, void const *, __mmask8, __m512i,
1, 1)
+
/* avx512erintrin.h */
test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 5)
test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 5)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c
b/gcc/testsuite/gcc.target/i386/sse-23.c
index 309cd73..77c8d67 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -365,6 +365,10 @@
#define __builtin_ia32_gatherpfqps(A, B, C, D, E)
__builtin_ia32_gatherpfqps(A, B, C, 1, 1)
#define __builtin_ia32_scatterpfdps(A, B, C, D, E)
__builtin_ia32_scatterpfdps(A, B, C, 1, 1)
#define __builtin_ia32_scatterpfqps(A, B, C, D, E)
__builtin_ia32_scatterpfqps(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfdpd(A, B, C, D, E)
__builtin_ia32_gatherpfdpd(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfqpd(A, B, C, D, E)
__builtin_ia32_gatherpfqpd(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfdpd(A, B, C, D, E)
__builtin_ia32_scatterpfdpd(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfqpd(A, B, C, D, E)
__builtin_ia32_scatterpfqpd(A, B, C, 1, 1)
/* avx512erintrin.h */
#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A,
B, C, 5)