2016-12-02 21:31 GMT+03:00 Uros Bizjak <ubiz...@gmail.com>: . . . . . >> >> I split this patch after last updates in md files, here is the first >> part which doesn't change md files. >> Regtested on x86_64-linux-gnu. Is this part ok? > > There is no point to scan for kmovX insn in e.g.: > > +/* { dg-final { scan-assembler-times "kmovq" 2 } } */ > + > +#include <immintrin.h> > + > +void > +avx512bw_test () > +{ > + __mmask64 k1, k2, k3; > + volatile __m512i x = _mm512_setzero_si512 (); > + > + __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) ); > + __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) ); > > since you emit it from inline asm. > > Please remove these pointless kmovX scan-asm-times directives from the > testcases, and please also remove it from avx512f-kandnw-1.c > testcase. > > The patch is OK with this change.
Hi here is the second part of k-mask intrinsics, is it Ok? diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index 9e6e0ce..7f40808 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -40,6 +40,62 @@ typedef char __v64qi __attribute__ ((__vector_size__ (64))); typedef unsigned long long __mmask64; +extern __inline unsigned int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_cvtmask32_u32 (__mmask32 __A) +{ + return (unsigned int) __builtin_ia32_kmov32 ((__mmask32) __A); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_cvtmask64_u64 (__mmask64 __A) +{ + return (unsigned long long) __builtin_ia32_kmov64 ((__mmask64) __A); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_cvtu32_mask32 (unsigned int __A) +{ + return (__mmask32) __builtin_ia32_kmov32 ((__mmask32) __A); +} + +extern __inline __mmask64 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_cvtu64_mask64 (unsigned long long __A) +{ + return (__mmask64) __builtin_ia32_kmov64 ((__mmask64) __A); +} + +extern __inline __mmask32 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_load_mask32 (__mmask32 *__A) +{ + return (__mmask32) __builtin_ia32_kmov32 (*__A); +} + +extern __inline __mmask64 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_load_mask64 (__mmask64 *__A) +{ + return (__mmask64) __builtin_ia32_kmov64 (*(__mmask64 *) __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_store_mask32 (__mmask32 *__A, __mmask32 __B) +{ + *(__mmask32 *) __A = __builtin_ia32_kmov32 (__B); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_store_mask64 (__mmask64 *__A, __mmask64 __B) +{ + *(__mmask64 *) __A = __builtin_ia32_kmov64 (__B); +} + extern __inline 
__mmask32 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _knot_mask32 (__mmask32 __A) diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index d2405c3..d15d35d 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -34,6 +34,34 @@ #define __DISABLE_AVX512DQ__ #endif /* __AVX512DQ__ */ +extern __inline unsigned int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_cvtmask8_u32 (__mmask8 __A) +{ + return (unsigned int) __builtin_ia32_kmov8 ((__mmask8 ) __A); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_cvtu32_mask8 (unsigned int __A) +{ + return (__mmask8) __builtin_ia32_kmov8 ((__mmask8) __A); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_load_mask8 (__mmask8 *__A) +{ + return (__mmask8) __builtin_ia32_kmov8 (*(__mmask8 *) __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_store_mask8 (__mmask8 *__A, __mmask8 __B) +{ + *(__mmask8 *) __A = __builtin_ia32_kmov8 (__B); +} + extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _knot_mask8 (__mmask8 __A) diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index ab1704b..45e1949 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -9984,6 +9984,34 @@ _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P) #define _kxnor_mask16 _mm512_kxnor #define _kxor_mask16 _mm512_kxor +extern __inline unsigned int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_cvtmask16_u32 (__mmask16 __A) +{ + return (unsigned int) __builtin_ia32_kmov16 ((__mmask16 ) __A); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_cvtu32_mask16 (unsigned int __A) +{ + return (__mmask16) __builtin_ia32_kmov16 
((__mmask16 ) __A); +} + +extern __inline __mmask16 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_load_mask16 (__mmask16 *__A) +{ + return (__mmask16) __builtin_ia32_kmov16 (*(__mmask16 *) __A); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_store_mask16 (__mmask16 *__A, __mmask16 __B) +{ + *(__mmask16 *) __A = __builtin_ia32_kmov16 (__B); +} + extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_kand (__mmask16 __A, __mmask16 __B) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 83a5089..8030083 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1463,7 +1463,10 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kxorqi, "__builtin_ia32_kxorqi", IX86_ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kxorhi, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI) BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kxorsi, "__builtin_ia32_kxorsi", IX86_BUILTIN_KXOR32, UNKNOWN, (int) USI_FTYPE_USI_USI) BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kxordi, "__builtin_ia32_kxordi", IX86_BUILTIN_KXOR64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI) +BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kmovb, "__builtin_ia32_kmov8", IX86_BUILTIN_KMOV8, UNKNOWN, (int) UQI_FTYPE_UQI) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI) +BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kmovd, "__builtin_ia32_kmov32", IX86_BUILTIN_KMOV32, UNKNOWN, (int) USI_FTYPE_USI) +BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kmovq, "__builtin_ia32_kmov64", IX86_BUILTIN_KMOV64, UNKNOWN, (int) UDI_FTYPE_UDI) /* SHA */ BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 454aeca..c7456d5 100644 --- a/gcc/config/i386/sse.md +++ 
b/gcc/config/i386/sse.md @@ -1309,12 +1309,30 @@ ;; Mask variant shift mnemonics (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")]) +(define_expand "kmovb" + [(set (match_operand:QI 0 "nonimmediate_operand") + (match_operand:QI 1 "nonimmediate_operand"))] + "TARGET_AVX512DQ + && !(MEM_P (operands[0]) && MEM_P (operands[1]))") + (define_expand "kmovw" [(set (match_operand:HI 0 "nonimmediate_operand") (match_operand:HI 1 "nonimmediate_operand"))] "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))") +(define_expand "kmovd" + [(set (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "nonimmediate_operand"))] + "TARGET_AVX512BW + && !(MEM_P (operands[0]) && MEM_P (operands[1]))") + +(define_expand "kmovq" + [(set (match_operand:DI 0 "nonimmediate_operand") + (match_operand:DI 1 "nonimmediate_operand"))] + "TARGET_AVX512BW + && !(MEM_P (operands[0]) && MEM_P (operands[1]))") + (define_insn "k<code><mode>" [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k") (any_logic:SWI1248_AVX512BW diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-1.c new file mode 100644 index 0000000..2fbdafd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "kmovd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask32 k1; + +void +avx512bw_test () +{ + __mmask32 k = _cvtu32_mask32 (11); + + asm volatile ("" : "+k" (k)); + k1 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-2.c new file mode 100644 index 0000000..581affe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "kmovd\[ 
\\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask32 k1; + +void +avx512bw_test () +{ + __mmask32 k0 = 11; + __mmask32 k = _load_mask32 (&k0); + + asm volatile ("" : "+k" (k)); + k1 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-3.c b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-3.c new file mode 100644 index 0000000..4cf22fe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-3.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "kmovd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask32 k1 = 11; + +void +avx512bw_test () +{ + __mmask32 k0, k; + + _store_mask32 (&k, k1); + + asm volatile ("" : "+k" (k)); + k0 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-4.c b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-4.c new file mode 100644 index 0000000..d61f944 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovd-4.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "kmovd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile unsigned int i; + +void +avx512bw_test () +{ + __mmask32 k = 11; + + asm volatile ("" : "+k" (k)); + i = _cvtmask32_u32 (k); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-1.c new file mode 100644 index 0000000..20586b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "kmovq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask64 k1; + +void +avx512bw_test () +{ + __mmask64 k = _cvtu64_mask64 (11); + + asm volatile ("" : "+k" (k)); + k1 = k; +} diff --git 
a/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-2.c new file mode 100644 index 0000000..1a5f94c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "kmovq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask64 k1; + +void +avx512bw_test () +{ + __mmask64 k0 = 11; + __mmask64 k = _load_mask64 (&k0); + + asm volatile ("" : "+k" (k)); + k1 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-3.c b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-3.c new file mode 100644 index 0000000..53c6a17 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-3.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "kmovq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask64 k1 = 11; + +void +avx512bw_test () +{ + __mmask64 k0, k; + + _store_mask64 (&k, k1); + + asm volatile ("" : "+k" (k)); + k0 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-4.c b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-4.c new file mode 100644 index 0000000..0122c6c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kmovq-4.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times "kmovq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile unsigned long long i; + +void +avx512bw_test () +{ + __mmask64 k = 11; + + asm volatile ("" : "+k" (k)); + i = _cvtmask64_u64 (k); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-2.c new file mode 100644 index 0000000..162ce38 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-2.c @@ -0,0 +1,15 @@ +/* { dg-do 
compile } */ +/* { dg-options "-mavx512dq -O2" } */ +/* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask8 k1; + +void +avx512dq_test () +{ + __mmask8 k = _cvtu32_mask8 (11); + + asm volatile ("" : "+k" (k)); + k1 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-3.c b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-3.c new file mode 100644 index 0000000..c10dd1e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O2" } */ +/* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask8 k1; + +void +avx512dq_test () +{ + __mmask8 k0 = 11; + __mmask8 k = _load_mask8 (&k0); + + asm volatile ("" : "+k" (k)); + k1 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-4.c b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-4.c new file mode 100644 index 0000000..b3120dd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-4.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O2" } */ +/* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask8 k1 = 11; + +void +avx512bw_test () +{ + __mmask8 k0, k; + + _store_mask8 (&k, k1); + + asm volatile ("" : "+k" (k)); + k0 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c new file mode 100644 index 0000000..f4fbc49 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512dq -O2" } */ +/* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile unsigned int i; + +void +avx512dq_test () +{ + __mmask8 k = 11; + + asm 
volatile ("" : "+k" (k)); + i = _cvtmask8_u32 (k); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-2.c new file mode 100644 index 0000000..95d203b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask16 k1; + +void +avx512f_test () +{ + __mmask16 k = _cvtu32_mask16 (11); + + asm volatile ("" : "+k" (k)); + k1 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-3.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-3.c new file mode 100644 index 0000000..82d1b30 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask16 k1; + +void +avx512f_test () +{ + __mmask16 k0 = 11; + __mmask16 k = _load_mask16 (&k0); + + asm volatile ("" : "+k" (k)); + k1 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-4.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-4.c new file mode 100644 index 0000000..c1221e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-4.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile __mmask16 k1 = 11; + +void +avx512f_test () +{ + __mmask16 k0, k; + + _store_mask16 (&k, k1); + + asm volatile ("" : "+k" (k)); + k0 = k; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c new file mode 100644 index 0000000..21ad934 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ + +#include <immintrin.h> +volatile unsigned int i; + +void +avx512f_test () +{ + __mmask16 k = 11; + + asm volatile ("" : "+k" (k)); + i = _cvtmask16_u32 (k); +} -- WBR, Andrew
avx512-kmask-intrin-part2.patch
Description: Binary data