Re: [PATCH] i386: Fix vpblendm{b,w} intrins and insns
On Tue, Apr 18, 2023 at 3:15 PM Haochen Jiang via Gcc-patches wrote: > > Hi all, > > For vpblendm{b,w}, they actually do not have constant parameters. > Therefore, there is no need for them to be wrapped in __OPTIMIZE__. > > Also, we should check TARGET_AVX512VL for 128/256 bit vectors in patterns. > > This patch did the fixes mentioned above. Tested on x86_64-pc-linux-gnu. > Ok for trunk? Ok. > > BRs, > Haochen > > gcc/ChangeLog: > > * config/i386/avx512vlbwintrin.h > (_mm_mask_blend_epi16): Remove __OPTIMIZE__ wrapper. > (_mm_mask_blend_epi8): Ditto. > (_mm256_mask_blend_epi16): Ditto. > (_mm256_mask_blend_epi8): Ditto. > * config/i386/avx512vlintrin.h > (_mm256_mask_blend_pd): Ditto. > (_mm256_mask_blend_ps): Ditto. > (_mm256_mask_blend_epi64): Ditto. > (_mm256_mask_blend_epi32): Ditto. > (_mm_mask_blend_pd): Ditto. > (_mm_mask_blend_ps): Ditto. > (_mm_mask_blend_epi64): Ditto. > (_mm_mask_blend_epi32): Ditto. > * config/i386/sse.md (VF_AVX512BWHFBF16): Removed. > (VF_AVX512HFBFVL): Move it before the first usage. > (_blendm): Change iterator from VF_AVX512BWHFBF16 > to VF_AVX512HFBFVL. 
> --- > gcc/config/i386/avx512vlbwintrin.h | 92 ++- > gcc/config/i386/avx512vlintrin.h | 184 +++-- > gcc/config/i386/sse.md | 17 ++- > 3 files changed, 115 insertions(+), 178 deletions(-) > > diff --git a/gcc/config/i386/avx512vlbwintrin.h > b/gcc/config/i386/avx512vlbwintrin.h > index 0232783a362..9d2aba2a8ff 100644 > --- a/gcc/config/i386/avx512vlbwintrin.h > +++ b/gcc/config/i386/avx512vlbwintrin.h > @@ -257,6 +257,42 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) > (__mmask16) __U); > } > > +extern __inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) > +{ > + return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, > + (__v8hi) __W, > + (__mmask8) __U); > +} > + > +extern __inline __m128i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) > +{ > + return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, > + (__v16qi) __W, > + (__mmask16) __U); > +} > + > +extern __inline __m256i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) > +{ > + return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, > + (__v16hi) __W, > + (__mmask16) __U); > +} > + > +extern __inline __m256i > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) > +{ > + return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, > + (__v32qi) __W, > + (__mmask32) __U); > +} > + > extern __inline __m128i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm256_cvtepi16_epi8 (__m256i __A) > @@ -1442,42 +1478,6 @@ _mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, > __m128i __B, > (__mmask8) __U); > } > > -extern __inline __m128i > -__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) > -_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) > -{ > - return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, > - (__v8hi) __W, > - (__mmask8) __U); > -} > - > -extern __inline __m128i > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) > -{ > - return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, > - (__v16qi) __W, > - (__mmask16) __U); > -} > - > -extern __inline __m256i > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) > -{ > - return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, > - (__v16hi) __W, > - (__mmask16) __U); > -} > - > -extern __inline __m256i > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) >
[PATCH] i386: Fix vpblendm{b,w} intrins and insns
Hi all, For vpblendm{b,w}, they actually do not have constant parameters. Therefore, there is no need for them been wrapped in __OPTIMIZE__. Also, we should check TARGET_AVX512VL for 128/256 bit vectors in patterns. This patch did the fixes mentioned above. Tested on x86_64-pc-linux-gnu. Ok for trunk? BRs, Haochen gcc/ChangeLog: * config/i386/avx512vlbwintrin.h (_mm_mask_blend_epi16): Remove __OPTIMIZE__ wrapper. (_mm_mask_blend_epi8): Ditto. (_mm256_mask_blend_epi16): Ditto. (_mm256_mask_blend_epi8): Ditto. * config/i386/avx512vlintrin.h (_mm256_mask_blend_pd): Ditto. (_mm256_mask_blend_ps): Ditto. (_mm256_mask_blend_epi64): Ditto. (_mm256_mask_blend_epi32): Ditto. (_mm_mask_blend_pd): Ditto. (_mm_mask_blend_ps): Ditto. (_mm_mask_blend_epi64): Ditto. (_mm_mask_blend_epi32): Ditto. * config/i386/sse.md (VF_AVX512BWHFBF16): Removed. (VF_AVX512HFBFVL): Move it before the first usage. (_blendm): Change iterator from VF_AVX512BWHFBF16 to VF_AVX512HFBFVL. --- gcc/config/i386/avx512vlbwintrin.h | 92 ++- gcc/config/i386/avx512vlintrin.h | 184 +++-- gcc/config/i386/sse.md | 17 ++- 3 files changed, 115 insertions(+), 178 deletions(-) diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 0232783a362..9d2aba2a8ff 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -257,6 +257,42 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) (__mmask16) __U); } +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, + (__v16qi) __W, + (__mmask16) __U); +} + +extern __inline 
__m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, + (__v32qi) __W, + (__mmask32) __U); +} + extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi16_epi8 (__m256i __A) @@ -1442,42 +1478,6 @@ _mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B, (__mmask8) __U); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) -{ - return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) -{ - return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, - (__v16qi) __W, - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) -{ - return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, - (__v16hi) __W, - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) -{ - return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, - (__v32qi) __W, - (__mmask32) __U); -} - extern __inline __mmask8