[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
This revision was automatically updated to reflect the committed changes. Closed by commit rL313624: Lowering Mask Set1 intrinsics to LLVM IR (authored by jina.nahias). Changed prior to commit: https://reviews.llvm.org/D37668?vs=115622&id=115823#toc Repository: rL LLVM https://reviews.llvm.org/D37668 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/include/clang/Basic/BuiltinsX86_64.def cfe/trunk/lib/Headers/avx512bwintrin.h cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Headers/avx512vlbwintrin.h cfe/trunk/lib/Headers/avx512vlintrin.h cfe/trunk/test/CodeGen/avx512bw-builtins.c cfe/trunk/test/CodeGen/avx512f-builtins.c cfe/trunk/test/CodeGen/avx512vl-builtins.c cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Index: cfe/trunk/lib/Headers/avx512vlintrin.h === --- cfe/trunk/lib/Headers/avx512vlintrin.h +++ cfe/trunk/lib/Headers/avx512vlintrin.h @@ -5723,59 +5723,72 @@ (__v4df)_mm256_setzero_pd()); } +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) +{ + return (__m128i)__builtin_ia32_selectd_128(__M, + (__v4si) _mm_set1_epi32(__A), + (__v4si)__O); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_set1_epi32( __mmask8 __M, int __A) +{ + return (__m128i)__builtin_ia32_selectd_128(__M, + (__v4si) _mm_set1_epi32(__A), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) +{ + return (__m256i)__builtin_ia32_selectd_256(__M, + (__v8si) _mm256_set1_epi32(__A), + (__v8si)__O); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_set1_epi32( __mmask8 __M, int __A) +{ + return (__m256i)__builtin_ia32_selectd_256(__M, + (__v8si) _mm256_set1_epi32(__A), + (__v8si)_mm256_setzero_si256()); +} -#define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \ - (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \ - (__v4si)(__m128i)(O), \ - (__mmask8)(M)); }) - -#define _mm_maskz_set1_epi32(M, A) 
__extension__ ({ \ - (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(M)); }) - -#define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \ - (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \ - (__v8si)(__m256i)(O), \ - (__mmask8)(M)); }) - -#define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \ - (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(M)); }) #ifdef __x86_64__ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) { - return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O, - __M); + return (__m128i) __builtin_ia32_selectq_128(__M, + (__v2di) _mm_set1_epi8(__A), + (__v2di) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) { - return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, - (__v2di) - _mm_setzero_si128 (), - __M); + return (__m128i) __builtin_ia32_selectq_128(__M, + (__v2di) _mm_set1_epi8(__A), + (__v2di) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) { - return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O, - __M); + return (__m256i) __builtin_ia32_selectq_256(__M, + (__v4di) _mm256_set1_epi64x(__A), + (__v4di) __O) ; } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) { - return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, - (__v4di) - _mm256_setzero_si256 (), - __M); + return (__m256i) __builtin_i
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
craig.topper accepted this revision. craig.topper added a comment. This revision is now accepted and ready to land. LGTM https://reviews.llvm.org/D37668 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
jina.nahias added inline comments. Comment at: lib/Headers/avx512fintrin.h:9742 #ifdef __x86_64__ static __inline__ __m512i __DEFAULT_FN_ATTRS craig.topper wrote: > Please remove the #ifdef __x86_64__ from this. It should work in 32-bits as > well. The code currently generated for 32-bit is not optimal; 32-bit needs some more work, which will come in a follow-up patch. https://reviews.llvm.org/D37668 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
jina.nahias updated this revision to Diff 115622. jina.nahias added a comment. rebase on @craig.topper commit. https://reviews.llvm.org/D37668 Files: include/clang/Basic/BuiltinsX86.def include/clang/Basic/BuiltinsX86_64.def lib/Headers/avx512bwintrin.h lib/Headers/avx512fintrin.h lib/Headers/avx512vlbwintrin.h lib/Headers/avx512vlintrin.h test/CodeGen/avx512bw-builtins.c test/CodeGen/avx512f-builtins.c test/CodeGen/avx512vl-builtins.c test/CodeGen/avx512vlbw-builtins.c Index: test/CodeGen/avx512vlbw-builtins.c === --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -2670,28 +2670,195 @@ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_broadcastw_epi16(__M, __A); } +__m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){ + // CHECK-LABEL: @test_mm_mask_set1_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_mask_set1_epi8(__O, __M, __A); +} 
+__m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){ + // CHECK-LABEL: @test_mm_maskz_set1_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_maskz_set1_epi8( __M, __A); +} + +__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) { + // CHECK-LABEL: @test_mm256_mask_set1_epi8 + // CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: 
insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22 + // CHECK: insertelement
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
craig.topper added a comment. I'm going to go ahead and remove __builtin_ia32_pbroadcastq512_mem_mask from clang and change _mm512_maskz_set1_epi64 to be disabled in 32-bit mode. I want to nominate this for 5.0.1 because using it in 32-bit mode causes the compiler to fail with a "cannot select" error. So disabling it in the header at least gives a better user experience. After that goes in, you should rebase this patch and enable all of the set1_epi64 intrinsics to work in 32-bit mode like they should. Comment at: lib/Headers/avx512fintrin.h:9742 #ifdef __x86_64__ static __inline__ __m512i __DEFAULT_FN_ATTRS Please remove the #ifdef __x86_64__ from this. It should work in 32-bits as well. Comment at: lib/Headers/avx512vlintrin.h:5759 #ifdef __x86_64__ static __inline__ __m128i __DEFAULT_FN_ATTRS Please remove the #ifdef __x86_64__ from these. They should work in 32-bit mode. https://reviews.llvm.org/D37668 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
jina.nahias added inline comments. Comment at: include/clang/Basic/BuiltinsX86.def:981 -TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "", "avx512f") craig.topper wrote: > I think your patch removed the only use of > __builtin_ia32_pbroadcastq512_mem_mask right? Does your change work properly > in 32-bit mode? Yes to both questions. I have deleted __builtin_ia32_pbroadcastq512_mem_mask; you can see it in the new update. https://reviews.llvm.org/D37668 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
jina.nahias updated this revision to Diff 114978. https://reviews.llvm.org/D37668 Files: include/clang/Basic/BuiltinsX86.def include/clang/Basic/BuiltinsX86_64.def lib/Headers/avx512bwintrin.h lib/Headers/avx512fintrin.h lib/Headers/avx512vlbwintrin.h lib/Headers/avx512vlintrin.h test/CodeGen/avx512bw-builtins.c test/CodeGen/avx512f-builtins.c test/CodeGen/avx512vl-builtins.c test/CodeGen/avx512vlbw-builtins.c Index: test/CodeGen/avx512vlbw-builtins.c === --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -2602,28 +2602,195 @@ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_broadcastw_epi16(__M, __A); } +__m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){ + // CHECK-LABEL: @test_mm_mask_set1_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_mask_set1_epi8(__O, __M, __A); +} +__m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){ + 
// CHECK-LABEL: @test_mm_maskz_set1_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_maskz_set1_epi8( __M, __A); +} + +__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) { + // CHECK-LABEL: @test_mm256_mask_set1_epi8 + // CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: 
insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 23 + // CHECK: insertelemen
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
RKSimon added a comment. Some very minor whitespace/indentation issues. Please can you confirm @craig.topper's query about __builtin_ia32_pbroadcastq512_mem_mask? Comment at: lib/Headers/avx512vlintrin.h:5727 +static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, weird indentation? Comment at: lib/Headers/avx512vlintrin.h:5734 +static __inline__ __m128i __DEFAULT_FN_ATTRS + _mm_maskz_set1_epi32( __mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, weird indentation? Comment at: lib/Headers/avx512vlintrin.h:5741 +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256(__M, weird indentation? Comment at: lib/Headers/avx512vlintrin.h:5748 +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_maskz_set1_epi32( __mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256(__M, weird indentation? Comment at: test/CodeGen/avx512f-builtins.c:7732 + // CHECK-LABEL: @test_mm512_mask_set1_epi32 +// CHECK: insertelement <16 x i32> undef, i32 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1 weird indentation? Comment at: test/CodeGen/avx512f-builtins.c:7755 + // CHECK-LABEL: @test_mm512_maskz_set1_epi32 +// CHECK: insertelement <16 x i32> undef, i32 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1 weird indentation? Comment at: test/CodeGen/avx512f-builtins.c:7966 //CHECK: insertelement{{.*}}i32 14 //CHECK: insertelement{{.*}}i32 15 return _mm512_setr_epi32( __A, __B, __C, __D,__E, __F, __G, __H, weird indentation? 
Comment at: test/CodeGen/avx512vl-builtins.c:4597 // CHECK-LABEL: @test_mm256_maskz_set1_epi64 - // CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.256 +// CHECK: insertelement <4 x i64> undef, i64 %{{.*}}, i32 0 + // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 1 weird indentation? https://reviews.llvm.org/D37668 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
jina.nahias updated this revision to Diff 114836. https://reviews.llvm.org/D37668 Files: include/clang/Basic/BuiltinsX86.def include/clang/Basic/BuiltinsX86_64.def lib/Headers/avx512bwintrin.h lib/Headers/avx512fintrin.h lib/Headers/avx512vlbwintrin.h lib/Headers/avx512vlintrin.h test/CodeGen/avx512bw-builtins.c test/CodeGen/avx512f-builtins.c test/CodeGen/avx512vl-builtins.c test/CodeGen/avx512vlbw-builtins.c Index: test/CodeGen/avx512vlbw-builtins.c === --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -2602,28 +2602,195 @@ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_broadcastw_epi16(__M, __A); } +__m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){ + // CHECK-LABEL: @test_mm_mask_set1_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_mask_set1_epi8(__O, __M, __A); +} +__m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){ + 
// CHECK-LABEL: @test_mm_maskz_set1_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_maskz_set1_epi8( __M, __A); +} + +__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) { + // CHECK-LABEL: @test_mm256_mask_set1_epi8 + // CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: 
insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 23 + // CHECK: insertelemen
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
craig.topper added inline comments. Comment at: include/clang/Basic/BuiltinsX86.def:981 -TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "", "avx512f") I think your patch removed the only use of __builtin_ia32_pbroadcastq512_mem_mask, right? Does your change work properly in 32-bit mode? Comment at: lib/Headers/avx512bwintrin.h:2031 { - return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A, - (__v64qi) __O, - __M); + __m512i __V = _mm512_set1_epi8(__A); + return (__m512i) __builtin_ia32_selectb_512(__M,(__v64qi)__V,(__v64qi) __O); We usually don't declare variables in the intrinsics if we can avoid it. Just nest the calls. Comment at: test/CodeGen/avx512vl-builtins.c:4511 // CHECK-LABEL: @test_mm256_mask_set1_epi32 - // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.256 +// CHECK: insertelement <8 x i32> undef, i32 %{{.*}}, i32 0 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 1 The first line is overindented. Comment at: test/CodeGen/avx512vl-builtins.c:4525 // CHECK-LABEL: @test_mm256_maskz_set1_epi32 - // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.256 +// CHECK: insertelement <8 x i32> undef, i32 %{{.*}}, i32 0 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 1 The first line is overindented. https://reviews.llvm.org/D37668 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
jina.nahias updated this revision to Diff 114765. https://reviews.llvm.org/D37668 Files: include/clang/Basic/BuiltinsX86.def include/clang/Basic/BuiltinsX86_64.def lib/Headers/avx512bwintrin.h lib/Headers/avx512fintrin.h lib/Headers/avx512vlbwintrin.h lib/Headers/avx512vlintrin.h test/CodeGen/avx512bw-builtins.c test/CodeGen/avx512f-builtins.c test/CodeGen/avx512vl-builtins.c test/CodeGen/avx512vlbw-builtins.c Index: test/CodeGen/avx512vlbw-builtins.c === --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -2602,28 +2602,196 @@ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_broadcastw_epi16(__M, __A); } + __m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){ +// CHECK-LABEL: @test_mm_mask_set1_epi8 +// CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 +// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_mask_set1_epi8(__O, __M, __A); + } + + __m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){ +// 
CHECK-LABEL: @test_mm_maskz_set1_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 +// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_maskz_set1_epi8( __M, __A); + } + +__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) { + // CHECK-LABEL: @test_mm256_mask_set1_epi8 +// CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <32 x 
i8> %{{.*}}, i8 %{{.*}}, i32 11 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22 + // CHECK:
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
craig.topper added a comment. I think when you uploaded the changes to remove it from BuiltinsX86.def, you lost your earlier changes to the header files. https://reviews.llvm.org/D37668 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
jina.nahias updated this revision to Diff 114594. jina.nahias added a comment. delete from include/clang/Basic/BuiltinsX86.def and include/clang/Basic/BuiltinsX86_64.def https://reviews.llvm.org/D37668 Files: include/clang/Basic/BuiltinsX86.def include/clang/Basic/BuiltinsX86_64.def Index: include/clang/Basic/BuiltinsX86_64.def === --- include/clang/Basic/BuiltinsX86_64.def +++ include/clang/Basic/BuiltinsX86_64.def @@ -71,9 +71,6 @@ TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm") TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "", "lwp") TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "", "lwp") -TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi","","avx512f") Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -977,7 +977,6 @@ TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "", "avx512f") @@ -1381,11 +1380,6 @@ TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, 
"vV2LLi*V2LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_gpr_mask, "V64ccV64cULLi","","avx512bw") -TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_gpr_mask, "V16ccV16cUs","","avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_gpr_mask, "V32ccV32cUi","","avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastd128_gpr_mask, "V4iiV4iUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastd256_gpr_mask, "V8iiV8iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma") TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma") TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma") @@ -1596,9 +1590,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw") -TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi","","avx512bw") Index: include/clang/Basic/BuiltinsX86_64.def === --- include/clang/Basic/BuiltinsX86_64.def +++ include/clang/Basic/BuiltinsX86_64.def @@ -71,9 +71,6 @@ TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm") TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "", "lwp") TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "", "lwp") 
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi","","avx512f") Index: include/clang/Basic/BuiltinsX86.def === --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -977,7 +977,6 @@ TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_ptestmd
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
RKSimon added a comment. As with https://reviews.llvm.org/D37562, strip the builtins from include/clang/Basic/BuiltinsX86.def https://reviews.llvm.org/D37668 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR
jina.nahias created this revision. This is the Clang part; the LLVM part is https://reviews.llvm.org/differential/diff/114515/ https://reviews.llvm.org/D37668 Files: lib/Headers/avx512bwintrin.h lib/Headers/avx512fintrin.h lib/Headers/avx512vlbwintrin.h lib/Headers/avx512vlintrin.h test/CodeGen/avx512bw-builtins.c test/CodeGen/avx512f-builtins.c test/CodeGen/avx512vl-builtins.c test/CodeGen/avx512vlbw-builtins.c Index: test/CodeGen/avx512vlbw-builtins.c === --- test/CodeGen/avx512vlbw-builtins.c +++ test/CodeGen/avx512vlbw-builtins.c @@ -2602,28 +2602,196 @@ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_broadcastw_epi16(__M, __A); } + __m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){ +// CHECK-LABEL: @test_mm_mask_set1_epi8 +// CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 +// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_mask_set1_epi8(__O, __M, __A); + } + + __m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){ +// CHECK-LABEL: 
@test_mm_maskz_set1_epi8 + // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14 +// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 +// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_maskz_set1_epi8( __M, __A); + } + +__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) { + // CHECK-LABEL: @test_mm256_mask_set1_epi8 +// CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10 + // CHECK: insertelement <32 x i8> %{{.*}}, 
i8 %{{.*}}, i32 11 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22 + // CHECK: in