[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-19 Thread jina via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL313624: Lowering Mask Set1 intrinsics to LLVM IR (authored 
by jina.nahias).

Changed prior to commit:
  https://reviews.llvm.org/D37668?vs=115622&id=115823#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D37668

Files:
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/include/clang/Basic/BuiltinsX86_64.def
  cfe/trunk/lib/Headers/avx512bwintrin.h
  cfe/trunk/lib/Headers/avx512fintrin.h
  cfe/trunk/lib/Headers/avx512vlbwintrin.h
  cfe/trunk/lib/Headers/avx512vlintrin.h
  cfe/trunk/test/CodeGen/avx512bw-builtins.c
  cfe/trunk/test/CodeGen/avx512f-builtins.c
  cfe/trunk/test/CodeGen/avx512vl-builtins.c
  cfe/trunk/test/CodeGen/avx512vlbw-builtins.c

Index: cfe/trunk/lib/Headers/avx512vlintrin.h
===
--- cfe/trunk/lib/Headers/avx512vlintrin.h
+++ cfe/trunk/lib/Headers/avx512vlintrin.h
@@ -5723,59 +5723,72 @@
   (__v4df)_mm256_setzero_pd());
 }
 
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
+{
+   return (__m128i)__builtin_ia32_selectd_128(__M,
+  (__v4si) _mm_set1_epi32(__A),
+  (__v4si)__O);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_set1_epi32( __mmask8 __M, int __A)
+{
+   return (__m128i)__builtin_ia32_selectd_128(__M,
+  (__v4si) _mm_set1_epi32(__A),
+  (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
+{
+   return (__m256i)__builtin_ia32_selectd_256(__M,
+  (__v8si) _mm256_set1_epi32(__A),
+  (__v8si)__O);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_set1_epi32( __mmask8 __M, int __A)
+{
+   return (__m256i)__builtin_ia32_selectd_256(__M,
+  (__v8si) _mm256_set1_epi32(__A),
+  (__v8si)_mm256_setzero_si256());
+}
 
-#define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \
-  (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
-  (__v4si)(__m128i)(O), \
-  (__mmask8)(M)); })
-
-#define _mm_maskz_set1_epi32(M, A) __extension__ ({ \
-  (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
-  (__v4si)_mm_setzero_si128(), \
-  (__mmask8)(M)); })
-
-#define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \
-  (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
-  (__v8si)(__m256i)(O), \
-  (__mmask8)(M)); })
-
-#define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \
-  (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
-  (__v8si)_mm256_setzero_si256(), \
-  (__mmask8)(M)); })
 
 #ifdef __x86_64__
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
 {
-  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
- __M);
+  return (__m128i) __builtin_ia32_selectq_128(__M,
+  (__v2di) _mm_set1_epi8(__A),
+  (__v2di) __O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
-  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
- (__v2di)
- _mm_setzero_si128 (),
- __M);
+  return (__m128i) __builtin_ia32_selectq_128(__M,
+  (__v2di) _mm_set1_epi8(__A),
+  (__v2di) _mm_setzero_si128());
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
 {
-  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
- __M);
+  return (__m256i) __builtin_ia32_selectq_256(__M,
+  (__v4di) _mm256_set1_epi64x(__A),
+  (__v4di) __O) ;
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
-  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
- (__v4di)
- _mm256_setzero_si256 (),
- __M);
+   return (__m256i) __builtin_i

[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-18 Thread Craig Topper via Phabricator via cfe-commits
craig.topper accepted this revision.
craig.topper added a comment.
This revision is now accepted and ready to land.

LGTM


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-18 Thread jina via Phabricator via cfe-commits
jina.nahias added inline comments.



Comment at: lib/Headers/avx512fintrin.h:9742
 
 #ifdef __x86_64__
 static __inline__ __m512i __DEFAULT_FN_ATTRS

craig.topper wrote:
> Please remove the #ifdef __x86_64__ from this. It should work in 32-bits as 
> well.
The currently generated code for 32-bit is not optimal; 32-bit needs some more 
work, which will be done in a follow-up patch.


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-18 Thread jina via Phabricator via cfe-commits
jina.nahias updated this revision to Diff 115622.
jina.nahias added a comment.

Rebased on @craig.topper's commit.


https://reviews.llvm.org/D37668

Files:
  include/clang/Basic/BuiltinsX86.def
  include/clang/Basic/BuiltinsX86_64.def
  lib/Headers/avx512bwintrin.h
  lib/Headers/avx512fintrin.h
  lib/Headers/avx512vlbwintrin.h
  lib/Headers/avx512vlintrin.h
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/avx512vl-builtins.c
  test/CodeGen/avx512vlbw-builtins.c

Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -2670,28 +2670,195 @@
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_broadcastw_epi16(__M, __A);
 }
+__m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
+  // CHECK-LABEL: @test_mm_mask_set1_epi8
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_mask_set1_epi8(__O, __M, __A);
+}
+__m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
+  // CHECK-LABEL: @test_mm_maskz_set1_epi8
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_maskz_set1_epi8( __M, __A);
+}
+
+__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
+  // CHECK-LABEL: @test_mm256_mask_set1_epi8
+  // CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22
+  // CHECK: insertelement

[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-15 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added a comment.

I'm going to go ahead and remove __builtin_ia32_pbroadcastq512_mem_mask from 
clang and change _mm512_maskz_set1_epi64 to be disabled in 32-bit mode. I want 
to nominate this for 5.0.1 because using it in 32-bit mode causes the compiler 
to throw a "cannot select" error. So disabling it in the header at least gives a 
better user experience.

After that goes in you should rebase this patch and enable all of the 
set1_epi64 intrinsics to work in 32-bit mode like they should.




Comment at: lib/Headers/avx512fintrin.h:9742
 
 #ifdef __x86_64__
 static __inline__ __m512i __DEFAULT_FN_ATTRS

Please remove the #ifdef __x86_64__ from this. It should work in 32-bits as 
well.



Comment at: lib/Headers/avx512vlintrin.h:5759
 
 #ifdef __x86_64__
 static __inline__ __m128i __DEFAULT_FN_ATTRS

Please remove the __x86_64__ from these. They should work in 32-bit mode.


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-13 Thread jina via Phabricator via cfe-commits
jina.nahias added inline comments.



Comment at: include/clang/Basic/BuiltinsX86.def:981
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "", 
"avx512f")

craig.topper wrote:
> I think your patch removed the only use of 
> __builtin_ia32_pbroadcastq512_mem_mask right? Does your change work properly 
> in 32-bit mode?
Yes to both questions. I have deleted 
__builtin_ia32_pbroadcastq512_mem_mask; you can see it in the new update.


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-12 Thread jina via Phabricator via cfe-commits
jina.nahias updated this revision to Diff 114978.

https://reviews.llvm.org/D37668

Files:
  include/clang/Basic/BuiltinsX86.def
  include/clang/Basic/BuiltinsX86_64.def
  lib/Headers/avx512bwintrin.h
  lib/Headers/avx512fintrin.h
  lib/Headers/avx512vlbwintrin.h
  lib/Headers/avx512vlintrin.h
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/avx512vl-builtins.c
  test/CodeGen/avx512vlbw-builtins.c

Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -2602,28 +2602,195 @@
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_broadcastw_epi16(__M, __A);
 }
+__m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
+  // CHECK-LABEL: @test_mm_mask_set1_epi8
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_mask_set1_epi8(__O, __M, __A);
+}
+__m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
+  // CHECK-LABEL: @test_mm_maskz_set1_epi8
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_maskz_set1_epi8( __M, __A);
+}
+
+__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
+  // CHECK-LABEL: @test_mm256_mask_set1_epi8
+  // CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 23
+  // CHECK: insertelemen

[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-12 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Some very minor whitespace/indentation issues.

Please can you confirm @craig.topper's query about 
__builtin_ia32_pbroadcastq512_mem_mask




Comment at: lib/Headers/avx512vlintrin.h:5727
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+ _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) {
+   return (__m128i)__builtin_ia32_selectd_128(__M,

weird indentation?



Comment at: lib/Headers/avx512vlintrin.h:5734
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+ _mm_maskz_set1_epi32( __mmask8 __M, int __A) {
+   return (__m128i)__builtin_ia32_selectd_128(__M,

weird indentation?



Comment at: lib/Headers/avx512vlintrin.h:5741
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+ _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) {
+   return (__m256i)__builtin_ia32_selectd_256(__M,

weird indentation?



Comment at: lib/Headers/avx512vlintrin.h:5748
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+ _mm256_maskz_set1_epi32( __mmask8 __M, int __A) {
+   return (__m256i)__builtin_ia32_selectd_256(__M,

weird indentation?



Comment at: test/CodeGen/avx512f-builtins.c:7732
+   // CHECK-LABEL: @test_mm512_mask_set1_epi32
+// CHECK: insertelement <16 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1

weird indentation?



Comment at: test/CodeGen/avx512f-builtins.c:7755
+  // CHECK-LABEL: @test_mm512_maskz_set1_epi32
+// CHECK: insertelement <16 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}}, i32 1

weird indentation?



Comment at: test/CodeGen/avx512f-builtins.c:7966
 //CHECK: insertelement{{.*}}i32 14
 //CHECK: insertelement{{.*}}i32 15
  return _mm512_setr_epi32( __A, __B, __C, __D,__E, __F, __G, __H,

weird indentation?



Comment at: test/CodeGen/avx512vl-builtins.c:4597
   // CHECK-LABEL: @test_mm256_maskz_set1_epi64
-  // CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.256
+// CHECK: insertelement <4 x i64> undef, i64 %{{.*}}, i32 0
+  // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 1

weird indentation?


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-12 Thread jina via Phabricator via cfe-commits
jina.nahias updated this revision to Diff 114836.

https://reviews.llvm.org/D37668

Files:
  include/clang/Basic/BuiltinsX86.def
  include/clang/Basic/BuiltinsX86_64.def
  lib/Headers/avx512bwintrin.h
  lib/Headers/avx512fintrin.h
  lib/Headers/avx512vlbwintrin.h
  lib/Headers/avx512vlintrin.h
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/avx512vl-builtins.c
  test/CodeGen/avx512vlbw-builtins.c

Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -2602,28 +2602,195 @@
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_broadcastw_epi16(__M, __A);
 }
+__m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
+  // CHECK-LABEL: @test_mm_mask_set1_epi8
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_mask_set1_epi8(__O, __M, __A);
+}
+__m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
+  // CHECK-LABEL: @test_mm_maskz_set1_epi8
+  // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_maskz_set1_epi8( __M, __A);
+}
+
+__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
+  // CHECK-LABEL: @test_mm256_mask_set1_epi8
+  // CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 23
+  // CHECK: insertelemen

[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-12 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: include/clang/Basic/BuiltinsX86.def:981
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "", 
"avx512f")

I think your patch removed the only use of 
__builtin_ia32_pbroadcastq512_mem_mask right? Does your change work properly in 
32-bit mode?



Comment at: lib/Headers/avx512bwintrin.h:2031
 {
-  return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
- (__v64qi) __O,
- __M);
+  __m512i __V = _mm512_set1_epi8(__A);
+  return (__m512i) __builtin_ia32_selectb_512(__M,(__v64qi)__V,(__v64qi) __O);

We usually don't declare variables in the intrinsics if we can avoid it. Just 
nest the calls.



Comment at: test/CodeGen/avx512vl-builtins.c:4511
   // CHECK-LABEL: @test_mm256_mask_set1_epi32
-  // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.256
+// CHECK:  insertelement <8 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK:  insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 1

The first line is over indented



Comment at: test/CodeGen/avx512vl-builtins.c:4525
   // CHECK-LABEL: @test_mm256_maskz_set1_epi32
-  // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.256
+// CHECK:  insertelement <8 x i32> undef, i32 %{{.*}}, i32 0
+  // CHECK:  insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 1

The first line is overindented


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-11 Thread jina via Phabricator via cfe-commits
jina.nahias updated this revision to Diff 114765.

https://reviews.llvm.org/D37668

Files:
  include/clang/Basic/BuiltinsX86.def
  include/clang/Basic/BuiltinsX86_64.def
  lib/Headers/avx512bwintrin.h
  lib/Headers/avx512fintrin.h
  lib/Headers/avx512vlbwintrin.h
  lib/Headers/avx512vlintrin.h
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/avx512vl-builtins.c
  test/CodeGen/avx512vlbw-builtins.c

Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -2602,28 +2602,196 @@
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_broadcastw_epi16(__M, __A);
 }
+  __m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
+// CHECK-LABEL: @test_mm_mask_set1_epi8
+// CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+	return _mm_mask_set1_epi8(__O, __M, __A);
+	}
+
+  __m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
+// CHECK-LABEL: @test_mm_maskz_set1_epi8
+	// CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+	return _mm_maskz_set1_epi8( __M, __A);
+	}
+
+__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
+  // CHECK-LABEL: @test_mm256_mask_set1_epi8
+// CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22
+  // CHECK: 

[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-11 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added a comment.

I think when you uploaded the changes to remove it from BuiltinsX86.def, you 
lost your earlier changes to the header files.


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-11 Thread jina via Phabricator via cfe-commits
jina.nahias updated this revision to Diff 114594.
jina.nahias added a comment.

Deleted the builtins from include/clang/Basic/BuiltinsX86.def and 
include/clang/Basic/BuiltinsX86_64.def.


https://reviews.llvm.org/D37668

Files:
  include/clang/Basic/BuiltinsX86.def
  include/clang/Basic/BuiltinsX86_64.def


Index: include/clang/Basic/BuiltinsX86_64.def
===
--- include/clang/Basic/BuiltinsX86_64.def
+++ include/clang/Basic/BuiltinsX86_64.def
@@ -71,9 +71,6 @@
 TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm")
 TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "", "lwp")
 TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "", "lwp")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi","","avx512f")
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -977,7 +977,6 @@
 TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ptestmd512, "UsV16iV16iUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ptestmq512, "UcV8LLiV8LLiUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd512_gpr_mask, "V16iiV16iUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_mem_mask, "V8LLiLLiV8LLiUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "", "avx512f")
@@ -1381,11 +1380,6 @@
 TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_gpr_mask, "V64ccV64cULLi","","avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_gpr_mask, "V16ccV16cUs","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_gpr_mask, "V32ccV32cUi","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd128_gpr_mask, "V4iiV4iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastd256_gpr_mask, "V8iiV8iUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512ifma")
@@ -1596,9 +1590,6 @@
 TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi","","avx512bw")


Index: include/clang/Basic/BuiltinsX86_64.def
===
--- include/clang/Basic/BuiltinsX86_64.def
+++ include/clang/Basic/BuiltinsX86_64.def
@@ -71,9 +71,6 @@
 TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm")
 TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "", "lwp")
 TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "", "lwp")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi","","avx512f")
Index: include/clang/Basic/BuiltinsX86.def
===
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -977,7 +977,6 @@
 TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ptestmd

[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-10 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

As with https://reviews.llvm.org/D37562, please strip the builtins from
include/clang/Basic/BuiltinsX86.def as well.


https://reviews.llvm.org/D37668



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D37668: [X86][intrinsics] lower _mm[256|512]_mask[z]_set1_epi[8|16|32|64] intrinsic to IR

2017-09-10 Thread jina via Phabricator via cfe-commits
jina.nahias created this revision.

This is the Clang part; the LLVM part is
https://reviews.llvm.org/differential/diff/114515/


https://reviews.llvm.org/D37668

Files:
  lib/Headers/avx512bwintrin.h
  lib/Headers/avx512fintrin.h
  lib/Headers/avx512vlbwintrin.h
  lib/Headers/avx512vlintrin.h
  test/CodeGen/avx512bw-builtins.c
  test/CodeGen/avx512f-builtins.c
  test/CodeGen/avx512vl-builtins.c
  test/CodeGen/avx512vlbw-builtins.c

Index: test/CodeGen/avx512vlbw-builtins.c
===
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -2602,28 +2602,196 @@
   // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
   return _mm256_maskz_broadcastw_epi16(__M, __A);
 }
+  __m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
+// CHECK-LABEL: @test_mm_mask_set1_epi8
+// CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+	return _mm_mask_set1_epi8(__O, __M, __A);
+	}
+
+  __m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
+// CHECK-LABEL: @test_mm_maskz_set1_epi8
+	// CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+// CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+	return _mm_maskz_set1_epi8( __M, __A);
+	}
+
+__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
+  // CHECK-LABEL: @test_mm256_mask_set1_epi8
+// CHECK: insertelement <32 x i8> undef, i8 %{{.*}}, i32 0
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 1
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 2
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 3
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 4
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 5
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 6
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 7
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 8
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 9
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 10
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 11
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 12
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 13
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 15
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 16
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 17
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 18
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 19
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 20
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 21
+  // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 22
+  // CHECK: in