[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-20 Thread LiuChen via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG72e4bf12eec4: [X86] Support some missing intrinsics 
(authored by LiuChen3).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,51 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_i32loscatter_epi64(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,169 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// 

[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-19 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added inline comments.



Comment at: clang/test/CodeGen/X86/avx512f-builtins.c:10853
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, 
__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64

craig.topper wrote:
> Any idea why _mm512_mask_i32loscatter_epi64 is documented as only being KNCI?
After confirmation, we think it’s a intrinsics guide bug. The intrinsics guide 
will be updated later.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 338445.
LiuChen3 added a comment.

Fix format issue.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,51 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_i32loscatter_epi64(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,169 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VPGATHERDQ  instructions.
+///
+/// \operation

[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-19 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 338443.
LiuChen3 added a comment.

1. Rebase.
2. Adding _mm512_i32loscatter_epi64 and _mm512_mask_i32loscatter_epi64.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,51 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_i32loscatter_epi64(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,169 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic 

[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-14 Thread Pengfei Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/Headers/avx512fintrin.h:9611
+/// starting at location \a base_addr at packed 32-bit integer indices stored 
in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is

craig.topper wrote:
> Use of "dst" here doesn't make sense since it isn't a parameter name so 
> shouldn't be abbreviated.
Can we arrange "dst" as the abbreviation for the result of the intrinsic?
Intrinsic guide uses such naming conversion and correspondings to its operation.
We are trying to unify the descriptions here and intrinsic guide.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-14 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: clang/lib/Headers/avx512fintrin.h:9593
+/// locations starting at location \a base_addr at packed 32-bit integer 
indices
+/// stored in the lower half of \a vindex scaled by \a scale them in dst.
+///

"scale them in dst" doesn't make sense.



Comment at: clang/lib/Headers/avx512fintrin.h:9611
+/// starting at location \a base_addr at packed 32-bit integer indices stored 
in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is

Use of "dst" here doesn't make sense since it isn't a parameter name so 
shouldn't be abbreviated.



Comment at: clang/lib/Headers/avx512fintrin.h:9636
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///

Same



Comment at: clang/lib/Headers/avx512fintrin.h:9654
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst using writemask \a mask (elements
+/// are copied from \a src when the corresponding mask bit is not set).

Same



Comment at: clang/test/CodeGen/X86/avx512f-builtins.c:10853
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, 
__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64

Any idea why _mm512_mask_i32loscatter_epi64 is documented as only being KNCI?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 updated this revision to Diff 337301.
LiuChen3 added a comment.

Address Simon's comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,39 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,130 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VPGATHERDQ  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
+  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in 

[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

That's why I said "completeness" :)




Comment at: clang/lib/Headers/avx512fintrin.h:9692
+   (scale))
+/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
+/// to memory locations starting at location \a base_addr at packed 32-bit

Newline between instructions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added a comment.

In D100368#2685559 , @LiuChen3 wrote:

> In D100368#2685189 , @RKSimon wrote:
>
>> Add _mm512_i32loscatter_epi64  and _mm512_mask_i32loscatter_epi64 for 
>> completeness?
>
> It seems we doesn't support KNCNI in LLVM. Am I right?

Correct. We do not support KNCI.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 added a comment.

In D100368#2685189 , @RKSimon wrote:

> Add _mm512_i32loscatter_epi64  and _mm512_mask_i32loscatter_epi64 for 
> completeness?

It seems we doesn't support KNCNI in LLVM. Am I right?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Add _mm512_i32loscatter_epi64  and _mm512_mask_i32loscatter_epi64 for 
completeness?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100368/new/

https://reviews.llvm.org/D100368

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100368: [X86] Support some missing intrinsics

2021-04-13 Thread LiuChen via Phabricator via cfe-commits
LiuChen3 created this revision.
Herald added a subscriber: pengfei.
LiuChen3 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Support for _mm512_i32logather_pd, _mm512_mask_i32logather_pd,
_mm512_i32logather_epi64, _mm512_mask_i32logather_epi64, _mm512_i32loscatter_pd,
_mm512_mask_i32loscatter_pd.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100368

Files:
  clang/lib/Headers/avx512fintrin.h
  clang/test/CodeGen/X86/avx512f-builtins.c

Index: clang/test/CodeGen/X86/avx512f-builtins.c
===
--- clang/test/CodeGen/X86/avx512f-builtins.c
+++ clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,39 @@
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> 
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
Index: clang/lib/Headers/avx512fintrin.h
===
--- clang/lib/Headers/avx512fintrin.h
+++ clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,129 @@
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale them in dst.
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)\
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the  VGATHERDPD  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+/// addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+/// dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+/// dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)\
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),  \
+   (base_addr), (scale))
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the  VPGATHERDQ  instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_epi64(vindex, base_addr, scale) \
+