[PATCH] D25063: [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.

2016-10-31 Thread Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL285604: [x86][inline-asm][AVX512][clang][PART-1] Introducing 
"k" and "Yk" constraints… (authored by mzuckerm).

Changed prior to commit:
  https://reviews.llvm.org/D25063?vs=75542&id=76445#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D25063

Files:
  cfe/trunk/lib/Basic/Targets.cpp
  cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c

Index: cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c
===
--- cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c
+++ cfe/trunk/test/CodeGen/avx512-kconstraints-att_inline_asm.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -O0  -emit-llvm -S -o - -Wall -Werror | FileCheck %s
+// This test checks validity of AT&T/GCC-style inline assembly for the AVX512 k and Yk constraints.
+// It also checks that the mask register accepts flexible operand types (size <= 64 bits).
+
+void mask_Yk_i8(char msk){ 
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+ asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   : "Yk" (msk));   //inputs
+}
+
+void mask_Yk_i16(short msk){
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+ asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk));  //inputs
+}
+
+void mask_Yk_i32(int msk){
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk));   //inputs
+}
+
+void mask_Yk_i64(long long msk){
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+ asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk));   //inputs
+}
+
+void k_wise_op_i8(char msk_dst,char msk_src1,char msk_src2){
+//CHECK: kandw %k1, %k0, %k0
+ asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i16(short msk_dst, short msk_src1, short msk_src2){
+//CHECK: kandw %k1, %k0, %k0
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i32(int msk_dst, int msk_src1, int msk_src2){
+//CHECK: kandw %k1, %k0, %k0
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i64(long long msk_dst, long long msk_src1, long long msk_src2){
+//CHECK: kandw %k1, %k0, %k0
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
Index: cfe/trunk/lib/Basic/Targets.cpp
===
--- cfe/trunk/lib/Basic/Targets.cpp
+++ cfe/trunk/lib/Basic/Targets.cpp
@@ -3997,6 +3997,7 @@
 case 't': // Any SSE register, when SSE2 is enabled.
 case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
 case 'm': // Any MMX register, when inter-unit moves enabled.
+case 'k': // AVX512 arch mask registers: k1-k7.
   Info.setAllowsRegister();
   return true;
 }
@@ -4018,6 +4019,8 @@
   case 'q': // Any register accessible as [r]l: a, b, c, and d.
   case 'y': // Any MMX register.
   case 'x': // Any SSE register.
+  case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
+// for intermediate k reg operations).
   case 'Q': // Any register accessible as [r]h: a, b, c, and d.
   case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
   case 'l': // "Index" registers: any general register that can be used as an
@@ -4051,6 +4054,8 @@
 unsigned Size) const {
   switch (Constraint[0]) {
   default: break;
+  case 'k':
+  // Registers k0-k7 (AVX512) size limit is 64 bit.
   case 'y':
 return Size <= 64;
   case 'f':
@@ -4071,6 +4076,7 @@
 default: break;
 case 'm':
   // 'Ym' is synonymous with 'y'.
+case 'k':
   return Size <= 64;
 case 'i':
 case 't':
@@ -4102,6 +4108,20 @@
 return std::string("{st}");
   case 'u': // second from top of floating point stack.
 return std::string("{st(1)}"); // second from top of floating point stack.
+  case 'Y':
+switch (Constraint[1]) {
+default:
+  // Break from inner switch and fall through (copy single char),
+  // continue parsing after copying the current constraint into 
+  // the return string.
+  break;
+case 'k':
+  // "^" hints llvm that this is a 2 letter constraint.
+  // "Constraint++" is used to promote the string iterator 
+  // to the next constraint.
+  return std::string("^") + std::string(Constraint++, 2);
+} 
+LLVM_FALLTHROUGH;
   default:
 return std::string(1, *Constraint);
   }
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25063: [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.

2016-10-24 Thread Reid Kleckner via cfe-commits
rnk accepted this revision.
rnk added a comment.
This revision is now accepted and ready to land.

lgtm


Repository:
  rL LLVM

https://reviews.llvm.org/D25063



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25063: [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.

2016-10-23 Thread Matan via cfe-commits
mharoush added inline comments.



Comment at: test/CodeGen/avx512-kconstraints-att_inline_asm.c:6
+void mask_Yk_i8(char msk){ 
+//CHECK: #APP 
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}

rnk wrote:
> The LLVM IR won't have #APP markers in it. Does this test really pass?
Sorry I uploaded the wrong patch file.


Repository:
  rL LLVM

https://reviews.llvm.org/D25063



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25063: [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.

2016-10-23 Thread Matan via cfe-commits
mharoush set the repository for this revision to rL LLVM.
mharoush updated this revision to Diff 75542.
mharoush added a comment.

Only the test is affected (correct version this time: checking LLVM IR instead
of x86 asm).
This test depends on D25012 being applied.


Repository:
  rL LLVM

https://reviews.llvm.org/D25063

Files:
  lib/Basic/Targets.cpp
  test/CodeGen/avx512-kconstraints-att_inline_asm.c

Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -3987,6 +3987,7 @@
 case 't': // Any SSE register, when SSE2 is enabled.
 case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
 case 'm': // Any MMX register, when inter-unit moves enabled.
+case 'k': // AVX512 arch mask registers: k1-k7.
   Info.setAllowsRegister();
   return true;
 }
@@ -4008,6 +4009,8 @@
   case 'q': // Any register accessible as [r]l: a, b, c, and d.
   case 'y': // Any MMX register.
   case 'x': // Any SSE register.
+  case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
+// for intermediate k reg operations).
   case 'Q': // Any register accessible as [r]h: a, b, c, and d.
   case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
   case 'l': // "Index" registers: any general register that can be used as an
@@ -4041,6 +4044,8 @@
 unsigned Size) const {
   switch (Constraint[0]) {
   default: break;
+  case 'k':
+  // Registers k0-k7 (AVX512) size limit is 64 bit.
   case 'y':
 return Size <= 64;
   case 'f':
@@ -4061,6 +4066,7 @@
 default: break;
 case 'm':
   // 'Ym' is synonymous with 'y'.
+case 'k':
   return Size <= 64;
 case 'i':
 case 't':
@@ -4092,6 +4098,20 @@
 return std::string("{st}");
   case 'u': // second from top of floating point stack.
 return std::string("{st(1)}"); // second from top of floating point stack.
+  case 'Y':
+switch (Constraint[1]) {
+default:
+  // Break from inner switch and fall through (copy single char),
+  // continue parsing after copying the current constraint into 
+  // the return string.
+  break;
+case 'k':
+  // "^" hints llvm that this is a 2 letter constraint.
+  // "Constraint++" is used to promote the string iterator 
+  // to the next constraint.
+  return std::string("^") + std::string(Constraint++, 2);
+} 
+LLVM_FALLTHROUGH;
   default:
 return std::string(1, *Constraint);
   }
Index: test/CodeGen/avx512-kconstraints-att_inline_asm.c
===
--- test/CodeGen/avx512-kconstraints-att_inline_asm.c
+++ test/CodeGen/avx512-kconstraints-att_inline_asm.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 %s -target-cpu skylake-avx512 -O0  -emit-llvm -S -o - -Wall -Werror | FileCheck %s
+// This test checks validity of AT&T/GCC-style inline assembly for the AVX512 k and Yk constraints.
+// It also checks that the mask register accepts flexible operand types (size <= 64 bits).
+
+void mask_Yk_i8(char msk){ 
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+	asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   : "Yk" (msk));   //inputs
+}
+
+void mask_Yk_i16(short msk){
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+	asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk));  //inputs
+}
+
+void mask_Yk_i32(int msk){
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk)); 	//inputs
+}
+
+void mask_Yk_i64(long long msk){
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+	asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk)); 	//inputs
+}
+
+void k_wise_op_i8(char msk_dst,char msk_src1,char msk_src2){
+//CHECK: kandw %k1, %k0, %k0
+ asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i16(short msk_dst, short msk_src1, short msk_src2){
+//CHECK: kandw %k1, %k0, %k0
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i32(int msk_dst, int msk_src1, int msk_src2){
+//CHECK: kandw %k1, %k0, %k0
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i64(long long msk_dst, long long msk_src1, long long msk_src2){
+//CHECK: kandw %k1, %k0, %k0
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25063: [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.

2016-10-20 Thread Reid Kleckner via cfe-commits
rnk added inline comments.



Comment at: test/CodeGen/avx512-kconstraints-att_inline_asm.c:6
+void mask_Yk_i8(char msk){ 
+//CHECK: #APP 
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}

The LLVM IR won't have #APP markers in it. Does this test really pass?


https://reviews.llvm.org/D25063



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D25063: [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.

2016-10-20 Thread Matan via cfe-commits
mharoush removed rL LLVM as the repository for this revision.
mharoush updated this revision to Diff 75277.
mharoush added a comment.
Herald added a subscriber: mehdi_amini.

I renamed the test file to be more informative, added LLVM_FALLTHROUGH, removed 
check of {z} and changed the test to check LLVM IR.


https://reviews.llvm.org/D25063

Files:
  lib/Basic/Targets.cpp
  test/CodeGen/avx512-kconstraints-att_inline_asm.c

Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -3987,6 +3987,7 @@
 case 't': // Any SSE register, when SSE2 is enabled.
 case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled.
 case 'm': // Any MMX register, when inter-unit moves enabled.
+case 'k': // AVX512 arch mask registers: k1-k7.
   Info.setAllowsRegister();
   return true;
 }
@@ -4008,6 +4009,8 @@
   case 'q': // Any register accessible as [r]l: a, b, c, and d.
   case 'y': // Any MMX register.
   case 'x': // Any SSE register.
+  case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0
+// for intermediate k reg operations).
   case 'Q': // Any register accessible as [r]h: a, b, c, and d.
   case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp.
   case 'l': // "Index" registers: any general register that can be used as an
@@ -4041,6 +4044,8 @@
 unsigned Size) const {
   switch (Constraint[0]) {
   default: break;
+  case 'k':
+  // Registers k0-k7 (AVX512) size limit is 64 bit.
   case 'y':
 return Size <= 64;
   case 'f':
@@ -4061,6 +4066,7 @@
 default: break;
 case 'm':
   // 'Ym' is synonymous with 'y'.
+case 'k':
   return Size <= 64;
 case 'i':
 case 't':
@@ -4092,6 +4098,20 @@
 return std::string("{st}");
   case 'u': // second from top of floating point stack.
 return std::string("{st(1)}"); // second from top of floating point stack.
+  case 'Y':
+switch (Constraint[1]) {
+default:
+  // Break from inner switch and fall through (copy single char),
+  // continue parsing after copying the current constraint into 
+  // the return string.
+  break;
+case 'k':
+  // "^" hints llvm that this is a 2 letter constraint.
+  // "Constraint++" is used to promote the string iterator 
+  // to the next constraint.
+  return std::string("^") + std::string(Constraint++, 2);
+} 
+LLVM_FALLTHROUGH;
   default:
 return std::string(1, *Constraint);
   }
Index: test/CodeGen/avx512-kconstraints-att_inline_asm.c
===
--- test/CodeGen/avx512-kconstraints-att_inline_asm.c
+++ test/CodeGen/avx512-kconstraints-att_inline_asm.c
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 %s -target-cpu skylake-avx512 -O0  -emit-llvm -S -o - -Wall -Werror | FileCheck %s
+// This test checks validity of AT&T/GCC-style inline assembly for the AVX512 k and Yk constraints.
+// It also checks that the mask register accepts flexible operand types (size <= 64 bits).
+
+void mask_Yk_i8(char msk){ 
+//CHECK: #APP 
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+//CHECK: #NO_APP 
+	asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   : "Yk" (msk));   //inputs
+}
+
+void mask_Yk_i16(short msk){
+//CHECK: #APP
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+//CHECK: #NO_APP
+	asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk));  //inputs
+}
+
+void mask_Yk_i32(int msk){
+//CHECK: #APP
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+//CHECK: #NO_APP
+asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk)); 	//inputs
+}
+
+void mask_Yk_i64(long long msk){
+//CHECK: #APP 
+//CHECK: vpaddb %xmm1, %xmm0, %xmm1 {%k1}
+//CHECK: #NO_APP 
+	asm ("vpaddb\t %%xmm1, %%xmm0, %%xmm1 %{%0%}\t"
+   ://output
+   :  "Yk" (msk)); 	//inputs
+}
+
+void k_wise_op_i8(char msk_dst,char msk_src1,char msk_src2){
+//CHECK: #APP
+//CHECK: kandw %k1, %k0, %k0
+//CHECK: #NO_APP 
+ asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i16(short msk_dst, short msk_src1, short msk_src2){
+//CHECK: #APP
+//CHECK: kandw %k1, %k0, %k0
+//CHECK: #NO_APP 
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i32(int msk_dst, int msk_src1, int msk_src2){
+//CHECK: #APP
+//CHECK: kandw %k1, %k0, %k0
+//CHECK: #NO_APP 
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
+
+void k_wise_op_i64(long long msk_dst, long long msk_src1, long long msk_src2){
+//CHECK: #APP
+//CHECK: kandw %k1, %k0, %k0
+//CHECK: #NO_APP 
+  asm ("kandw\t%2, %1, %0"
+   : "=k" (msk_dst)
+   : "k" (msk_src1), "k" (msk_src2));
+}
\ No newline at end of file

[PATCH] D25063: [x86][inline-asm][AVX512][clang][PART-1] Introducing "k" and "Yk" constraints for extended inline assembly, enabling use of AVX512 masked vectorized instructions.

2016-10-05 Thread Reid Kleckner via cfe-commits
rnk added inline comments.


> avx512-mask-op-inline_asm_specific.c:1
> +// RUN: %clang_cc1 %s -target-cpu skylake-avx512 -O0  -S -o - -Wall -Werror 
> | FileCheck %s
> +// This test checks validity of inline assembly for avx512 supported 
> constraint k and Yk along with the required 

Please check the generated LLVM IR, not the assembly.

Repository:
  rL LLVM

https://reviews.llvm.org/D25063



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits