On Mon, Aug 17, 2020 at 6:08 PM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Fri, Aug 14, 2020 at 10:26 AM Hongtao Liu <crazy...@gmail.com> wrote:
> >
> > Enable operator or/xor/and/andn/not for mask register, kxnor is not
> > enabled since there's no corresponding instruction for general
> > registers.
> >
> > gcc/
> >         PR target/88808
> >         * config/i386/i386.md: (*movsi_internal): Adjust constraints
> >         for mask registers.
> >         (*movhi_internal): Ditto.
> >         (*movqi_internal): Ditto.
> >         (*anddi_1): Support mask register operations
> >         (*and<mode>_1): Ditto.
> >         (*andqi_1): Ditto.
> >         (*andn<mode>_1): Ditto.
> >         (*<code><mode>_1): Ditto.
> >         (*<code>qi_1): Ditto.
> >         (*one_cmpl<mode>2_1): Ditto.
> >         (*one_cmplsi2_1_zext): Ditto.
> >         (*one_cmplqi2_1): Ditto.
> >
> > gcc/testsuite/
> >         * gcc.target/i386/bitwise_mask_op-1.c: New test.
> >         * gcc.target/i386/bitwise_mask_op-2.c: New test.
> >         * gcc.target/i386/avx512bw-kunpckwd-1.c: Adjust testcase.
> >         * gcc.target/i386/avx512bw-kunpckwd-3.c: Ditto.
> >         * gcc.target/i386/avx512dq-kmovb-5.c: Ditto.
> >         * gcc.target/i386/avx512f-kmovw-5.c: Ditto.
>
> index 74d207c3711..e8ad79d1b0a 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2294,7 +2294,7 @@
>
>  (define_insn "*movsi_internal"
>    [(set (match_operand:SI 0 "nonimmediate_operand"
> -    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm,*k")
> +    "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm,k")
>      (match_operand:SI 1 "general_operand"
>      "g ,re,C ,*y,m  ,*y,*y,r  ,C ,*v,m ,*v,*v,r  ,*r,*km,*k ,CBC"))]
>    "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
>
> I'd rather see *k everywhere, also with *movqi_internal and
> *movhi_internal patterns. The "*" means that the allocator won't
> allocate a mask register by default, but it will be used to optimize
> moves. With the above change, you are risking that during integer
> register pressure, the register allocator will allocate zero to a mask
> register, and later "optimize" the move with a direct maskreg-intreg
> move.
>
> The current strategy is that only general registers get allocated for
> integer modes. Let's keep it this way for now.
>

Yes,  though it would fail gcc.target/i386/avx512dq-pr88465.c and
gcc.target/i386/avx512f-pr88465.c, i think it's more reasonable not to
move zero into mask register directly.

> Otherwise, the patchset LGTM, but please test the suggested changes and 
> repost.
>
> BTW: Do you plan to remove mask operations from sse.md? ATM, they are
> used to distinguish mask operations, generated from builtins from
> generic operations, so I'd like to keep them for a while. The drawback
> is, that they are not combined with other operations, but at the end
> of the day, this is what the programmer asked for by using builtins.

Agree, I prefer to keep them.

>
> Uros.

Bootstrap is ok, regression test is ok for i386/x86-64 backend(After
adjusting testcase).

impact for SPEC2017 on SKL.

500.perlbench_r 0.00%
502.gcc_r 1.59%
505.mcf_r 1.49%
520.omnetpp_r 1.91%
523.xalancbmk_r -1.22%
525.x264_r 0.00%
531.deepsjeng_r 0.00%
541.leela_r -0.22%
548.exchange2_r 2.27%
557.xz_r 0.63%
INT geomean 0.64%

503.bwaves_r 3.68%
507.cactuBSSN_r -0.62%
508.namd_r 0.51%
510.parest_r -0.16%
511.povray_r 0.57%
519.lbm_r 0.50%
521.wrf_r 0.00%
526.blender_r 0.00%
527.cam4_r 0.00%
538.imagick_r -0.41%
544.nab_r 0.00%
549.fotonik3d_r -0.20%
554.roms_r 4.19%
FP geomean 0.66%

-- 
BR,
Hongtao
From e546516449ec4ed9301b83a063efdefbf0f7e75a Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao....@intel.com>
Date: Thu, 13 Aug 2020 14:20:43 +0800
Subject: [PATCH 4/4] Enable bitwise operation for type mask.

Enable operator or/xor/and/andn/not for mask register, kxnor is not
enabled since there's no corresponding instruction for general
registers.

gcc/
	PR target/88808
	* config/i386/i386.md: (*movsi_internal): Adjust constraints
	for mask registers.
	(*movhi_internal): Ditto.
	(*movqi_internal): Ditto.
	(*anddi_1): Support mask register operations
	(*and<mode>_1): Ditto.
	(*andqi_1): Ditto.
	(*andn<mode>_1): Ditto.
	(*<code><mode>_1): Ditto.
	(*<code>qi_1): Ditto.
	(*one_cmpl<mode>2_1): Ditto.
	(*one_cmplsi2_1_zext): Ditto.
	(*one_cmplqi2_1): Ditto.

gcc/testsuite/
	* gcc.target/i386/bitwise_mask_op-1.c: New test.
	* gcc.target/i386/bitwise_mask_op-2.c: New test.
	* gcc.target/i386/avx512bw-kunpckwd-1.c: Adjust testcase.
	* gcc.target/i386/avx512bw-kunpckwd-3.c: Ditto.
	* gcc.target/i386/avx512dq-kmovb-5.c: Ditto.
	* gcc.target/i386/avx512f-kmovw-5.c: Ditto.
	* gcc.target/i386/avx512bw-pr88465.c: Ditto.
	* gcc.target/i386/avx512f-pr88465.c: Ditto.
---
 gcc/config/i386/i386.md                       | 260 +++++++++++++-----
 .../gcc.target/i386/avx512bw-kunpckwd-1.c     |   2 +-
 .../gcc.target/i386/avx512bw-kunpckwd-3.c     |   2 +-
 .../gcc.target/i386/avx512dq-kmovb-5.c        |   2 +-
 .../gcc.target/i386/avx512dq-pr88465.c        |   4 +-
 .../gcc.target/i386/avx512f-kmovw-5.c         |   2 +-
 .../gcc.target/i386/avx512f-pr88465.c         |   4 +-
 .../gcc.target/i386/bitwise_mask_op-1.c       | 177 ++++++++++++
 .../gcc.target/i386/bitwise_mask_op-2.c       |   7 +
 9 files changed, 380 insertions(+), 80 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/bitwise_mask_op-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/bitwise_mask_op-2.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3a15941c3e8..4255b9a7a64 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2403,8 +2403,8 @@
 	   (symbol_ref "true")))])
 
 (define_insn "*movhi_internal"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m,k")
-	(match_operand:HI 1 "general_operand"      "r ,rn,rm,rn,r,km,k,k,CBC"))]
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k")
+	(match_operand:HI 1 "general_operand"      "r ,rn,rm,rn,*r,*km,*k,*k,CBC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -2491,9 +2491,9 @@
 
 (define_insn "*movqi_internal"
   [(set (match_operand:QI 0 "nonimmediate_operand"
-			"=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k,k,k")
+			"=Q,R,r,q,q,r,r ,?r,m ,*k,*k,*r,*m,*k,*k,*k")
 	(match_operand:QI 1 "general_operand"
-			"Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))]
+			"Q ,R,r,n,m,q,rn, m,qn,*r,*k,*k,*k,*m,C,BC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   char buf[128];
@@ -9044,19 +9044,21 @@
 })
 
 (define_insn "*anddi_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,k")
 	(and:DI
-	 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
-	 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L")))
+	 (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
+	 (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
   "@
    and{l}\t{%k2, %k0|%k0, %k2}
    and{q}\t{%2, %0|%0, %2}
    and{q}\t{%2, %0|%0, %2}
-   #"
-  [(set_attr "type" "alu,alu,alu,imovx")
-   (set_attr "length_immediate" "*,*,*,0")
+   and{q}\t{%2, %0|%0, %2}
+   kandq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "x64,x64,x64,x64,avx512bw")
+   (set_attr "type" "alu,alu,alu,imovx,msklog")
+   (set_attr "length_immediate" "*,*,*,0,*")
    (set (attr "prefix_rex")
      (if_then_else
        (and (eq_attr "type" "imovx")
@@ -9064,7 +9066,7 @@
 		 (match_operand 1 "ext_QIreg_operand")))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "SI,DI,DI,SI")])
+   (set_attr "mode" "SI,DI,DI,SI,DI")])
 
 (define_insn_and_split "*anddi_1_btr"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
@@ -9130,17 +9132,25 @@
    (set_attr "mode" "SI")])
 
 (define_insn "*and<mode>_1"
-  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya")
-	(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm")
-		   (match_operand:SWI24 2 "<general_operand>" "r<i>,m,L")))
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,k")
+	(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
+		   (match_operand:SWI24 2 "<general_operand>" "r<i>,m,L,k")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (AND, <MODE>mode, operands)"
   "@
    and{<imodesuffix>}\t{%2, %0|%0, %2}
    and{<imodesuffix>}\t{%2, %0|%0, %2}
-   #"
-  [(set_attr "type" "alu,alu,imovx")
-   (set_attr "length_immediate" "*,*,0")
+   and{<imodesuffix>}\t{%2, %0|%0, %2}
+   kand<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set (attr "isa")
+	(cond [(eq_attr "alternative" "3")
+		 (if_then_else (eq_attr "mode" "SI")
+		   (const_string "avx512bw")
+		   (const_string "avx512f"))
+	      ]
+	      (const_string "*")))
+   (set_attr "type" "alu,alu,imovx,msklog")
+   (set_attr "length_immediate" "*,*,0,*")
    (set (attr "prefix_rex")
      (if_then_else
        (and (eq_attr "type" "imovx")
@@ -9148,20 +9158,39 @@
 		 (match_operand 1 "ext_QIreg_operand")))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "<MODE>,<MODE>,SI")])
+   (set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
 
 (define_insn "*andqi_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
-	(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
-		(match_operand:QI 2 "general_operand" "qn,m,rn")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,k")
+	(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
+		(match_operand:QI 2 "general_operand" "qn,m,rn,k")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (AND, QImode, operands)"
-  "@
-   and{b}\t{%2, %0|%0, %2}
-   and{b}\t{%2, %0|%0, %2}
-   and{l}\t{%k2, %k0|%k0, %k2}"
-  [(set_attr "type" "alu")
-   (set_attr "mode" "QI,QI,SI")
+{
+  switch (which_alternative)
+    {
+     case 0:
+     case 1:
+       return "and{b}\t{%2, %0|%0, %2}";
+     case 2:
+       return "and{l}\t{%k2, %k0|%k0, %k2}";
+     case 3:
+       if (TARGET_AVX512DQ)
+	 return "kandb\t{%2, %1, %0|%0, %1, %2}";
+       return "kandw\t{%2, %1, %0|%0, %1, %2}";
+     default:
+       gcc_unreachable ();
+     }
+}
+  [(set_attr "type" "alu,alu,alu,msklog")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "2")
+		 (const_string "SI")
+		(and (eq_attr "alternative" "3")
+		     (match_test "!TARGET_AVX512DQ"))
+		 (const_string "HI")
+	       ]
+	       (const_string "QI")))
    ;; Potential partial reg stall on alternative 2.
    (set (attr "preferred_for_speed")
      (cond [(eq_attr "alternative" "2")
@@ -9539,28 +9568,53 @@
 })
 
 (define_insn "*andn<mode>_1"
-  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+  [(set (match_operand:SWI48 0 "register_operand" "=r,r,k")
 	(and:SWI48
-	  (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
-	  (match_operand:SWI48 2 "nonimmediate_operand" "r,m")))
+	  (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
+	  (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI"
-  "andn\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "bitmanip")
-   (set_attr "btver2_decode" "direct, double")
+  "TARGET_BMI || TARGET_AVX512BW"
+  "@
+    andn\t{%2, %1, %0|%0, %1, %2}
+    andn\t{%2, %1, %0|%0, %1, %2}
+    kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "bmi,bmi,avx512bw")
+   (set_attr "type" "bitmanip,bitmanip,msklog")
+   (set_attr "btver2_decode" "direct, double,*")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*andn<mode>_1"
-  [(set (match_operand:SWI12 0 "register_operand" "=r")
+  [(set (match_operand:SWI12 0 "register_operand" "=r,k")
 	(and:SWI12
-	  (not:SWI12 (match_operand:SWI12 1 "register_operand" "r"))
-	  (match_operand:SWI12 2 "register_operand" "r")))
+	  (not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
+	  (match_operand:SWI12 2 "register_operand" "r,k")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI"
-  "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}"
-  [(set_attr "type" "bitmanip")
-   (set_attr "btver2_decode" "direct")
-   (set_attr "mode" "SI")])
+  "TARGET_BMI || TARGET_AVX512BW"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}";
+    case 1:
+      if (TARGET_AVX512DQ)
+	return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+      return "kandnw\t{%2, %1, %0|%0, %1, %2}";
+    default:
+      gcc_unreachable ();
+    }
+}
+
+  [(set_attr "isa" "bmi,avx512f")
+   (set_attr "type" "bitmanip,msklog")
+   (set_attr "btver2_decode" "direct,*")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "0")
+		 (const_string "SI")
+	       (and (eq_attr "alternative" "1")
+		    (match_test "!TARGET_AVX512DQ"))
+		  (const_string "HI")
+	      ]
+	      (const_string "<MODE>")))])
 
 (define_insn "*andn_<mode>_ccno"
   [(set (reg FLAGS_REG)
@@ -9631,14 +9685,24 @@
 })
 
 (define_insn "*<code><mode>_1"
-  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r")
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,k")
 	(any_or:SWI248
-	 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0")
-	 (match_operand:SWI248 2 "<general_operand>" "r<i>,m")))
+	 (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
+	 (match_operand:SWI248 2 "<general_operand>" "r<i>,m,k")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
-  [(set_attr "type" "alu")
+  "@
+    <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+    <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+    k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set (attr "isa")
+	(cond [(eq_attr "alternative" "2")
+		 (if_then_else (eq_attr "mode" "SI,DI")
+		   (const_string "avx512bw")
+		   (const_string "avx512f"))
+	      ]
+	      (const_string "*")))
+   (set_attr "type" "alu, alu, msklog")
    (set_attr "mode" "<MODE>")])
 
 (define_insn_and_split "*iordi_1_bts"
@@ -9711,17 +9775,37 @@
    (set_attr "mode" "SI")])
 
 (define_insn "*<code>qi_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
-	(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
-		   (match_operand:QI 2 "general_operand" "qn,m,rn")))
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,k")
+	(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
+		   (match_operand:QI 2 "general_operand" "qn,m,rn,k")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, QImode, operands)"
-  "@
-   <logic>{b}\t{%2, %0|%0, %2}
-   <logic>{b}\t{%2, %0|%0, %2}
-   <logic>{l}\t{%k2, %k0|%k0, %k2}"
-  [(set_attr "type" "alu")
-   (set_attr "mode" "QI,QI,SI")
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return "<logic>{b}\t{%2, %0|%0, %2}";
+    case 2:
+      return "<logic>{l}\t{%k2, %k0|%k0, %k2}";
+    case 3:
+      if (TARGET_AVX512DQ)
+	return "k<logic>b\t{%2, %1, %0|%0, %1, %2}";
+      return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "*,*,*,avx512f")
+   (set_attr "type" "alu,alu,alu,msklog")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "2")
+		 (const_string "SI")
+		(and (eq_attr "alternative" "3")
+		     (match_test "!TARGET_AVX512DQ"))
+		 (const_string "HI")
+	       ]
+	       (const_string "QI")))
    ;; Potential partial reg stall on alternative 2.
    (set (attr "preferred_for_speed")
      (cond [(eq_attr "alternative" "2")
@@ -10370,31 +10454,63 @@
   "split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);")
 
 (define_insn "*one_cmpl<mode>2_1"
-  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
-	(not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))]
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,k")
+	(not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))]
   "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
-  "not{<imodesuffix>}\t%0"
-  [(set_attr "type" "negnot")
+  "@
+  not{<imodesuffix>}\t%0
+  knot<mskmodesuffix>\t{%1, %0|%0, %1}"
+  [(set (attr "isa")
+	(cond [(eq_attr "alternative" "2")
+		 (if_then_else (eq_attr "mode" "SI,DI")
+		   (const_string "avx512bw")
+		   (const_string "avx512f"))
+	      ]
+	      (const_string "*")))
+   (set_attr "type" "negnot,msklog")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*one_cmplsi2_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand" "=r,k")
 	(zero_extend:DI
-	  (not:SI (match_operand:SI 1 "register_operand" "0"))))]
+	  (not:SI (match_operand:SI 1 "register_operand" "0,k"))))]
   "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
-  "not{l}\t%k0"
-  [(set_attr "type" "negnot")
-   (set_attr "mode" "SI")])
+  "@
+    not{l}\t%k0
+    knotd\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "x64,avx512bw")
+   (set_attr "type" "negnot,msklog")
+   (set_attr "mode" "SI,SI")])
 
 (define_insn "*one_cmplqi2_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
-	(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,k")
+	(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
   "ix86_unary_operator_ok (NOT, QImode, operands)"
-  "@
-   not{b}\t%0
-   not{l}\t%k0"
-  [(set_attr "type" "negnot")
-   (set_attr "mode" "QI,SI")
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "not{b}\t%0";
+    case 1:
+      return "not{l}\t%k0";
+    case 2:
+      if (TARGET_AVX512DQ)
+	return "knotb\t{%1, %0|%0, %1}";
+      return "knotw\t{%1, %0|%0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "*,*,avx512f")
+   (set_attr "type" "negnot,negnot,msklog")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "1")
+		 (const_string "SI")
+		(and (eq_attr "alternative" "2")
+		     (match_test "!TARGET_AVX512DQ"))
+		 (const_string "HI")
+	       ]
+	       (const_string "QI")))
    ;; Potential partial reg stall on alternative 1.
    (set (attr "preferred_for_speed")
      (cond [(eq_attr "alternative" "1")
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c
index 94422f36010..46d9351f275 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -O2" } */
-/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
index c68ad8cc1f7..fe13f4f33fc 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -O2" } */
-/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c
index 49817097e26..114e03ee93d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mavx512dq -O2" } */
+/* { dg-options "-mavx512dq -mno-avx512bw -O2" } */
 /* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c
index a11fd26a44e..4690e7ba9e8 100644
--- a/gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c
@@ -1,8 +1,8 @@
 /* PR target/88465 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512dq -mno-avx512bw" } */
-/* { dg-final { scan-assembler-times "kxorb\[ \t]" 1 } } */
-/* { dg-final { scan-assembler-times "kxnorb\[ \t]" 1 } } */
+/* { dg-final { scan-assembler-times "kxorb\[ \t]" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "kxnorb\[ \t]" 1 { xfail *-*-* } } } */
 
 void
 foo (void)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c
index 7bb34d34d8d..79d37394b36 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-mavx512f -O2" } */
+/* { dg-options "-mavx512f -mno-avx512bw -O2" } */
 /* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88465.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88465.c
index e66ea64db02..b1ab9633522 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-pr88465.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88465.c
@@ -1,8 +1,8 @@
 /* PR target/88465 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512f -mno-avx512dq -mno-avx512bw" } */
-/* { dg-final { scan-assembler-times "kxorw\[ \t]" 2 } } */
-/* { dg-final { scan-assembler-times "kxnorw\[ \t]" 1 } } */
+/* { dg-final { scan-assembler-times "kxorw\[ \t]" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "kxnorw\[ \t]" 1 { xfail *-*-* } } } */
 
 void
 foo (void)
diff --git a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-1.c b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-1.c
new file mode 100644
index 00000000000..2757bcaaf50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-1.c
@@ -0,0 +1,177 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mno-avx512dq -O2" } */
+
+#include <immintrin.h>
+__m512i
+foo_orq (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  __mmask64 m2 = _mm512_cmpeq_epi8_mask (c, d);
+  return _mm512_mask_add_epi8 (c, m1 | m2, a, d);
+}
+
+/* { dg-final { scan-assembler-times "korq" "1" { target { ! ia32 } } } } */
+
+__m512i
+foo_ord (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  __mmask32 m2 = _mm512_cmpeq_epi16_mask (c, d);
+  return _mm512_mask_add_epi16 (c, m1 | m2, a, d);
+}
+
+/* { dg-final { scan-assembler-times "kord" "1" } }  */
+
+__m512i
+foo_orw (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d);
+  return _mm512_mask_add_epi32 (c, m1 | m2, a, d);
+}
+
+__m512i
+foo_orb (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  __mmask8 m2 = _mm512_cmpeq_epi64_mask (c, d);
+  return _mm512_mask_add_epi64 (c, m1 | m2, a, d);
+}
+
+/* { dg-final { scan-assembler-times "korw" "2" } }  */
+
+__m512i
+foo_xorq (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  __mmask64 m2 = _mm512_cmpeq_epi8_mask (c, d);
+  return _mm512_mask_add_epi8 (c, m1 ^ m2, a, d);
+}
+
+/* { dg-final { scan-assembler-times "kxorq" "1" { target { ! ia32 } } } }  */
+
+__m512i
+foo_xord (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  __mmask32 m2 = _mm512_cmpeq_epi16_mask (c, d);
+  return _mm512_mask_add_epi16 (c, m1 ^ m2, a, d);
+}
+
+/* { dg-final { scan-assembler-times "kxord" "1" } }  */
+
+__m512i
+foo_xorw (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d);
+  return _mm512_mask_add_epi32 (c, m1 ^ m2, a, d);
+}
+
+__m512i
+foo_xorb (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  __mmask8 m2 = _mm512_cmpeq_epi64_mask (c, d);
+  return _mm512_mask_add_epi64 (c, m1 ^ m2, a, d);
+}
+
+/* { dg-final { scan-assembler-times "korw" "2" } }  */
+
+__m512i
+foo_andq (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  __mmask64 m2 = _mm512_cmpeq_epi8_mask (c, d);
+  return _mm512_mask_add_epi8 (c, m1 & m2, a, d);
+}
+
+__m512i
+foo_andd (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  __mmask32 m2 = _mm512_cmpeq_epi16_mask (c, d);
+  return _mm512_mask_add_epi16 (c, m1 & m2, a, d);
+}
+
+__m512i
+foo_andw (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d);
+  return _mm512_mask_add_epi32 (c, m1 & m2, a, d);
+}
+
+__m512i
+foo_andb (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  __mmask8 m2 = _mm512_cmpeq_epi64_mask (c, d);
+  return _mm512_mask_add_epi64 (c, m1 & m2, a, d);
+}
+
+__m512i
+foo_andnq (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  __mmask64 m2 = _mm512_cmpeq_epi8_mask (c, d);
+  return _mm512_mask_add_epi8 (c, m1 & ~m2, a, d);
+}
+
+__m512i
+foo_andnd (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  __mmask32 m2 = _mm512_cmpeq_epi16_mask (c, d);
+  return _mm512_mask_add_epi16 (c, m1 & ~m2, a, d);
+}
+
+__m512i
+foo_andnw (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d);
+  return _mm512_mask_add_epi32 (c, m1 & ~m2, a, d);
+}
+
+__m512i
+foo_andnb (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  __mmask8 m2 = _mm512_cmpeq_epi64_mask (c, d);
+  return _mm512_mask_add_epi64 (c, m1 & ~m2, a, d);
+}
+
+__m512i
+foo_notq (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+  return _mm512_mask_add_epi8 (c, ~m1, a, d);
+}
+
+/* { dg-final { scan-assembler-times "knotq" "2" { target { ! ia32 } } } }  */
+
+__m512i
+foo_notd (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+  return _mm512_mask_add_epi16 (c, ~m1, a, d);
+}
+
+/* { dg-final { scan-assembler-times "knotd" "2" { target { ! ia32 } } } }  */
+
+__m512i
+foo_notw (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+  return _mm512_mask_add_epi32 (c, ~m1, a, d);
+}
+
+__m512i
+foo_notb (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+  return _mm512_mask_add_epi64 (c, ~m1, a, d);
+}
+
+/* { dg-final { scan-assembler-times "knotw" "4" } }  */
diff --git a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-2.c b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-2.c
new file mode 100644
index 00000000000..277c5a98079
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "knotb" "2" } }  */
+/* { dg-final { scan-assembler-times "korb" "1" } }  */
+/* { dg-final { scan-assembler-times "kxorb" "1" } }  */
+#include "bitwise_mask_op-1.c"
+
-- 
2.18.1

Reply via email to