On Mon, Aug 17, 2020 at 6:08 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Fri, Aug 14, 2020 at 10:26 AM Hongtao Liu <crazy...@gmail.com> wrote: > > > > Enable operator or/xor/and/andn/not for mask register, kxnor is not > > enabled since there's no corresponding instruction for general > > registers. > > > > gcc/ > > PR target/88808 > > * config/i386/i386.md: (*movsi_internal): Adjust constraints > > for mask registers. > > (*movhi_internal): Ditto. > > (*movqi_internal): Ditto. > > (*anddi_1): Support mask register operations > > (*and<mode>_1): Ditto. > > (*andqi_1): Ditto. > > (*andn<mode>_1): Ditto. > > (*<code><mode>_1): Ditto. > > (*<code>qi_1): Ditto. > > (*one_cmpl<mode>2_1): Ditto. > > (*one_cmplsi2_1_zext): Ditto. > > (*one_cmplqi2_1): Ditto. > > > > gcc/testsuite/ > > * gcc.target/i386/bitwise_mask_op-1.c: New test. > > * gcc.target/i386/bitwise_mask_op-2.c: New test. > > * gcc.target/i386/avx512bw-kunpckwd-1.c: Adjust testcase. > > * gcc.target/i386/avx512bw-kunpckwd-3.c: Ditto. > > * gcc.target/i386/avx512dq-kmovb-5.c: Ditto. > > * gcc.target/i386/avx512f-kmovw-5.c: Ditto. > > index 74d207c3711..e8ad79d1b0a 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -2294,7 +2294,7 @@ > > (define_insn "*movsi_internal" > [(set (match_operand:SI 0 "nonimmediate_operand" > - "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm,*k") > + "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm,k") > (match_operand:SI 1 "general_operand" > "g ,re,C ,*y,m ,*y,*y,r ,C ,*v,m ,*v,*v,r ,*r,*km,*k ,CBC"))] > "!(MEM_P (operands[0]) && MEM_P (operands[1]))" > > I'd rather see *k everywhere, also with *movqi_internal and > *movhi_internal patterns. The "*" means that the allocator won't > allocate a mask register by default, but it will be used to optimize > moves. 
With the above change, you are risking that during integer > register pressure, the register allocator will allocate zero to a mask > register, and later "optimize" the move with a direct maskreg-intreg > move. > > The current strategy is that only general registers get allocated for > integer modes. Let's keep it this way for now. >
Yes. Although that makes gcc.target/i386/avx512dq-pr88465.c and gcc.target/i386/avx512f-pr88465.c fail, I think it's more reasonable not to move zero into a mask register directly. > Otherwise, the patchset LGTM, but please test the suggested changes and > repost. > > BTW: Do you plan to remove mask operations from sse.md? ATM, they are > used to distinguish mask operations, generated from builtins from > generic operations, so I'd like to keep them for a while. The drawback > is, that they are not combined with other operations, but at the end > of the day, this is what the programmer asked for by using builtins. Agreed, I prefer to keep them. > > Uros. Bootstrap is OK, and the regression tests pass for the i386/x86-64 backend (after adjusting the testcases). Impact on SPEC2017 on SKL: 500.perlbench_r 0.00% 502.gcc_r 1.59% 505.mcf_r 1.49% 520.omnetpp_r 1.91% 523.xalancbmk_r -1.22% 525.x264_r 0.00% 531.deepsjeng_r 0.00% 541.leela_r -0.22% 548.exchange2_r 2.27% 557.xz_r 0.63% INT geomean 0.64% 503.bwaves_r 3.68% 507.cactuBSSN_r -0.62% 508.namd_r 0.51% 510.parest_r -0.16% 511.povray_r 0.57% 519.lbm_r 0.50% 521.wrf_r 0.00% 526.blender_r 0.00% 527.cam4_r 0.00% 538.imagick_r -0.41% 544.nab_r 0.00% 549.fotonik3d_r -0.20% 554.roms_r 4.19% FP geomean 0.66% -- BR, Hongtao
From e546516449ec4ed9301b83a063efdefbf0f7e75a Mon Sep 17 00:00:00 2001 From: liuhongt <hongtao....@intel.com> Date: Thu, 13 Aug 2020 14:20:43 +0800 Subject: [PATCH 4/4] Enable bitwise operation for type mask. Enable operators or/xor/and/andn/not for mask registers; kxnor is not enabled since there's no corresponding instruction for general registers. gcc/ PR target/88808 * config/i386/i386.md (*movsi_internal): Adjust constraints for mask registers. (*movhi_internal): Ditto. (*movqi_internal): Ditto. (*anddi_1): Support mask register operations. (*and<mode>_1): Ditto. (*andqi_1): Ditto. (*andn<mode>_1): Ditto. (*<code><mode>_1): Ditto. (*<code>qi_1): Ditto. (*one_cmpl<mode>2_1): Ditto. (*one_cmplsi2_1_zext): Ditto. (*one_cmplqi2_1): Ditto. gcc/testsuite/ * gcc.target/i386/bitwise_mask_op-1.c: New test. * gcc.target/i386/bitwise_mask_op-2.c: New test. * gcc.target/i386/avx512bw-kunpckwd-1.c: Adjust testcase. * gcc.target/i386/avx512bw-kunpckwd-3.c: Ditto. * gcc.target/i386/avx512dq-kmovb-5.c: Ditto. * gcc.target/i386/avx512f-kmovw-5.c: Ditto. * gcc.target/i386/avx512dq-pr88465.c: Ditto. * gcc.target/i386/avx512f-pr88465.c: Ditto. 
--- gcc/config/i386/i386.md | 260 +++++++++++++----- .../gcc.target/i386/avx512bw-kunpckwd-1.c | 2 +- .../gcc.target/i386/avx512bw-kunpckwd-3.c | 2 +- .../gcc.target/i386/avx512dq-kmovb-5.c | 2 +- .../gcc.target/i386/avx512dq-pr88465.c | 4 +- .../gcc.target/i386/avx512f-kmovw-5.c | 2 +- .../gcc.target/i386/avx512f-pr88465.c | 4 +- .../gcc.target/i386/bitwise_mask_op-1.c | 177 ++++++++++++ .../gcc.target/i386/bitwise_mask_op-2.c | 7 + 9 files changed, 380 insertions(+), 80 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/bitwise_mask_op-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bitwise_mask_op-2.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3a15941c3e8..4255b9a7a64 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2403,8 +2403,8 @@ (symbol_ref "true")))]) (define_insn "*movhi_internal" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k ,r,m,k") - (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,r,km,k,k,CBC"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k") + (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,*r,*km,*k,*k,CBC"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -2491,9 +2491,9 @@ (define_insn "*movqi_internal" [(set (match_operand:QI 0 "nonimmediate_operand" - "=Q,R,r,q,q,r,r ,?r,m ,k,k,r,m,k,k,k") + "=Q,R,r,q,q,r,r ,?r,m ,*k,*k,*r,*m,*k,*k,*k") (match_operand:QI 1 "general_operand" - "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))] + "Q ,R,r,n,m,q,rn, m,qn,*r,*k,*k,*k,*m,C,BC"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { char buf[128]; @@ -9044,19 +9044,21 @@ }) (define_insn "*anddi_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,k") (and:DI - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L"))) + (match_operand:DI 1 "nonimmediate_operand" 
"%0,0,0,qm,k") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" "@ and{l}\t{%k2, %k0|%k0, %k2} and{q}\t{%2, %0|%0, %2} and{q}\t{%2, %0|%0, %2} - #" - [(set_attr "type" "alu,alu,alu,imovx") - (set_attr "length_immediate" "*,*,*,0") + # + kandq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "x64,x64,x64,x64,avx512bw") + (set_attr "type" "alu,alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,*,0,*") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -9064,7 +9066,7 @@ (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) - (set_attr "mode" "SI,DI,DI,SI")]) + (set_attr "mode" "SI,DI,DI,SI,DI")]) (define_insn_and_split "*anddi_1_btr" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") @@ -9130,17 +9132,25 @@ (set_attr "mode" "SI")]) (define_insn "*and<mode>_1" - [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya") - (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm") - (match_operand:SWI24 2 "<general_operand>" "r<i>,m,L"))) + [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,k") + (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k") + (match_operand:SWI24 2 "<general_operand>" "r<i>,m,L,k"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, <MODE>mode, operands)" "@ and{<imodesuffix>}\t{%2, %0|%0, %2} and{<imodesuffix>}\t{%2, %0|%0, %2} - #" - [(set_attr "type" "alu,alu,imovx") - (set_attr "length_immediate" "*,*,0") + # + kand<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set (attr "isa") + (cond [(eq_attr "alternative" "3") + (if_then_else (eq_attr "mode" "SI") + (const_string "avx512bw") + (const_string "avx512f")) + ] + (const_string "*"))) + (set_attr "type" "alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,0,*") (set (attr "prefix_rex") (if_then_else (and 
(eq_attr "type" "imovx") @@ -9148,20 +9158,39 @@ (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) - (set_attr "mode" "<MODE>,<MODE>,SI")]) + (set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")]) (define_insn "*andqi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") - (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qn,m,rn"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,k") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") + (match_operand:QI 2 "general_operand" "qn,m,rn,k"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, QImode, operands)" - "@ - and{b}\t{%2, %0|%0, %2} - and{b}\t{%2, %0|%0, %2} - and{l}\t{%k2, %k0|%k0, %k2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI,QI,SI") +{ + switch (which_alternative) + { + case 0: + case 1: + return "and{b}\t{%2, %0|%0, %2}"; + case 2: + return "and{l}\t{%k2, %k0|%k0, %k2}"; + case 3: + if (TARGET_AVX512DQ) + return "kandb\t{%2, %1, %0|%0, %1, %2}"; + return "kandw\t{%2, %1, %0|%0, %1, %2}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,alu,alu,msklog") + (set (attr "mode") + (cond [(eq_attr "alternative" "2") + (const_string "SI") + (and (eq_attr "alternative" "3") + (match_test "!TARGET_AVX512DQ")) + (const_string "HI") + ] + (const_string "QI"))) ;; Potential partial reg stall on alternative 2. 
(set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "2") @@ -9539,28 +9568,53 @@ }) (define_insn "*andn<mode>_1" - [(set (match_operand:SWI48 0 "register_operand" "=r,r") + [(set (match_operand:SWI48 0 "register_operand" "=r,r,k") (and:SWI48 - (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r")) - (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))) + (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k")) + (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_BMI" - "andn\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "bitmanip") - (set_attr "btver2_decode" "direct, double") + "TARGET_BMI || TARGET_AVX512BW" + "@ + andn\t{%2, %1, %0|%0, %1, %2} + andn\t{%2, %1, %0|%0, %1, %2} + kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "bmi,bmi,avx512bw") + (set_attr "type" "bitmanip,bitmanip,msklog") + (set_attr "btver2_decode" "direct, double,*") (set_attr "mode" "<MODE>")]) (define_insn "*andn<mode>_1" - [(set (match_operand:SWI12 0 "register_operand" "=r") + [(set (match_operand:SWI12 0 "register_operand" "=r,k") (and:SWI12 - (not:SWI12 (match_operand:SWI12 1 "register_operand" "r")) - (match_operand:SWI12 2 "register_operand" "r"))) + (not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k")) + (match_operand:SWI12 2 "register_operand" "r,k"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_BMI" - "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}" - [(set_attr "type" "bitmanip") - (set_attr "btver2_decode" "direct") - (set_attr "mode" "SI")]) + "TARGET_BMI || TARGET_AVX512BW" +{ + switch (which_alternative) + { + case 0: + return "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}"; + case 1: + if (TARGET_AVX512DQ) + return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"; + return "kandnw\t{%2, %1, %0|%0, %1, %2}"; + default: + gcc_unreachable (); + } +} + + [(set_attr "isa" "bmi,avx512f") + (set_attr "type" "bitmanip,msklog") + (set_attr "btver2_decode" "direct,*") + (set (attr "mode") + (cond [(eq_attr 
"alternative" "0") + (const_string "SI") + (and (eq_attr "alternative" "1") + (match_test "!TARGET_AVX512DQ")) + (const_string "HI") + ] + (const_string "<MODE>")))]) (define_insn "*andn_<mode>_ccno" [(set (reg FLAGS_REG) @@ -9631,14 +9685,24 @@ }) (define_insn "*<code><mode>_1" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r") + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,k") (any_or:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI248 2 "<general_operand>" "r<i>,m"))) + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k") + (match_operand:SWI248 2 "<general_operand>" "r<i>,m,k"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + <logic>{<imodesuffix>}\t{%2, %0|%0, %2} + <logic>{<imodesuffix>}\t{%2, %0|%0, %2} + k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set (attr "isa") + (cond [(eq_attr "alternative" "2") + (if_then_else (eq_attr "mode" "SI,DI") + (const_string "avx512bw") + (const_string "avx512f")) + ] + (const_string "*"))) + (set_attr "type" "alu, alu, msklog") (set_attr "mode" "<MODE>")]) (define_insn_and_split "*iordi_1_bts" @@ -9711,17 +9775,37 @@ (set_attr "mode" "SI")]) (define_insn "*<code>qi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") - (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qn,m,rn"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,k") + (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") + (match_operand:QI 2 "general_operand" "qn,m,rn,k"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, QImode, operands)" - "@ - <logic>{b}\t{%2, %0|%0, %2} - <logic>{b}\t{%2, %0|%0, %2} - <logic>{l}\t{%k2, %k0|%k0, %k2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI,QI,SI") +{ + switch (which_alternative) + { + case 0: + case 1: + return 
"<logic>{b}\t{%2, %0|%0, %2}"; + case 2: + return "<logic>{l}\t{%k2, %k0|%k0, %k2}"; + case 3: + if (TARGET_AVX512DQ) + return "k<logic>b\t{%2, %1, %0|%0, %1, %2}"; + return "k<logic>w\t{%2, %1, %0|%0, %1, %2}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,*,*,avx512f") + (set_attr "type" "alu,alu,alu,msklog") + (set (attr "mode") + (cond [(eq_attr "alternative" "2") + (const_string "SI") + (and (eq_attr "alternative" "3") + (match_test "!TARGET_AVX512DQ")) + (const_string "HI") + ] + (const_string "QI"))) ;; Potential partial reg stall on alternative 2. (set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "2") @@ -10370,31 +10454,63 @@ "split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);") (define_insn "*one_cmpl<mode>2_1" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") - (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))] + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,k") + (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))] "ix86_unary_operator_ok (NOT, <MODE>mode, operands)" - "not{<imodesuffix>}\t%0" - [(set_attr "type" "negnot") + "@ + not{<imodesuffix>}\t%0 + knot<mskmodesuffix>\t{%1, %0|%0, %1}" + [(set (attr "isa") + (cond [(eq_attr "alternative" "1") + (if_then_else (eq_attr "mode" "SI,DI") + (const_string "avx512bw") + (const_string "avx512f")) + ] + (const_string "*"))) + (set_attr "type" "negnot,msklog") (set_attr "mode" "<MODE>")]) (define_insn "*one_cmplsi2_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,k") (zero_extend:DI - (not:SI (match_operand:SI 1 "register_operand" "0"))))] + (not:SI (match_operand:SI 1 "register_operand" "0,k"))))] "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)" - "not{l}\t%k0" - [(set_attr "type" "negnot") - (set_attr "mode" "SI")]) + "@ + not{l}\t%k0 + knotd\t{%1, %0|%0, %1}" + [(set_attr "isa" "x64,avx512bw") + (set_attr "type" 
"negnot,msklog") + (set_attr "mode" "SI,SI")]) (define_insn "*one_cmplqi2_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") - (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))] + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,k") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))] "ix86_unary_operator_ok (NOT, QImode, operands)" - "@ - not{b}\t%0 - not{l}\t%k0" - [(set_attr "type" "negnot") - (set_attr "mode" "QI,SI") +{ + switch (which_alternative) + { + case 0: + return "not{b}\t%0"; + case 1: + return "not{l}\t%k0"; + case 2: + if (TARGET_AVX512DQ) + return "knotb\t{%1, %0|%0, %1}"; + return "knotw\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,*,avx512f") + (set_attr "type" "negnot,negnot,msklog") + (set (attr "mode") + (cond [(eq_attr "alternative" "1") + (const_string "SI") + (and (eq_attr "alternative" "2") + (match_test "!TARGET_AVX512DQ")) + (const_string "HI") + ] + (const_string "QI"))) ;; Potential partial reg stall on alternative 1. 
(set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "1") diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c index 94422f36010..46d9351f275 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -O2" } */ -/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c index c68ad8cc1f7..fe13f4f33fc 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512bw -O2" } */ -/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c index 49817097e26..114e03ee93d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c +++ b/gcc/testsuite/gcc.target/i386/avx512dq-kmovb-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512dq -O2" } */ +/* { dg-options "-mavx512dq -mno-avx512bw -O2" } */ /* { dg-final { scan-assembler-times "kmovb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c index a11fd26a44e..4690e7ba9e8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c +++ 
b/gcc/testsuite/gcc.target/i386/avx512dq-pr88465.c @@ -1,8 +1,8 @@ /* PR target/88465 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512dq -mno-avx512bw" } */ -/* { dg-final { scan-assembler-times "kxorb\[ \t]" 1 } } */ -/* { dg-final { scan-assembler-times "kxnorb\[ \t]" 1 } } */ +/* { dg-final { scan-assembler-times "kxorb\[ \t]" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "kxnorb\[ \t]" 1 { xfail *-*-* } } } */ void foo (void) diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c index 7bb34d34d8d..79d37394b36 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-kmovw-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -O2" } */ +/* { dg-options "-mavx512f -mno-avx512bw -O2" } */ /* { dg-final { scan-assembler-times "kmovw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88465.c b/gcc/testsuite/gcc.target/i386/avx512f-pr88465.c index e66ea64db02..b1ab9633522 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-pr88465.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88465.c @@ -1,8 +1,8 @@ /* PR target/88465 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512f -mno-avx512dq -mno-avx512bw" } */ -/* { dg-final { scan-assembler-times "kxorw\[ \t]" 2 } } */ -/* { dg-final { scan-assembler-times "kxnorw\[ \t]" 1 } } */ +/* { dg-final { scan-assembler-times "kxorw\[ \t]" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "kxnorw\[ \t]" 1 { xfail *-*-* } } } */ void foo (void) diff --git a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-1.c b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-1.c new file mode 100644 index 00000000000..2757bcaaf50 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-1.c @@ -0,0 +1,177 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mno-avx512dq -O2" } */ + +#include 
<immintrin.h> +__m512i +foo_orq (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + __mmask64 m2 = _mm512_cmpeq_epi8_mask (c, d); + return _mm512_mask_add_epi8 (c, m1 | m2, a, d); +} + +/* { dg-final { scan-assembler-times "korq" "1" { target { ! ia32 } } } } */ + +__m512i +foo_ord (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b); + __mmask32 m2 = _mm512_cmpeq_epi16_mask (c, d); + return _mm512_mask_add_epi16 (c, m1 | m2, a, d); +} + +/* { dg-final { scan-assembler-times "kord" "1" } } */ + +__m512i +foo_orw (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d); + return _mm512_mask_add_epi32 (c, m1 | m2, a, d); +} + +__m512i +foo_orb (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + __mmask8 m2 = _mm512_cmpeq_epi64_mask (c, d); + return _mm512_mask_add_epi64 (c, m1 | m2, a, d); +} + +/* { dg-final { scan-assembler-times "korw" "2" } } */ + +__m512i +foo_xorq (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + __mmask64 m2 = _mm512_cmpeq_epi8_mask (c, d); + return _mm512_mask_add_epi8 (c, m1 ^ m2, a, d); +} + +/* { dg-final { scan-assembler-times "kxorq" "1" { target { ! 
ia32 } } } } */ + +__m512i +foo_xord (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b); + __mmask32 m2 = _mm512_cmpeq_epi16_mask (c, d); + return _mm512_mask_add_epi16 (c, m1 ^ m2, a, d); +} + +/* { dg-final { scan-assembler-times "kxord" "1" } } */ + +__m512i +foo_xorw (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d); + return _mm512_mask_add_epi32 (c, m1 ^ m2, a, d); +} + +__m512i +foo_xorb (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + __mmask8 m2 = _mm512_cmpeq_epi64_mask (c, d); + return _mm512_mask_add_epi64 (c, m1 ^ m2, a, d); +} + +/* { dg-final { scan-assembler-times "kxorw" "2" } } */ + +__m512i +foo_andq (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + __mmask64 m2 = _mm512_cmpeq_epi8_mask (c, d); + return _mm512_mask_add_epi8 (c, m1 & m2, a, d); +} + +__m512i +foo_andd (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b); + __mmask32 m2 = _mm512_cmpeq_epi16_mask (c, d); + return _mm512_mask_add_epi16 (c, m1 & m2, a, d); +} + +__m512i +foo_andw (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d); + return _mm512_mask_add_epi32 (c, m1 & m2, a, d); +} + +__m512i +foo_andb (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + __mmask8 m2 = _mm512_cmpeq_epi64_mask (c, d); + return _mm512_mask_add_epi64 (c, m1 & m2, a, d); +} + +__m512i +foo_andnq (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + __mmask64 m2 = _mm512_cmpeq_epi8_mask (c, d); + return _mm512_mask_add_epi8 (c, m1 & ~m2, a, d); +} + +__m512i +foo_andnd (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask32 m1 = 
_mm512_cmpeq_epi16_mask (a, b); + __mmask32 m2 = _mm512_cmpeq_epi16_mask (c, d); + return _mm512_mask_add_epi16 (c, m1 & ~m2, a, d); +} + +__m512i +foo_andnw (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d); + return _mm512_mask_add_epi32 (c, m1 & ~m2, a, d); +} + +__m512i +foo_andnb (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + __mmask8 m2 = _mm512_cmpeq_epi64_mask (c, d); + return _mm512_mask_add_epi64 (c, m1 & ~m2, a, d); +} + +__m512i +foo_notq (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + return _mm512_mask_add_epi8 (c, ~m1, a, d); +} + +/* { dg-final { scan-assembler-times "knotq" "2" { target { ! ia32 } } } } */ + +__m512i +foo_notd (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b); + return _mm512_mask_add_epi16 (c, ~m1, a, d); +} + +/* { dg-final { scan-assembler-times "knotd" "2" { target { ! 
ia32 } } } } */ + +__m512i +foo_notw (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + return _mm512_mask_add_epi32 (c, ~m1, a, d); +} + +__m512i +foo_notb (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + return _mm512_mask_add_epi64 (c, ~m1, a, d); +} + +/* { dg-final { scan-assembler-times "knotw" "4" } } */ diff --git a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-2.c b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-2.c new file mode 100644 index 00000000000..277c5a98079 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-2.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512dq -O2" } */ +/* { dg-final { scan-assembler-times "knotb" "2" } } */ +/* { dg-final { scan-assembler-times "korb" "1" } } */ +/* { dg-final { scan-assembler-times "kxorb" "1" } } */ +#include "bitwise_mask_op-1.c" + -- 2.18.1