https://gcc.gnu.org/g:7d297806214d84daba029568463e1e95224b797f
commit r16-5219-g7d297806214d84daba029568463e1e95224b797f Author: liuhongt <[email protected]> Date: Tue Nov 11 00:19:19 2025 -0800 Optimize kmov + kmov + or to kortest. For an instruction sequence like kmovb %k0, %edx kmovb %k1, %ecx orb %cl, %dl je .L5 if only CCZ is needed, it can be optimized to kortestb %k1, %k0 je .L5 gcc/ChangeLog: * config/i386/i386.md (*ior<mode>_ccz_1): New define_insn. gcc/testsuite/ChangeLog: * gcc.target/i386/kortest_ccz-1.c: New test. Diff: --- gcc/config/i386/i386.md | 16 ++++++++++++++++ gcc/testsuite/gcc.target/i386/kortest_ccz-1.c | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3ea2439526be..f3c8f595de91 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14203,6 +14203,22 @@ (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) +;; It must be put before *<code><mode>_3, the one below. +(define_insn "*ior<mode>_ccz_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (ior:SWI1248_AVX512BWDQ_64 + (match_operand:SWI1248_AVX512BWDQ_64 1 "nonimmediate_operand" "%0,?k") + (match_operand:SWI1248_AVX512BWDQ_64 2 "<general_operand>" "<g>, k")) + (const_int 0))) + (clobber (match_scratch:SWI1248_AVX512BWDQ_64 0 "=<r>, X"))] + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + or{<imodesuffix>}\t{%2, %0|%0, %2} + kortest<mskmodesuffix>\t{%1, %2|%2, %1}" + [(set_attr "type" "alu,msklog") + (set_attr "mode" "<MODE>")]) + (define_insn "*<code><mode>_3" [(set (reg FLAGS_REG) (compare (any_or:SWI diff --git a/gcc/testsuite/gcc.target/i386/kortest_ccz-1.c b/gcc/testsuite/gcc.target/i386/kortest_ccz-1.c new file mode 100644 index 000000000000..b3cf5b803fbe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/kortest_ccz-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O3" } */ +/* { dg-final { scan-assembler-not "kmov" } } */ +/* { dg-final { scan-assembler "kortest" } } */ + +int +foo (int *__restrict a, int* 
__restrict d, int b, int c, int n) +{ + for (int i = 0; i != 10000; i++) + if (a[i] > b | d[i] > c) + return 1; + return 0; +}
