https://gcc.gnu.org/g:87b2de785ab1a887456645bfce2257ed98c98a29
commit r16-4568-g87b2de785ab1a887456645bfce2257ed98c98a29
Author: liuhongt <[email protected]>
Date:   Wed Oct 15 00:30:01 2025 -0700

    Support reduc_sbool_{and,ior,xor}_scal_m for avx512 kmask.

    gcc/ChangeLog:

            PR target/101639
            * config/i386/sse.md (reduc_sbool_and_scal_<mode>): New expander.
            (reduc_sbool_ior_scal_<mode>): Ditto.
            (reduc_sbool_xor_scal_<mode>): Ditto.

    gcc/testsuite/ChangeLog:

            * gcc.target/i386/pr101639_reduc_mask_di.c: New test.
            * gcc.target/i386/pr101639_reduc_mask_hi.c: New test.
            * gcc.target/i386/pr101639_reduc_mask_qi.c: New test.
            * gcc.target/i386/pr101639_reduc_mask_si.c: New test.

Diff:
---
 gcc/config/i386/sse.md                             | 82 ++++++++++++++++++++++
 .../gcc.target/i386/pr101639_reduc_mask_di.c       | 30 ++++++++
 .../gcc.target/i386/pr101639_reduc_mask_hi.c       | 30 ++++++++
 .../gcc.target/i386/pr101639_reduc_mask_qi.c       | 30 ++++++++
 .../gcc.target/i386/pr101639_reduc_mask_si.c       | 30 ++++++++
 5 files changed, 202 insertions(+)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4ad17f67b9dc..b1918c462867 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4013,6 +4013,88 @@
   DONE;
 })
 
+(define_expand "reduc_sbool_and_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand")
+   (match_operand:SI 2 "const_int_operand")]
+  "TARGET_AVX512F"
+{
+  int n_elt = INTVAL (operands[2]);
+  rtx op2 = CONSTM1_RTX (<MODE>mode);
+  rtx op1 = operands[1];
+  if (n_elt < 8)
+    {
+      op2 = gen_int_mode ((1u << n_elt) - 1, QImode);
+      op1 = gen_reg_rtx (QImode);
+      emit_insn (gen_andqi3 (op1, operands[1], op2));
+    }
+  ix86_expand_setcc (operands[0], EQ, op1, op2);
+  DONE;
+})
+
+(define_expand "reduc_sbool_ior_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand")
+   (match_operand:SI 2 "const_int_operand")]
+  "TARGET_AVX512F"
+{
+  int n_elt = INTVAL (operands[2]);
+  rtx op1 = operands[1];
+  if (n_elt < 8)
+    {
+      rtx op2 = gen_int_mode ((1u << n_elt) - 1, QImode);
+      op1 = gen_reg_rtx (QImode);
+      emit_insn (gen_andqi3 (op1, operands[1], op2));
+    }
+  ix86_expand_setcc (operands[0], NE,
+                     op1, CONST0_RTX (<MODE>mode));
+  DONE;
+})
+
+(define_expand "reduc_sbool_xor_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand")
+   (match_operand:SI 2 "const_int_operand")]
+  "TARGET_AVX512F && TARGET_POPCNT
+   && (TARGET_64BIT || <MODE>mode != DImode)"
+{
+  rtx popcnt1, op1 = operands[1];
+  int n_elt = INTVAL (operands[2]);
+  if (n_elt < 8)
+    {
+      rtx op2 = gen_int_mode ((1u << n_elt) - 1, QImode);
+      op1 = gen_reg_rtx (QImode);
+      emit_insn (gen_andqi3 (op1, operands[1], op2));
+    }
+
+  switch (<MODE_SIZE>)
+    {
+    case 1:
+    case 2:
+      /* Zero-extend op1 so the n_elt < 8 masking above is not discarded.  */
+      op1 = convert_to_mode (SImode, op1, 1);
+      /* FALLTHRU.  */
+    case 4:
+      popcnt1 = gen_reg_rtx (SImode);
+      emit_insn (gen_popcountsi2 (popcnt1, op1));
+      emit_insn (gen_andsi3 (popcnt1, popcnt1, GEN_INT (0x1)));
+      break;
+
+    case 8:
+      popcnt1 = gen_reg_rtx (DImode);
+      emit_insn (gen_popcountdi2 (popcnt1, op1));
+      emit_insn (gen_anddi3 (popcnt1, popcnt1, GEN_INT (0x1)));
+      break;
+
+    default:
+      gcc_unreachable ();
+
+    }
+
+  emit_move_insn (operands[0], gen_lowpart (QImode, popcnt1));
+  DONE;
+})
+
 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
   [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
         (unspec:VFH_AVX512VL
diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_di.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_di.c
new file mode 100644
index 000000000000..c46555a524c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_di.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-times "kortest" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "setc" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "popcnt" 1 { target { ! ia32 } } } } */
+
+bool f(char * p, long n)
+{
+  bool r = true;
+  for(long i = 0; i < 64; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool f2(char * p, long n)
+{
+  bool r = false;
+  for(long i = 0; i < 64; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+bool f3(char * p, long n)
+{
+  bool r = false;
+  for(long i = 0; i < 64; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_hi.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_hi.c
new file mode 100644
index 000000000000..74dc3d41f0bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_hi.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-times "kortest" 2 } } */
+/* { dg-final { scan-assembler-times "setc" 1 } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "popcnt" 1 } } */
+
+bool f(char * p, long n)
+{
+  bool r = true;
+  for(long i = 0; i < 16; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool f2(char * p, long n)
+{
+  bool r = false;
+  for(long i = 0; i < 16; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+bool f3(char * p, long n)
+{
+  bool r = false;
+  for(long i = 0; i < 16; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_qi.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_qi.c
new file mode 100644
index 000000000000..ccc39d54418f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_qi.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-times "kortest" 2 } } */
+/* { dg-final { scan-assembler-times "setc" 1 } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "popcnt" 1 } } */
+
+bool f(int * p, long n)
+{
+  bool r = true;
+  for(long i = 0; i < 8; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool f2(int * p, long n)
+{
+  bool r = false;
+  for(long i = 0; i < 8; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+bool f3(int * p, long n)
+{
+  bool r = false;
+  for(long i = 0; i < 8; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_si.c b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_si.c
new file mode 100644
index 000000000000..b0c8736ccb71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_si.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-times "kortest" 2 } } */
+/* { dg-final { scan-assembler-times "setc" 1 } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "popcnt" 1 } } */
+
+bool f(char * p, long n)
+{
+  bool r = true;
+  for(long i = 0; i < 32; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool f2(char * p, long n)
+{
+  bool r = false;
+  for(long i = 0; i < 32; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+bool f3(char * p, long n)
+{
+  bool r = false;
+  for(long i = 0; i < 32; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
