Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
The tests below newly pass with -march=native on SPR:
gcc: gcc.dg/vect/vect-reduc-bool-4.c -flto -ffat-lto-objects
scan-tree-dump-times vect "optimized: loop vectorized" 2
gcc: gcc.dg/vect/vect-reduc-bool-4.c scan-tree-dump-times vect "optimized: loop
vectorized" 2
gcc: gcc.dg/vect/vect-reduc-bool-8.c -flto -ffat-lto-objects
scan-tree-dump-times vect "optimized: loop vectorized" 2
gcc: gcc.dg/vect/vect-reduc-bool-8.c scan-tree-dump-times vect "optimized: loop
vectorized" 2
unix/-m32: gcc: gcc.dg/vect/vect-reduc-bool-3.c -flto -ffat-lto-objects
scan-tree-dump-times vect "optimized: loop vectorized" 2
unix/-m32: gcc: gcc.dg/vect/vect-reduc-bool-3.c scan-tree-dump-times vect
"optimized: loop vectorized" 2
unix/-m32: gcc: gcc.dg/vect/vect-reduc-bool-4.c -flto -ffat-lto-objects
scan-tree-dump-times vect "optimized: loop vectorized" 2
unix/-m32: gcc: gcc.dg/vect/vect-reduc-bool-4.c scan-tree-dump-times vect
"optimized: loop vectorized" 2
unix/-m32: gcc: gcc.dg/vect/vect-reduc-bool-7.c -flto -ffat-lto-objects
scan-tree-dump-times vect "optimized: loop vectorized" 2
unix/-m32: gcc: gcc.dg/vect/vect-reduc-bool-7.c scan-tree-dump-times vect
"optimized: loop vectorized" 2
unix/-m32: gcc: gcc.dg/vect/vect-reduc-bool-8.c -flto -ffat-lto-objects
scan-tree-dump-times vect "optimized: loop vectorized" 2
unix/-m32: gcc: gcc.dg/vect/vect-reduc-bool-8.c scan-tree-dump-times vect
"optimized: loop vectorized" 2
Ready to push to trunk.
gcc/ChangeLog:
PR target/101639
* config/i386/sse.md
(reduc_sbool_and_scal_<mode>): New expander.
(reduc_sbool_ior_scal_<mode>): Ditto.
(reduc_sbool_xor_scal_<mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr101639_reduc_mask_di.c: New test.
* gcc.target/i386/pr101639_reduc_mask_hi.c: New test.
* gcc.target/i386/pr101639_reduc_mask_qi.c: New test.
* gcc.target/i386/pr101639_reduc_mask_si.c: New test.
---
gcc/config/i386/sse.md | 82 +++++++++++++++++++
.../gcc.target/i386/pr101639_reduc_mask_di.c | 30 +++++++
.../gcc.target/i386/pr101639_reduc_mask_hi.c | 30 +++++++
.../gcc.target/i386/pr101639_reduc_mask_qi.c | 30 +++++++
.../gcc.target/i386/pr101639_reduc_mask_si.c | 30 +++++++
5 files changed, 202 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_di.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_hi.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_qi.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_si.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b28c8edb19..444dc7a7cbc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4013,6 +4013,88 @@ (define_expand "reduc_umin_scal_v8hi"
DONE;
})
+;; AND-reduce the mask in operand 1 into the QImode result in operand 0.
+;; Operand 2 is the constant element count; when it is below 8 only the
+;; low bits of the mask take part in the comparison.
+(define_expand "reduc_sbool_and_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand")
+   (match_operand:SI 2 "const_int_operand")]
+  "TARGET_AVX512F"
+{
+  const int nunits = INTVAL (operands[2]);
+  rtx mask = operands[1];
+  rtx all_set;
+
+  if (nunits >= 8)
+    /* Compare against the all-ones constant of the mask mode.  */
+    all_set = CONSTM1_RTX (<MODE>mode);
+  else
+    {
+      /* Keep only the low NUNITS bits and compare against exactly
+         those bits being set.  */
+      all_set = gen_int_mode ((1u << nunits) - 1, QImode);
+      mask = gen_reg_rtx (QImode);
+      emit_insn (gen_andqi3 (mask, operands[1], all_set));
+    }
+
+  /* Result is the outcome of MASK == ALL_SET.  */
+  ix86_expand_setcc (operands[0], EQ, mask, all_set);
+  DONE;
+})
+
+;; IOR-reduce the mask in operand 1 into the QImode result in operand 0.
+;; Operand 2 is the constant element count; when it is below 8 only the
+;; low bits of the mask take part in the comparison.
+(define_expand "reduc_sbool_ior_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand")
+   (match_operand:SI 2 "const_int_operand")]
+  "TARGET_AVX512F"
+{
+  rtx mask = operands[1];
+  const int nunits = INTVAL (operands[2]);
+
+  if (nunits < 8)
+    {
+      /* Drop the bits above the low NUNITS before testing for nonzero.  */
+      rtx low_bits = gen_int_mode ((1u << nunits) - 1, QImode);
+      rtx masked = gen_reg_rtx (QImode);
+      emit_insn (gen_andqi3 (masked, mask, low_bits));
+      mask = masked;
+    }
+
+  /* Result is the outcome of MASK != 0.  */
+  rtx zero = CONST0_RTX (<MODE>mode);
+  ix86_expand_setcc (operands[0], NE, mask, zero);
+  DONE;
+})
+
+;; XOR-reduce the mask in operand 1 into the QImode result in operand 0,
+;; computed as the low bit of the population count of the mask.
+;; Operand 2 is the constant element count; when it is below 8 only the
+;; low bits of the mask are counted.
+(define_expand "reduc_sbool_xor_scal_<mode>"
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand")
+   (match_operand:SI 2 "const_int_operand")]
+  "TARGET_AVX512F && TARGET_POPCNT
+   && (TARGET_64BIT || <MODE>mode != DImode)"
+{
+  rtx popcnt1, op1 = operands[1];
+  int n_elt = INTVAL (operands[2]);
+  if (n_elt < 8)
+    {
+      /* Clear the bits above the low N_ELT so they do not contribute
+         to the population count.  */
+      rtx op2 = gen_int_mode ((1u << n_elt) - 1, QImode);
+      op1 = gen_reg_rtx (QImode);
+      emit_insn (gen_andqi3 (op1, operands[1], op2));
+    }
+
+  switch (<MODE_SIZE>)
+    {
+    case 1:
+    case 2:
+      {
+        /* The popcount below is done in SImode, so widen narrow masks.
+           Extend OP1 (the possibly masked value), not operands[1], so
+           the bits cleared above stay out of the count.  */
+        rtx wide = gen_reg_rtx (SImode);
+        emit_move_insn (wide, gen_rtx_ZERO_EXTEND (SImode, op1));
+        op1 = wide;
+      }
+      /* FALLTHRU.  */
+    case 4:
+      popcnt1 = gen_reg_rtx (SImode);
+      emit_insn (gen_popcountsi2 (popcnt1, op1));
+      /* Parity of the set bits is the low bit of the count.  */
+      emit_insn (gen_andsi3 (popcnt1, popcnt1, GEN_INT (0x1)));
+      break;
+
+    case 8:
+      popcnt1 = gen_reg_rtx (DImode);
+      emit_insn (gen_popcountdi2 (popcnt1, op1));
+      emit_insn (gen_anddi3 (popcnt1, popcnt1, GEN_INT (0x1)));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_move_insn (operands[0], gen_lowpart (QImode, popcnt1));
+  DONE;
+})
+
(define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
(unspec:VFH_AVX512VL
diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_di.c
b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_di.c
new file mode 100644
index 00000000000..c46555a524c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_di.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-times "kortest" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "setc" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "popcnt" 1 { target { ! ia32 } } } } */
+
+/* AND reduction: returns true only if all 64 bytes starting at P are
+   nonzero.  N is unused.  */
+bool f(char * p, long n)
+{
+ bool r = true;
+ for(long i = 0; i < 64; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/* IOR reduction: returns true if any of the 64 bytes starting at P is
+   nonzero.  N is unused.  */
+bool f2(char * p, long n)
+{
+ bool r = false;
+ for(long i = 0; i < 64; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/* XOR reduction: returns true if an odd number of the 64 bytes starting
+   at P are nonzero.  N is unused.  */
+bool f3(char * p, long n)
+{
+ bool r = false;
+ for(long i = 0; i < 64; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_hi.c
b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_hi.c
new file mode 100644
index 00000000000..74dc3d41f0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_hi.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-times "kortest" 2 } } */
+/* { dg-final { scan-assembler-times "setc" 1 } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "popcnt" 1 } } */
+
+/* AND reduction: returns true only if all 16 bytes starting at P are
+   nonzero.  N is unused.  */
+bool f(char * p, long n)
+{
+ bool r = true;
+ for(long i = 0; i < 16; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/* IOR reduction: returns true if any of the 16 bytes starting at P is
+   nonzero.  N is unused.  */
+bool f2(char * p, long n)
+{
+ bool r = false;
+ for(long i = 0; i < 16; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/* XOR reduction: returns true if an odd number of the 16 bytes starting
+   at P are nonzero.  N is unused.  */
+bool f3(char * p, long n)
+{
+ bool r = false;
+ for(long i = 0; i < 16; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_qi.c
b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_qi.c
new file mode 100644
index 00000000000..ccc39d54418
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_qi.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-times "kortest" 2 } } */
+/* { dg-final { scan-assembler-times "setc" 1 } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "popcnt" 1 } } */
+
+/* AND reduction: returns true only if all 8 ints starting at P are
+   nonzero.  N is unused.  */
+bool f(int * p, long n)
+{
+ bool r = true;
+ for(long i = 0; i < 8; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/* IOR reduction: returns true if any of the 8 ints starting at P is
+   nonzero.  N is unused.  */
+bool f2(int * p, long n)
+{
+ bool r = false;
+ for(long i = 0; i < 8; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/* XOR reduction: returns true if an odd number of the 8 ints starting
+   at P are nonzero.  N is unused.  */
+bool f3(int * p, long n)
+{
+ bool r = false;
+ for(long i = 0; i < 8; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_si.c
b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_si.c
new file mode 100644
index 00000000000..b0c8736ccb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101639_reduc_mask_si.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-times "kortest" 2 } } */
+/* { dg-final { scan-assembler-times "setc" 1 } } */
+/* { dg-final { scan-assembler-times "setne" 1 } } */
+/* { dg-final { scan-assembler-times "popcnt" 1 } } */
+
+/* AND reduction: returns true only if all 32 bytes starting at P are
+   nonzero.  N is unused.  */
+bool f(char * p, long n)
+{
+ bool r = true;
+ for(long i = 0; i < 32; ++i)
+ r &= (p[i] != 0);
+ return r;
+}
+
+/* IOR reduction: returns true if any of the 32 bytes starting at P is
+   nonzero.  N is unused.  */
+bool f2(char * p, long n)
+{
+ bool r = false;
+ for(long i = 0; i < 32; ++i)
+ r |= (p[i] != 0);
+ return r;
+}
+
+/* XOR reduction: returns true if an odd number of the 32 bytes starting
+   at P are nonzero.  N is unused.  */
+bool f3(char * p, long n)
+{
+ bool r = false;
+ for(long i = 0; i < 32; ++i)
+ r ^= (p[i] != 0);
+ return r;
+}
--
2.34.1