Changes from v3:
* Added a new testcase.
Changes from v2:
* Some formatting.
Changes from v1:
* Added more info to the commit message and fixed indentation.
This patch removes redundant vector compare instructions and logic
from the vec_first_mismatch_or_eos_index intrinsic.
Currently, GCC generates extra vcmpneb instructions and additional
masking logic (xxland, xxlorc) to handle EOS and mismatch comparisons.
However, a single vcmpnezb instruction already suffices, as it covers
both cases. By eliminating the redundant comparisons (vcmpneb) and the
associated logic (xxland/xxlorc), we produce shorter,
more efficient code.
Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.
2025-10-22 Vijay Shankar <[email protected]>
gcc/ChangeLog:
PR target/116004
* config/rs6000/vsx.md (first_mismatch_or_eos_index): Remove redundant
emit_insns.
gcc/testsuite/ChangeLog:
PR target/116004
* gcc.target/powerpc/pr116004.c: New test.
---
gcc/config/rs6000/vsx.md | 22 ++------
gcc/testsuite/gcc.target/powerpc/pr116004.c | 58 +++++++++++++++++++++
2 files changed, 61 insertions(+), 19 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/pr116004.c
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index cfad9b8c6..3c2319a53 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5668,29 +5668,13 @@
"TARGET_P9_VECTOR"
{
int sh;
- rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
- rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
- rtx cmpz_result = gen_reg_rtx (<MODE>mode);
- rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
- rtx and_result = gen_reg_rtx (<MODE>mode);
rtx result = gen_reg_rtx (<MODE>mode);
- rtx vzero = gen_reg_rtx (<MODE>mode);
-
- /* Vector with zeros in elements that correspond to zeros in operands. */
- emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
- emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
- emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
- emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
+ /* Vector with ones in elements that do not match or elements corresponding
+ to zeros in operands. */
- /* Vector with ones in elments that match. */
- emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
+ emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (result, operands[1],
operands[2]));
- emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
-
- /* Create vector with ones in elements where there was a zero in one of
- the source elements or the elements did not match. */
- emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
if (<MODE>mode == V16QImode)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr116004.c b/gcc/testsuite/gcc.target/powerpc/pr116004.c
new file mode 100644
index 000000000..2d0982d1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr116004.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-final { scan-assembler-times {\mvcmpnezb\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvcmpnezh\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mvcmpnezw\M} 2 } } */
+/* { dg-final { scan-assembler-not "vcmpneb " } } */
+/* { dg-final { scan-assembler-not "vcmpneh " } } */
+/* { dg-final { scan-assembler-not "vcmpnew " } } */
+
+#include <altivec.h>
+#include <stdint.h>
+
+int main(void) {
+ vector signed char char_src1, char_src2;
+ vector unsigned char uchar_src1, uchar_src2;
+ vector signed short short_src1, short_src2;
+ vector unsigned short ushort_src1, ushort_src2;
+ vector signed int int_src1, int_src2;
+ vector unsigned int uint_src1, uint_src2;
+
+ volatile unsigned int r1, r2, r3, r4, r5, r6; /* volatile: stores must be emitted.  */
+
+ /* signed char: operands first differ at element 0.  */
+ char_src1 = (vector signed char) {-1, 2, 3, 0, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ char_src2 = (vector signed char) {2, 3, 20, 0, -5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ r1 = vec_first_mismatch_or_eos_index(char_src1, char_src2);
+
+ /* unsigned char: element 1 differs and is zero in the second operand.  */
+ uchar_src1 = (vector unsigned char) {1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ uchar_src2 = (vector unsigned char) {1, 0, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16};
+ r2 = vec_first_mismatch_or_eos_index(uchar_src1, uchar_src2);
+
+ /* signed short: operands first differ at element 1.  */
+ short_src1 = (vector signed short) {-10, -20, 30, 40, 50, 60, 70, 80};
+ short_src2 = (vector signed short) {-10, 20, 30, 40, 50, 60, 70, 80};
+ r3 = vec_first_mismatch_or_eos_index(short_src1, short_src2);
+
+ /* unsigned short: element 7 is zero in the first operand only.  */
+ ushort_src1 = (vector unsigned short) {10, 20, 30, 40, 50, 60, 70, 0};
+ ushort_src2 = (vector unsigned short) {10, 20, 30, 40, 50, 60, 70, 80};
+ r4 = vec_first_mismatch_or_eos_index(ushort_src1, ushort_src2);
+
+ /* signed int: operands first differ at element 1.  */
+ int_src1 = (vector signed int) {1, 2, 3, 4};
+ int_src2 = (vector signed int) {1, 20, 3, 4};
+ r5 = vec_first_mismatch_or_eos_index(int_src1, int_src2);
+
+ /* unsigned int: operands are identical; element 3 is zero in both.  */
+ uint_src1 = (vector unsigned int) {1, 2, 3, 0};
+ uint_src2 = (vector unsigned int) {1, 2, 3, 0};
+ r6 = vec_first_mismatch_or_eos_index(uint_src1, uint_src2);
+
+ return 0;
+}
--
2.47.3