From da9f359a0cd3915ffa42ad6cf192a18788015136 Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Tue, 1 Mar 2022 13:41:52 +0800
Subject: [PATCH v2] Expand __builtin_memcmp_eq with ptest for OImode.

gcc/ChangeLog:

	PR target/104610
	* config/i386/i386-expand.cc (ix86_expand_branch): Use ptest
	for OImode when code is EQ or NE.
	* config/i386/i386.md (cbranchoi4): New expander.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr104610.c: New test.
---
 gcc/config/i386/i386-expand.cc           |  9 ++++++++-
 gcc/config/i386/i386.md                  | 16 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr104610.c | 13 +++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104610.c

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 0fd3028c205..b26c626a48a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2267,12 +2267,19 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
 
   /* Handle special case - vector comparsion with boolean result, transform
      it using ptest instruction.  */
-  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+      || mode == OImode)
     {
       rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
       machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
 
       gcc_assert (code == EQ || code == NE);
+      if (mode == OImode)
+	{
+	  op0 = lowpart_subreg (p_mode, force_reg (mode, op0), mode);
+	  op1 = lowpart_subreg (p_mode, force_reg (mode, op1), mode);
+	  mode = p_mode;
+	}
       /* Generate XOR since we can't check that one operand is zero vector.  */
       tmp = gen_reg_rtx (mode);
       emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f9c06ff302a..76bb56542da 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1338,6 +1338,22 @@ (define_expand "cbranch<mode>4"
   DONE;
 })
 
+(define_expand "cbranchoi4"
+  [(set (reg:CC FLAGS_REG)
+	(compare:CC (match_operand:OI 1 "nonimmediate_operand")
+		    (match_operand:OI 2 "nonimmediate_operand")))
+   (set (pc) (if_then_else
+	       (match_operator 0 "bt_comparison_operator"
+		[(reg:CC FLAGS_REG) (const_int 0)])
+	       (label_ref (match_operand 3))
+	       (pc)))]
+  "TARGET_AVX"
+{
+  ix86_expand_branch (GET_CODE (operands[0]),
+		      operands[1], operands[2], operands[3]);
+  DONE;
+})
+
 (define_expand "cstore<mode>4"
   [(set (reg:CC FLAGS_REG)
 	(compare:CC (match_operand:SWIM 2 "nonimmediate_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr104610.c b/gcc/testsuite/gcc.target/i386/pr104610.c
new file mode 100644
index 00000000000..fe39cbe5b8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104610.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -mmove-max=256 -mstore-max=256" } */
+/* { dg-final { scan-assembler-times {(?n)vptest.*ymm} 1 } } */
+/* { dg-final { scan-assembler-times {sete} 1 } } */
+/* { dg-final { scan-assembler-not {(?n)je.*L[0-9]} } } */
+/* { dg-final { scan-assembler-not {(?n)jne.*L[0-9]} } } */
+
+/* sizeof (t) is 32: expect one ymm vptest plus sete and no je/jne.  */
+_Bool f256(char *a)
+{
+  char t[] = "0123456789012345678901234567890";
+  return __builtin_memcmp(a, &t[0], sizeof(t)) == 0;
+}
-- 
2.18.1

