Hi,

The attached patch addresses some of the remaining things as mentioned
in the PR.

Tested on sh-elf with

make -k check RUNTESTFLAGS="--target_board=sh-sim\{-m2/-ml,-m2/-mb,
-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

Committed as r235952.

Cheers,
Oleg

gcc/ChangeLog:
        PR target/52933
        * config/sh/sh.md (*cmp_div0s_7, *cmp_div0s_8, *cmp_div0s_9): Add div0s
        variants.
        * config/sh/sh.c (sh_rtx_costs): Add another div0s case.

gcc/testsuite/ChangeLog:
        PR target/52933
        * gcc.target/sh/pr52933-1.c (test_31, test_32, test_33, test_34,
        test_35, test_36, test_37, test_38, test_39, test_40): New sub-tests.
        Adjust expected instruction counts.
        * gcc.target/sh/pr52933-2.c: Adjust expected instruction counts.
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index ebdb523..809f679 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -3209,6 +3209,15 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
 	  *total = 1; //COSTS_N_INSNS (1);
 	  return true;
 	}
+
+      /* div0s variant.  */
+      if (GET_CODE (XEXP (x, 0)) == XOR
+	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
+	  && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+	{
+	  *total = 1;
+	  return true;
+	}
       return false;
 
     /* The cost of a sign or zero extend depends on whether the source is a
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 0ab76b5..e704e2a 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -1103,6 +1103,97 @@
 	(lshiftrt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 31)))
    (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))])
 
+;; In some cases, it might be shorter to get a tested bit into bit 31 and
+;; use div0s.  Otherwise it's usually better to just leave the xor and tst
+;; sequence.  The only thing we can try to do here is to avoid the large
+;; tst constant.
+(define_insn_and_split "*cmp_div0s_7"
+  [(set (reg:SI T_REG)
+	(zero_extract:SI (xor:SI (match_operand:SI 0 "arith_reg_operand")
+				 (match_operand:SI 1 "arith_reg_operand"))
+			 (const_int 1)
+			 (match_operand 2 "const_int_operand")))]
+  "TARGET_SH1 && can_create_pseudo_p ()
+   && (INTVAL (operands[2]) == 7 || INTVAL (operands[2]) == 15
+       || INTVAL (operands[2]) == 23 || INTVAL (operands[2]) == 29
+       || INTVAL (operands[2]) == 30 || INTVAL (operands[2]) == 31)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  const int bitpos = INTVAL (operands[2]);
+
+  rtx op0 = gen_reg_rtx (SImode);
+  rtx op1 = gen_reg_rtx (SImode);
+
+  if (bitpos == 23 || bitpos == 30 || bitpos == 29)
+    {
+      emit_insn (gen_ashlsi3 (op0, operands[0], GEN_INT (31 - bitpos)));
+      emit_insn (gen_ashlsi3 (op1, operands[1], GEN_INT (31 - bitpos)));
+    }
+  else if (bitpos == 15)
+    {
+      emit_insn (gen_extendhisi2 (op0, gen_lowpart (HImode, operands[0])));
+      emit_insn (gen_extendhisi2 (op1, gen_lowpart (HImode, operands[1])));
+    }
+  else if (bitpos == 7)
+    {
+      emit_insn (gen_extendqisi2 (op0, gen_lowpart (QImode, operands[0])));
+      emit_insn (gen_extendqisi2 (op1, gen_lowpart (QImode, operands[1])));
+    }
+  else if (bitpos == 31)
+    {
+      op0 = operands[0];
+      op1 = operands[1];
+    }
+  else
+    gcc_unreachable ();
+
+  emit_insn (gen_cmp_div0s (op0, op1));
+  DONE;
+})
+
+;; For bits 0..7 using a xor and tst #imm,r0 sequence seems to be better.
+;; Thus allow the following patterns only for higher bit positions where
+;; it's more likely to save the large tst constant.
+(define_insn_and_split "*cmp_div0s_8"
+  [(set (reg:SI T_REG)
+	(eq:SI (zero_extract:SI (match_operand:SI 0 "arith_reg_operand")
+				(const_int 1)
+				(match_operand 2 "const_int_operand"))
+	       (zero_extract:SI (match_operand:SI 1 "arith_reg_operand")
+				(const_int 1)
+				(match_dup 2))))]
+  "TARGET_SH1 && can_create_pseudo_p ()
+   && (INTVAL (operands[2]) == 15
+       || INTVAL (operands[2]) == 23 || INTVAL (operands[2]) == 29
+       || INTVAL (operands[2]) == 30 || INTVAL (operands[2]) == 31)"
+  "#"
+  "&& 1"
+  [(set (reg:SI T_REG)
+	(zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+			 (const_int 1) (match_dup 2)))
+   (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))])
+
+(define_insn_and_split "*cmp_div0s_9"
+  [(set (reg:SI T_REG)
+	(zero_extract:SI (xor:SI (xor:SI (match_operand:SI 0 "arith_reg_operand")
+					 (match_operand:SI 1 "arith_reg_operand"))
+				 (match_operand 2 "const_int_operand"))
+			 (const_int 1)
+			 (match_operand 3 "const_int_operand")))]
+  "TARGET_SH1 && can_create_pseudo_p ()
+   && (INTVAL (operands[2]) & 0xFFFFFFFF) == (1U << INTVAL (operands[3]))
+   && (INTVAL (operands[3]) == 15
+       || INTVAL (operands[3]) == 23 || INTVAL (operands[3]) == 29
+       || INTVAL (operands[3]) == 30 || INTVAL (operands[3]) == 31)"
+  "#"
+  "&& 1"
+  [(set (reg:SI T_REG)
+	(zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+			 (const_int 1) (match_dup 3)))
+   (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))])
+
 ;; -------------------------------------------------------------------------
 ;; SImode compare and branch
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/sh/pr52933-1.c b/gcc/testsuite/gcc.target/sh/pr52933-1.c
index 138de7f..81aa94f 100644
--- a/gcc/testsuite/gcc.target/sh/pr52933-1.c
+++ b/gcc/testsuite/gcc.target/sh/pr52933-1.c
@@ -4,13 +4,13 @@
    logic usually show up as redundant tst insns.  */
 /* { dg-do compile }  */
 /* { dg-options "-O2" } */
-/* { dg-final { scan-assembler-times "div0s" 32 } } */
+/* { dg-final { scan-assembler-times "div0s" 42 } } */
 /* { dg-final { scan-assembler-not "tst" } } */
 /* { dg-final { scan-assembler-not "not\t" } }  */
 /* { dg-final { scan-assembler-not "nott" } }  */
 
-/* { dg-final { scan-assembler-times "negc" 9 { target { ! sh2a } } } }  */
-/* { dg-final { scan-assembler-times "movrt" 9 { target { sh2a } } } }  */
+/* { dg-final { scan-assembler-times "negc" 10 { target { ! sh2a } } } }  */
+/* { dg-final { scan-assembler-times "movrt" 10 { target { sh2a } } } }  */
 
 typedef unsigned char bool;
 
@@ -212,3 +212,75 @@ test_30 (int a, int b)
 {
   return ((a >> 31) ^ (b >> 31)) & 1;
 }
+
+// -------------------------------------------------------
+
+bool
+test_31 (int a, int b)
+{
+  /* 2x exts.w, div0s  */
+  return ((a & 0x8000) ^ (b & 0x8000)) != 0;
+}
+
+bool
+test_32 (int a, int b)
+{
+  /* 2x exts.w, div0s  */
+  return (a & 0x8000) != (b & 0x8000);
+}
+
+bool
+test_33 (int a, int b)
+{
+  /* 2x add/shll, div0s  */
+  return ((a & (1<<30)) ^ (b & (1<<30))) != 0;
+}
+
+bool
+test_34 (int a, int b)
+{
+  /* 2x exts.b, div0s  */
+  return (a & 0x80) != (b & 0x80);
+}
+
+bool
+test_35 (signed char a, signed char b)
+{
+  /* 2x exts.b, div0s  */
+  return (a < 0) != (b < 0);
+}
+
+bool
+test_36 (short a, short b)
+{
+  /* 2x exts.w, div0s  */
+  return (a < 0) != (b < 0);
+}
+
+int
+test_37 (short a, short b)
+{
+  /* 2x exts.w, div0s  */
+  return (a < 0) != (b < 0) ? 40 : -10;
+}
+
+bool
+test_38 (int a, int b)
+{
+  /* 2x shll8, div0s  */
+  return ((a & (1<<23)) ^ (b & (1<<23))) != 0;
+}
+
+bool
+test_39 (int a, int b)
+{
+  /* 2x shll2, div0s  */
+  return ((a & (1<<29)) ^ (b & (1<<29))) != 0;
+}
+
+bool
+test_40 (short a, short b)
+{
+  /* 2x exts.w, div0s, negc  */
+  return (a < 0) == (b < 0);
+}
diff --git a/gcc/testsuite/gcc.target/sh/pr52933-2.c b/gcc/testsuite/gcc.target/sh/pr52933-2.c
index 4637f0e..2b5d09a 100644
--- a/gcc/testsuite/gcc.target/sh/pr52933-2.c
+++ b/gcc/testsuite/gcc.target/sh/pr52933-2.c
@@ -5,12 +5,12 @@
    logic usually show up as redundant tst insns.  */
 /* { dg-do compile }  */
 /* { dg-options "-O2 -mpretend-cmove" } */
-/* { dg-final { scan-assembler-times "div0s" 32 } } */
+/* { dg-final { scan-assembler-times "div0s" 42 } } */
 /* { dg-final { scan-assembler-not "tst" } } */
 /* { dg-final { scan-assembler-not "not\t" } }  */
 /* { dg-final { scan-assembler-not "nott" } }  */
 
-/* { dg-final { scan-assembler-times "negc" 9 { target { ! sh2a } } } }  */
-/* { dg-final { scan-assembler-times "movrt" 9 { target { sh2a } } } }  */
+/* { dg-final { scan-assembler-times "negc" 10 { target { ! sh2a } } } }  */
+/* { dg-final { scan-assembler-times "movrt" 10 { target { sh2a } } } }  */
 
 #include "pr52933-1.c"

Reply via email to