Hello,

This patch mainly improves stores of negated/inverted floating-point comparison results in regs and removes a useless zero-extension after storing the negated T bit in a reg.
One thing that is annoying is the fact that the '-1' constant is emitted during combine and thus won't get any chance of being CSE-ed in any way. This results in multiple regs being loaded with the '-1' constant, although one would suffice. For integer comparisons, I fixed this a while ago by loading the constant '-1' into a reg in the expander and emitting an insn that 'uses' that reg. However, this won't work for floating-point comparisons, because the cstoresf expander never sees the inversion. Is there a way to somehow merge pseudos that will be loaded with the same constant values before register allocation, possibly also lifting those constant loads outside of loops? Tested on rev 190151 with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}" and no new failures. OK? Cheers, Oleg ChangeLog: PR target/51244 * config/sh/sh.md: Add negc extu sequence peephole. (movrt, movnegt, movrt_negc, nott): Use t_reg_operand predicate. (*movrt_negc): New insn_and_split. * config/sh/sync.md (atomic_test_and_set): Pass result of get_t_reg_rtx to gen_movnegt. * config/sh/sh.c (expand_cbranchsi4, sh_emit_scc_to_t, sh_emit_compare_and_branch, sh_emit_compare_and_set): Use get_t_reg_rtx. (sh_expand_t_scc): Pass result of get_t_reg_rtx to gen_movnegt. testsuite/ChangeLog: PR target/51244 * gcc.target/sh/pr51244-5.c: New. * gcc.target/sh/pr51244-6.c: New.
Index: gcc/testsuite/gcc.target/sh/pr51244-5.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr51244-5.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr51244-5.c (revision 0) @@ -0,0 +1,50 @@ +/* Check that no unnecessary sign or zero extension insn is generated after + a negc or movrt insn that stores the inverted T bit in a reg. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O2" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "extu|exts" } } */ + +int +test_00 (int a, int b, int* c, short* d, int x) +{ + *d = x != 0; + *c = -1; + + if (x != 0) + return a > 0; + + return 0; +} + +unsigned char +test_01 (int x) +{ + if (x < 58 && x > 47) + return 1; + return 0; +} + +char +test_02 (int x) +{ + if (x < 58 && x > 47) + return 1; + return 0; +} + +unsigned short +test_03 (int x) +{ + if (x < 58 && x > 47) + return 1; + return 0; +} + +short +test_04 (int x) +{ + if (x < 58 && x > 47) + return 1; + return 0; +} Index: gcc/testsuite/gcc.target/sh/pr51244-6.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr51244-6.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr51244-6.c (revision 0) @@ -0,0 +1,15 @@ +/* Check that no unnecessary sign or zero extension insn is generated after + a negc or movrt insn that stores the inverted T bit in a reg. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2" "-m3" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "extu|exts" } } */ + +float +test_00 (float q[4], float m[9]) +{ + float s0 = m[0] + m[1]; + float s1 = m[0] - m[1]; + + return q[s0 > s1 ? 
0 : 1]; +} Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 189953) +++ gcc/config/sh/sh.md (working copy) @@ -9515,7 +9515,7 @@ (define_insn "movrt" [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (xor:SI (reg:SI T_REG) (const_int 1)))] + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))] "TARGET_SH2A" "movrt %0" [(set_attr "type" "arith")]) @@ -9668,28 +9668,66 @@ (define_expand "movnegt" [(set (match_operand:SI 0 "arith_reg_dest" "") - (xor:SI (reg:SI T_REG) (const_int 1)))] - "" + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))] + "TARGET_SH1" { if (TARGET_SH2A) - emit_insn (gen_movrt (operands[0])); + emit_insn (gen_movrt (operands[0], operands[1])); else { rtx val = force_reg (SImode, gen_int_mode (-1, SImode)); - emit_insn (gen_movrt_negc (operands[0], val)); + emit_insn (gen_movrt_negc (operands[0], operands[1], val)); } DONE; }) (define_insn "movrt_negc" [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (xor:SI (reg:SI T_REG) (const_int 1))) + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1))) (set (reg:SI T_REG) (const_int 1)) - (use (match_operand:SI 1 "arith_reg_operand" "r"))] + (use (match_operand:SI 2 "arith_reg_operand" "r"))] "TARGET_SH1" - "negc %1,%0" + "negc %2,%0" [(set_attr "type" "arith")]) +;; The -1 constant will not be CSE-ed for the *movrt_negc pattern, but the +;; pattern can be used by the combine pass. Using a scratch reg for the +;; -1 constant results in slightly better register allocations compared to +;; generating a pseudo reg before reload. +(define_insn_and_split "*movrt_negc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1))) + (clobber (match_scratch:SI 2 "=r")) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && ! 
TARGET_SH2A" + "#" + "&& reload_completed" + [(set (match_dup 2) (const_int -1)) + (parallel + [(set (match_dup 0) (xor:SI (match_dup 1) (const_int 1))) + (set (reg:SI T_REG) (const_int 1)) + (use (match_dup 2))])]) + +;; In some cases the zero extension does not get combined away and a +;; sequence like the following might remain: +;; mov #-1,r2 +;; tst r1,r1 +;; negc r2,r1 +;; extu.b r1,r1 +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "arith_reg_dest" "") + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1))) + (set (reg:SI T_REG) (const_int 1)) + (use (match_operand:SI 2 "arith_reg_operand" ""))]) + (set (match_dup 0) + (zero_extend:SI (match_operand 3 "arith_reg_operand" "")))] + "TARGET_SH1 && REGNO (operands[0]) == REGNO (operands[3])" + [(parallel + [(set (match_dup 0) (xor:SI (match_dup 1) (const_int 1))) + (set (reg:SI T_REG) (const_int 1)) + (use (match_dup 2))])]) + ;; The *negnegt pattern helps the combine pass to figure out how to fold ;; an explicit double T bit negation. 
(define_insn_and_split "*negnegt" @@ -9729,7 +9767,8 @@ [(const_int 0)]) (define_insn_and_split "nott" - [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))] + [(set (reg:SI T_REG) + (xor:SI (match_operand:SI 0 "t_reg_operand" "") (const_int 1)))] "TARGET_SH1" { gcc_assert (TARGET_SH2A); Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 189953) +++ gcc/config/sh/sh.c (working copy) @@ -1893,7 +1893,7 @@ branch_expander = gen_branch_false; default: ; } - emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG), + emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), gen_rtx_fmt_ee (comparison, SImode, operands[1], operands[2]))); jump = emit_jump_insn (branch_expander (operands[3], get_t_reg_rtx ())); @@ -2129,7 +2129,7 @@ void sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1) { - rtx t_reg = gen_rtx_REG (SImode, T_REG); + rtx t_reg = get_t_reg_rtx (); enum rtx_code oldcode = code; enum machine_mode mode; @@ -2304,7 +2304,7 @@ } insn = gen_rtx_SET (VOIDmode, - gen_rtx_REG (SImode, T_REG), + get_t_reg_rtx (), gen_rtx_fmt_ee (branch_code, SImode, op0, op1)); sh_emit_set_t_insn (insn, mode); @@ -2369,9 +2369,9 @@ if (lab) emit_label (lab); if (invert) - emit_insn (gen_movnegt (operands[0])); + emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); else - emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG)); + emit_move_insn (operands[0], get_t_reg_rtx ()); } /* Functions to output assembly code. 
*/ @@ -12121,7 +12121,7 @@ if ((code == EQ && val == 1) || (code == NE && val == 0)) emit_insn (gen_movt (result, get_t_reg_rtx ())); else if ((code == EQ && val == 0) || (code == NE && val == 1)) - emit_insn (gen_movnegt (result)); + emit_insn (gen_movnegt (result, get_t_reg_rtx ())); else if (code == EQ || code == NE) emit_insn (gen_move_insn (result, GEN_INT (code == NE))); else Index: gcc/config/sh/sync.md =================================================================== --- gcc/config/sh/sync.md (revision 189953) +++ gcc/config/sh/sync.md (working copy) @@ -830,7 +830,7 @@ /* The result of the test op is the inverse of what we are supposed to return. Thus invert the T bit. The inversion will be potentially optimized away and integrated into surrounding code. */ - emit_insn (gen_movnegt (operands[0])); + emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); DONE; })