Hello,

This patch mainly improves stores of negated/inverted floating-point comparison results in regs and removes a useless zero-extension after storing the negated T bit in a reg.
One thing that is annoying is the fact that the '-1' constant is emitted during combine and thus won't get any chance of being CSE-ed in any way. This results in multiple regs being loaded with the '-1' constant, although one would suffice. For integer comparisons, I fixed this a while ago by loading the constant '-1' into a reg in the expander and emitting an insn that 'uses' that reg. However, this won't work for floating-point comparisons, because the cstoresf expander never sees the inversion. Is there a way to somehow merge pseudos that will be loaded with the same constant values before register allocation, possibly also lifting those constant loads outside of loops? Tested on rev 190151 with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}" and no new failures. OK? Cheers, Oleg ChangeLog: PR target/51244 * config/sh/sh.md: Add negc extu sequence peephole. (movrt, movnegt, movrt_negc, nott): Use t_reg_operand predicate. (*movrt_negc): New insn_and_split. * config/sh/sync.md (atomic_test_and_set): Pass result of get_t_reg_rtx to gen_movnegt. * config/sh/sh.c (expand_cbranchsi4, sh_emit_scc_to_t, sh_emit_compare_and_branch, sh_emit_compare_and_set): Use get_t_reg_rtx. (sh_expand_t_scc): Pass result of get_t_reg_rtx to gen_movnegt. testsuite/ChangeLog: PR target/51244 * gcc.target/sh/pr51244-5.c: New. * gcc.target/sh/pr51244-6.c: New.
Index: gcc/testsuite/gcc.target/sh/pr51244-5.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr51244-5.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr51244-5.c (revision 0) @@ -0,0 +1,50 @@ +/* Check that no unnecessary sign or zero extension insn is generated after + a negc or movrt insn that stores the inverted T bit in a reg. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O2" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "extu|exts" } } */ + +int +test_00 (int a, int b, int* c, short* d, int x) +{ + *d = x != 0; + *c = -1; + + if (x != 0) + return a > 0; + + return 0; +} + +unsigned char +test_01 (int x) +{ + if (x < 58 && x > 47) + return 1; + return 0; +} + +char +test_02 (int x) +{ + if (x < 58 && x > 47) + return 1; + return 0; +} + +unsigned short +test_03 (int x) +{ + if (x < 58 && x > 47) + return 1; + return 0; +} + +short +test_04 (int x) +{ + if (x < 58 && x > 47) + return 1; + return 0; +} Index: gcc/testsuite/gcc.target/sh/pr51244-6.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr51244-6.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr51244-6.c (revision 0) @@ -0,0 +1,15 @@ +/* Check that no unnecessary sign or zero extension insn is generated after + a negc or movrt insn that stores the inverted T bit in a reg. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2" "-m3" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-not "extu|exts" } } */ + +float +test_00 (float q[4], float m[9]) +{ + float s0 = m[0] + m[1]; + float s1 = m[0] - m[1]; + + return q[s0 > s1 ? 
0 : 1]; +} Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 189953) +++ gcc/config/sh/sh.md (working copy) @@ -9515,7 +9515,7 @@ (define_insn "movrt" [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (xor:SI (reg:SI T_REG) (const_int 1)))] + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))] "TARGET_SH2A" "movrt %0" [(set_attr "type" "arith")]) @@ -9668,28 +9668,66 @@ (define_expand "movnegt" [(set (match_operand:SI 0 "arith_reg_dest" "") - (xor:SI (reg:SI T_REG) (const_int 1)))] - "" + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))] + "TARGET_SH1" { if (TARGET_SH2A) - emit_insn (gen_movrt (operands[0])); + emit_insn (gen_movrt (operands[0], operands[1])); else { rtx val = force_reg (SImode, gen_int_mode (-1, SImode)); - emit_insn (gen_movrt_negc (operands[0], val)); + emit_insn (gen_movrt_negc (operands[0], operands[1], val)); } DONE; }) (define_insn "movrt_negc" [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (xor:SI (reg:SI T_REG) (const_int 1))) + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1))) (set (reg:SI T_REG) (const_int 1)) - (use (match_operand:SI 1 "arith_reg_operand" "r"))] + (use (match_operand:SI 2 "arith_reg_operand" "r"))] "TARGET_SH1" - "negc %1,%0" + "negc %2,%0" [(set_attr "type" "arith")]) +;; The -1 constant will not be CSE-ed for the *movrt_negc pattern, but the +;; pattern can be used by the combine pass. Using a scratch reg for the +;; -1 constant results in slightly better register allocations compared to +;; generating a pseudo reg before reload. +(define_insn_and_split "*movrt_negc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1))) + (clobber (match_scratch:SI 2 "=r")) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && ! 
TARGET_SH2A" + "#" + "&& reload_completed" + [(set (match_dup 2) (const_int -1)) + (parallel + [(set (match_dup 0) (xor:SI (match_dup 1) (const_int 1))) + (set (reg:SI T_REG) (const_int 1)) + (use (match_dup 2))])]) + +;; In some cases the zero extension does not get combined away and a +;; sequence like the following might remain: +;; mov #-1,r2 +;; tst r1,r1 +;; negc r2,r1 +;; extu.b r1,r1 +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "arith_reg_dest" "") + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1))) + (set (reg:SI T_REG) (const_int 1)) + (use (match_operand:SI 2 "arith_reg_operand" ""))]) + (set (match_dup 0) + (zero_extend:SI (match_operand 3 "arith_reg_operand" "")))] + "TARGET_SH1 && REGNO (operands[0]) == REGNO (operands[3])" + [(parallel + [(set (match_dup 0) (xor:SI (match_dup 1) (const_int 1))) + (set (reg:SI T_REG) (const_int 1)) + (use (match_dup 2))])]) + ;; The *negnegt pattern helps the combine pass to figure out how to fold ;; an explicit double T bit negation. 
(define_insn_and_split "*negnegt" @@ -9729,7 +9767,8 @@ [(const_int 0)]) (define_insn_and_split "nott" - [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))] + [(set (reg:SI T_REG) + (xor:SI (match_operand:SI 0 "t_reg_operand" "") (const_int 1)))] "TARGET_SH1" { gcc_assert (TARGET_SH2A); Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 189953) +++ gcc/config/sh/sh.c (working copy) @@ -1893,7 +1893,7 @@ branch_expander = gen_branch_false; default: ; } - emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG), + emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (), gen_rtx_fmt_ee (comparison, SImode, operands[1], operands[2]))); jump = emit_jump_insn (branch_expander (operands[3], get_t_reg_rtx ())); @@ -2129,7 +2129,7 @@ void sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1) { - rtx t_reg = gen_rtx_REG (SImode, T_REG); + rtx t_reg = get_t_reg_rtx (); enum rtx_code oldcode = code; enum machine_mode mode; @@ -2304,7 +2304,7 @@ } insn = gen_rtx_SET (VOIDmode, - gen_rtx_REG (SImode, T_REG), + get_t_reg_rtx (), gen_rtx_fmt_ee (branch_code, SImode, op0, op1)); sh_emit_set_t_insn (insn, mode); @@ -2369,9 +2369,9 @@ if (lab) emit_label (lab); if (invert) - emit_insn (gen_movnegt (operands[0])); + emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); else - emit_move_insn (operands[0], gen_rtx_REG (SImode, T_REG)); + emit_move_insn (operands[0], get_t_reg_rtx ()); } /* Functions to output assembly code. 
*/ @@ -12121,7 +12121,7 @@ if ((code == EQ && val == 1) || (code == NE && val == 0)) emit_insn (gen_movt (result, get_t_reg_rtx ())); else if ((code == EQ && val == 0) || (code == NE && val == 1)) - emit_insn (gen_movnegt (result)); + emit_insn (gen_movnegt (result, get_t_reg_rtx ())); else if (code == EQ || code == NE) emit_insn (gen_move_insn (result, GEN_INT (code == NE))); else Index: gcc/config/sh/sync.md =================================================================== --- gcc/config/sh/sync.md (revision 189953) +++ gcc/config/sh/sync.md (working copy) @@ -830,7 +830,7 @@ /* The result of the test op is the inverse of what we are supposed to return. Thus invert the T bit. The inversion will be potentially optimized away and integrated into surrounding code. */ - emit_insn (gen_movnegt (operands[0])); + emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); DONE; })