Hello, The attached patch improves the utilization of the addc and subc instructions as mentioned in PR target/54236. Tested on rev 190396 using make -k check RUNTESTFLAGS="--target_board=sh-sim\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
with no new failures. I've also briefly checked the CSiBE result size for '-m4-single -ml -O2 -mpretend-cmove'. There is a little bit of noise here and there due to differences in insn scheduling/ordering. Looking at some parts of mpeg2dec, the new insn sequences seem to be beneficial for a couple of functions. OK? Cheers, Oleg ChangeLog: PR target/54236 * config/sh/sh.md (addc): Add commutative modifier. (*addc, *minus_plus_one, *subc, *negc): New insns and splits. testsuite/ChangeLog: PR target/54236 * gcc.target/sh/pr54236-1.c: New.
Index: gcc/testsuite/gcc.target/sh/pr54236-1.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54236-1.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54236-1.c (revision 0) @@ -0,0 +1,76 @@ +/* Tests to check the utilization of addc, subc and negc instructions in + special cases. If everything works as expected we won't see any + movt instructions in these cases. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ +/* { dg-final { scan-assembler-times "addc" 3 } } */ +/* { dg-final { scan-assembler-times "subc" 3 } } */ +/* { dg-final { scan-assembler-times "sett" 4 } } */ +/* { dg-final { scan-assembler-times "negc" 1 } } */ +/* { dg-final { scan-assembler-not "movt" } } */ + +int +test_00 (int a, int b, int c, int d) +{ + /* 1x addc, 1x sett */ + return a + b + 1; +} + +int +test_01 (int a, int b, int c, int d) +{ + /* 1x addc */ + return a + (c == d); +} + +int +test_02 (int a, int b, int c, int d) +{ + /* 1x subc, 1x sett */ + return a - b - 1; +} + +int +test_03 (int a, int b, int c, int d) +{ + /* 1x subc */ + return a - (c == d); +} + +int +test_04 (int a, int b, int c, int d) +{ + /* 1x addc, 1x sett */ + return a + b + c + 1; +} + +int +test_05 (int a, int b, int c, int d) +{ + /* 1x subc, 1x sett */ + return a - b - c - 1; +} + +int +test_06 (int a, int b, int c, int d) +{ + /* 1x negc */ + return 0 - a - (b == c); +} + +int +test_07 (int *vec) +{ + /* Must not see a 'sett' or 'addc' here. + This is a case where combine tries to produce + 'a + (0 - b) + 1' out of 'a - b + 1'. 
*/ + int z = vec[0]; + int vi = vec[1]; + int zi = vec[2]; + + if (zi != 0 && z < -1) + vi -= (((vi >> 7) & 0x01) << 1) - 1; + + return vi; +} Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 190396) +++ gcc/config/sh/sh.md (working copy) @@ -1686,7 +1686,7 @@ (define_insn "addc" [(set (match_operand:SI 0 "arith_reg_dest" "=r") - (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0") (match_operand:SI 2 "arith_reg_operand" "r")) (reg:SI T_REG))) (set (reg:SI T_REG) @@ -1695,6 +1695,76 @@ "addc %2,%0" [(set_attr "type" "arith")]) +;; A simplified version of the addc insn, where the exact value of the +;; T bit doesn't matter. This is easier for combine to pick up. +;; We allow a reg or 0 for one of the operands in order to be able to +;; do 'reg + T' sequences. Reload will load the constant 0 into the reg +;; as needed. +(define_insn "*addc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0") + (match_operand:SI 2 "arith_reg_or_0_operand" "r")) + (match_operand:SI 3 "t_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "addc %2,%0" + [(set_attr "type" "arith")]) + +;; Split 'reg + reg + 1' into a sett addc sequence, as it can be scheduled +;; better, if the sett insn can be done early. +(define_insn_and_split "*addc" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")) + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) (const_int 1)) + (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 2)) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))])]) + +;; Sometimes combine will try to do 'reg + (0-reg) + 1' if the *addc pattern +;; matched. 
Split this up into a simple sub add sequence, as this will save +;; us one sett insn. +(define_insn_and_split "*minus_plus_one" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")) + (const_int 1)))] + "TARGET_SH1" + "#" + "&& 1" + [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))]) + +;; Split 'reg + T' into 'reg + 0 + T' to utilize the addc insn. +;; If the 0 constant can be CSE-ed, this becomes a one instruction +;; operation, as opposed to sequences such as +;; movt r2 +;; add r2,r3 +;; +;; Even if the constant is not CSE-ed, a sequence such as +;; mov #0,r2 +;; addc r2,r3 +;; can be scheduled much better since the load of the constant can be +;; done earlier, before any comparison insns that store the result in +;; the T bit. +(define_insn_and_split "*addc" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (match_operand:SI 1 "t_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (plus:SI (plus:SI (match_dup 2) (const_int 0)) + (match_dup 1))) + (clobber (reg:SI T_REG))])]) + (define_expand "addsi3" [(set (match_operand:SI 0 "arith_reg_operand" "") (plus:SI (match_operand:SI 1 "arith_operand" "") @@ -1805,6 +1875,63 @@ "subc %2,%0" [(set_attr "type" "arith")]) +;; A simplified version of the subc insn, where the exact value of the +;; T bit doesn't matter. This is easier for combine to pick up. +;; We allow a reg or 0 for one of the operands in order to be able to +;; do 'reg - T' sequences. Reload will load the constant 0 into the reg +;; as needed. 
+(define_insn "*subc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_or_0_operand" "r")) + (match_operand:SI 3 "t_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "subc %2,%0" + [(set_attr "type" "arith")]) + +;; Split reg - reg - 1 into a sett subc sequence, as it can be scheduled +;; better, if the sett insn can be done early. +;; Notice that combine turns 'a - b - 1' into 'a + (~b)'. +(define_insn_and_split "*subc" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (not:SI (match_operand:SI 1 "arith_reg_operand" "")) + (match_operand:SI 2 "arith_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) (const_int 1)) + (parallel [(set (match_dup 0) + (minus:SI (minus:SI (match_dup 2) (match_dup 1)) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))])]) + +;; Split 'reg - T' into 'reg - 0 - T' to utilize the subc insn. +;; If the 0 constant can be CSE-ed, this becomes a one instruction +;; operation, as opposed to sequences such as +;; movt r2 +;; sub r2,r3 +;; +;; Even if the constant is not CSE-ed, a sequence such as +;; mov #0,r2 +;; subc r2,r3 +;; can be scheduled much better since the load of the constant can be +;; done earlier, before any comparison insns that store the result in +;; the T bit. 
+(define_insn_and_split "*subc" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (minus:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "t_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (minus:SI (minus:SI (match_dup 1) (const_int 0)) + (match_dup 2))) + (clobber (reg:SI T_REG))])]) + (define_insn "*subsi3_internal" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") @@ -4528,6 +4655,19 @@ "negc %1,%0" [(set_attr "type" "arith")]) +;; A simplified version of the negc insn, where the exact value of the +;; T bit doesn't matter. This is easier for combine to pick up. +;; Notice that '0 - x - 1' is the same as '~x', thus we don't specify +;; extra patterns for this case. +(define_insn "*negc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand:SI 2 "t_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "negc %1,%0" + [(set_attr "type" "arith")]) + (define_insn "*negdi_media" [(set (match_operand:DI 0 "arith_reg_dest" "=r") (neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))]