Hello,

The attached patch restores the SH FMA combine patterns that I removed when adding support for the fma patterns in 4.8. It turned out that without these patterns, expressions such as 'a * b + a' do not use the fmac instruction. As far as I understand, this is actually a tree optimization issue, but having these combine patterns might be a good idea anyway.
Tested on rev 199069 with
make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
with no new failures.

OK for trunk? I would also like to apply this to the 4.8 branch once it has been reopened. OK?

Cheers,
Oleg

gcc/ChangeLog:

PR target/56547
* config/sh/sh.md (fmasf4): Remove empty constraint strings.
(*fmasf4, *fmasf4_media): New insns.

testsuite/ChangeLog:

PR target/56547
* gcc.target/sh/pr56547-1.c: New.
* gcc.target/sh/pr56547-2.c: New.
Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 199069) +++ gcc/config/sh/sh.md (working copy) @@ -12251,10 +12251,10 @@ ;; FMA (fused multiply-add) patterns (define_expand "fmasf4" - [(set (match_operand:SF 0 "fp_arith_reg_operand" "") - (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "") - (match_operand:SF 2 "fp_arith_reg_operand" "") - (match_operand:SF 3 "fp_arith_reg_operand" "")))] + [(set (match_operand:SF 0 "fp_arith_reg_operand") + (fma:SF (match_operand:SF 1 "fp_arith_reg_operand") + (match_operand:SF 2 "fp_arith_reg_operand") + (match_operand:SF 3 "fp_arith_reg_operand")))] "TARGET_SH2E || TARGET_SHMEDIA_FPU" { if (TARGET_SH2E) @@ -12285,6 +12285,43 @@ "fmac.s %1, %2, %0" [(set_attr "type" "fparith_media")]) +;; For some cases such as 'a * b + a' the FMA pattern is not generated by +;; previous transformations. If FMA is generally allowed, let the combine +;; pass utilize it. +(define_insn_and_split "*fmasf4" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand"))] + "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF" + "fmac %1,%2,%0" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (fma:SF (match_dup 1) (match_dup 2) (match_dup 3))) + (use (match_dup 4))])] +{ + /* Change 'b * a + a' into 'a * b + a'. + This is better for register allocation. 
*/ + if (REGNO (operands[2]) == REGNO (operands[3])) + { + rtx tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +} + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_insn "*fmasf4_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "fp_arith_reg_operand" "0")))] + "TARGET_SHMEDIA_FPU && flag_fp_contract_mode != FP_CONTRACT_OFF" + "fmac.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + (define_expand "divsf3" [(set (match_operand:SF 0 "arith_reg_operand" "") (div:SF (match_operand:SF 1 "arith_reg_operand" "") Index: gcc/testsuite/gcc.target/sh/pr56547-1.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr56547-1.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr56547-1.c (revision 0) @@ -0,0 +1,19 @@ +/* Verify that the fmac insn is used for the expression 'a * b + a' and + 'a * a + a'. + This assumes that the default compiler setting is -ffp-contract=fast. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2" "-m3" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-times "fmac" 2 } } */ + +float +test_00 (float a, float b) +{ + return a * b + a; +} + +float +test_01 (float a) +{ + return a * a + a; +} Index: gcc/testsuite/gcc.target/sh/pr56547-2.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr56547-2.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr56547-2.c (revision 0) @@ -0,0 +1,18 @@ +/* Verify that the fmac insn is used for the expression 'a * b + a' and + 'a * a + a' when -ffast-math is specified. 
*/ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1 -ffast-math" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2" "-m3" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */ +/* { dg-final { scan-assembler-times "fmac" 2 } } */ + +float +test_00 (float a, float b) +{ + return a * b + a; +} + +float +test_01 (float a) +{ + return a * a + a; +}