Richard, This patch: - adds the clobbers required by TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to CALL_INSN_FUNCTION_USAGE, - sets TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true, which enables the fuse-caller-save optimisation, and - adds an arm fuse-caller-save test-case.
Built and tested on arm-linux-gnueabi. OK for trunk? Thanks, - Tom
2014-06-01 Radovan Obradovic <robrado...@mips.com> Tom de Vries <t...@codesourcery.com> * config/arm/arm-protos.h (arm_emit_call_insn): Add bool parameter. * config/arm/arm.c (TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS): Redefine to true. (arm_emit_call_insn): Add and use sibcall parameter. Add IP and CC clobbers to CALL_INSN_FUNCTION_USAGE. (define_expand "sibcall_internal") (define_expand "sibcall_value_internal"): New. (define_expand "call", define_expand "call_value"): Add argument to arm_emit_call_insn. (define_expand "sibcall"): Use sibcall_internal and arm_emit_call_insn. (define_expand "sibcall_value"): Use sibcall_value_internal and arm_emit_call_insn. * gcc.target/arm/fuse-caller-save.c: New test. --- gcc/config/arm/arm-protos.h | 2 +- gcc/config/arm/arm.c | 16 ++++++++++++- gcc/config/arm/arm.md | 30 +++++++++++++++++++++++-- gcc/testsuite/gcc.target/arm/fuse-caller-save.c | 26 +++++++++++++++++++++ 4 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/fuse-caller-save.c diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 74645ee..524fd83 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -126,7 +126,7 @@ extern int arm_const_double_inline_cost (rtx); extern bool arm_const_double_by_parts (rtx); extern bool arm_const_double_by_immediates (rtx); extern const char *fp_immediate_constant (rtx); -extern void arm_emit_call_insn (rtx, rtx); +extern void arm_emit_call_insn (rtx, rtx, bool); extern const char *output_call (rtx *); extern const char *output_call_mem (rtx *); void arm_emit_movpair (rtx, rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1117bd4..34e0977 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -679,6 +679,9 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p +#undef 
TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS +#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -17603,7 +17606,7 @@ vfp_emit_fstmd (int base_reg, int count) the call target. */ void -arm_emit_call_insn (rtx pat, rtx addr) +arm_emit_call_insn (rtx pat, rtx addr, bool sibcall) { rtx insn; @@ -17614,6 +17617,7 @@ arm_emit_call_insn (rtx pat, rtx addr) to the instruction's CALL_INSN_FUNCTION_USAGE. */ if (TARGET_VXWORKS_RTP && flag_pic + && !sibcall && GET_CODE (addr) == SYMBOL_REF && (SYMBOL_REF_DECL (addr) ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) @@ -17622,6 +17626,16 @@ arm_emit_call_insn (rtx pat, rtx addr) require_pic_register (); use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg); } + + if (TARGET_AAPCS_BASED) + { + /* For AAPCS, IP and CC can be clobbered by veneers inserted by the + linker. We need to add these to allow + arm_call_fusage_contains_non_callee_clobbers to return true. */ + rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn); + clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM)); + clobber_reg (fusage, gen_rtx_REG (word_mode, CC_REGNUM)); + } } /* Output a 'call' insn. 
*/ diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 75d0541..1209730 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -9090,7 +9090,7 @@ XEXP (operands[0], 0) = force_reg (Pmode, callee); pat = gen_call_internal (operands[0], operands[1], operands[2]); - arm_emit_call_insn (pat, XEXP (operands[0], 0)); + arm_emit_call_insn (pat, XEXP (operands[0], 0), false); DONE; }" ) @@ -9200,7 +9200,7 @@ pat = gen_call_value_internal (operands[0], operands[1], operands[2], operands[3]); - arm_emit_call_insn (pat, XEXP (operands[1], 0)); + arm_emit_call_insn (pat, XEXP (operands[1], 0), false); DONE; }" ) @@ -9350,6 +9350,12 @@ (set_attr "type" "call")] ) +(define_expand "sibcall_internal" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (return) + (use (match_operand 2 "" ""))])]) + ;; We may also be able to do sibcalls for Thumb, but it's much harder... (define_expand "sibcall" [(parallel [(call (match_operand 0 "memory_operand" "") @@ -9359,6 +9365,8 @@ "TARGET_32BIT" " { + rtx pat; + if ((!REG_P (XEXP (operands[0], 0)) && GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF) || (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF @@ -9367,9 +9375,20 @@ if (operands[2] == NULL_RTX) operands[2] = const0_rtx; + + pat = gen_sibcall_internal (operands[0], operands[1], operands[2]); + arm_emit_call_insn (pat, operands[0], true); + DONE; }" ) +(define_expand "sibcall_value_internal" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (return) + (use (match_operand 3 "" ""))])]) + (define_expand "sibcall_value" [(parallel [(set (match_operand 0 "" "") (call (match_operand 1 "memory_operand" "") @@ -9379,6 +9398,8 @@ "TARGET_32BIT" " { + rtx pat; + if ((!REG_P (XEXP (operands[1], 0)) && GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF) || (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF @@ -9387,6 +9408,11 @@ if (operands[3] == 
NULL_RTX) operands[3] = const0_rtx; + + pat = gen_sibcall_value_internal (operands[0], operands[1], + operands[2], operands[3]); + arm_emit_call_insn (pat, operands[1], true); + DONE; }" ) diff --git a/gcc/testsuite/gcc.target/arm/fuse-caller-save.c b/gcc/testsuite/gcc.target/arm/fuse-caller-save.c new file mode 100644 index 0000000..a4f7e98 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/fuse-caller-save.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fuse-caller-save" } */ +/* Testing -fuse-caller-save optimization option. */ + +static int __attribute__((noinline)) +bar (int x) +{ + return x + 3; +} + +int __attribute__((noinline)) +foo (int y) +{ + return y + bar (y); +} + +int +main (void) +{ + return !(foo (5) == 13); +} + +/* For thumb1, r3 is considered likely spilled, and treated differently in + ira_build_conflicts, which inhibits the fuse-caller-save optimization. */ +/* { dg-final { scan-assembler-times "mov\tr3, r0" 1 { target { ! arm_thumb1 } } } } */ + -- 1.9.1