I have a reload/register allocation question and possible patch. While working on the Aarch64 SIMD ABI[1] I ran into a problem where GCC was saving and restoring registers that it did not need to. I tracked it down to lra-constraints.c and its use of targetm.hard_regno_call_part_clobbered on instructions that are not calls. Specifically need_for_call_save_p would check this macro even when the instruction in question (unknown to need_for_call_save_p) was not a call instruction.
This seems wrong to me and I was wondering if anyone more familiar with the register allocator and reload could look at this patch and tell me if it seems reasonable or not. It passed bootstrap and I am running tests now. I am just wondering if there is any reason why this target function would need to be called on non-call instructions or if doing so is just an oversight/bug. Steve Ellcey sell...@cavium.com [1] https://gcc.gnu.org/ml/gcc/2018-07/msg00012.html 2018-07-11 Steve Ellcey <sell...@cavium.com> * lra-constraints.c (need_for_call_save_p): Add insn argument and only check targetm.hard_regno_call_part_clobbered on calls. (need_for_split_p): Add insn argument, pass to need_for_call_save_p. (split_reg): Pass insn to need_for_call_save_p. (split_if_necessary): Pass curr_insn to need_for_split_p. (inherit_in_ebb): Ditto. diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 7eeec76..7fc8e7f 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -5344,7 +5344,7 @@ inherit_reload_reg (bool def_p, int original_regno, /* Return true if we need a caller save/restore for pseudo REGNO which was assigned to a hard register. */ static inline bool -need_for_call_save_p (int regno) +need_for_call_save_p (int regno, rtx_insn *insn) { lra_assert (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0); return (usage_insns[regno].calls_num < calls_num @@ -5354,7 +5354,7 @@ need_for_call_save_p (int regno) ? lra_reg_info[regno].actual_call_used_reg_set : call_used_reg_set, PSEUDO_REGNO_MODE (regno), reg_renumber[regno]) - || (targetm.hard_regno_call_part_clobbered + || (CALL_P (insn) && targetm.hard_regno_call_part_clobbered (reg_renumber[regno], PSEUDO_REGNO_MODE (regno))))); } @@ -5374,7 +5374,8 @@ static bitmap_head ebb_global_regs; assignment pass because of too many generated moves which will be probably removed in the undo pass. */ static inline bool -need_for_split_p (HARD_REG_SET potential_reload_hard_regs, int regno) +need_for_split_p (HARD_REG_SET potential_reload_hard_regs, + int regno, rtx_insn *insn) { int hard_regno = regno < FIRST_PSEUDO_REGISTER ? regno : reg_renumber[regno]; @@ -5416,7 +5417,8 @@ need_for_split_p (HARD_REG_SET potential_reload_hard_regs, int regno) || (regno >= FIRST_PSEUDO_REGISTER && lra_reg_info[regno].nrefs > 3 && bitmap_bit_p (&ebb_global_regs, regno)))) - || (regno >= FIRST_PSEUDO_REGISTER && need_for_call_save_p (regno))); + || (regno >= FIRST_PSEUDO_REGISTER + && need_for_call_save_p (regno, insn))); } /* Return class for the split pseudo created from original pseudo with @@ -5536,7 +5538,7 @@ split_reg (bool before_p, int original_regno, rtx_insn *insn, nregs = hard_regno_nregs (hard_regno, mode); rclass = lra_get_allocno_class (original_regno); original_reg = regno_reg_rtx[original_regno]; - call_save_p = need_for_call_save_p (original_regno); + call_save_p = need_for_call_save_p (original_regno, insn); } lra_assert (hard_regno >= 0); if (lra_dump_file != NULL) @@ -5759,7 +5761,7 @@ split_if_necessary (int regno, machine_mode mode, && INSN_UID (next_usage_insns) < max_uid) || (GET_CODE (next_usage_insns) == INSN_LIST && (INSN_UID (XEXP (next_usage_insns, 0)) < max_uid))) - && need_for_split_p (potential_reload_hard_regs, regno + i) + && need_for_split_p (potential_reload_hard_regs, regno + i, insn) && split_reg (before_p, regno + i, insn, next_usage_insns, NULL)) res = true; return res; @@ -6529,7 +6531,8 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) && usage_insns[j].check == curr_usage_insns_check && (next_usage_insns = usage_insns[j].insns) != NULL_RTX) { - if (need_for_split_p (potential_reload_hard_regs, j)) + if (need_for_split_p (potential_reload_hard_regs, j, + curr_insn)) { if (lra_dump_file != NULL && head_p) {