Yoshinori Sato <[email protected]> writes:
> The LRA was confused and looping due to the definition of the register class.
> The LRA yielded incorrect results because the manipulation of stack frames
> and the movement of double words relied heavily on existing reloads.
> These changes ensure that the correct code is generated even when the "-mlra"
> option is specified.
>
> v4 changes.
> - Use post_ra_split_completed in addsi3_lra and ashlsi3_lra.
> - revert -mlra option.
> - Unnecessary changes have been reverted.
> - some clean up.
>
> PR113948
> ChangeLog:
> * gcc/config/rx/rx-protos.h (rx_split_double_move): New helper
> prototype.
> (rx_relax_double_operands): Likewise.
> * gcc/config/rx/rx.cc (rx_legitimize_address): Add expand complex
> case.
> (rx_is_legitimate_address): Add double word case.
> (rx_gen_move_template): Fix operation size in unsigned extend.
> (rx_gen_move_template): Remove DImode and DFmode.
> (rx_get_stack_layout): Fix for frame size calculation.
> (rx_initial_elimination_offset): The calculation method has been
> changed to one that supports LRA.
> (rx_hard_regno_nregs): Use CEIL.
> (rx_hard_regno_mode_ok): Add ATTRIBUTE_UNUSED.
> (rx_modes_tieable_p): Add int case.
> (rx_get_subword): New. Double word move helper.
> (rx_split_double_move): Likewise.
> (rx_relax_double_operands): Likewise.
> * gcc/config/rx/rx.h (reg_class): Add CC for all regsisters.
> (CLASS_MAX_NREGS): Remove.
> * gcc/config/rx/rx.md (mov<register_modes:mode>):
> Replace copy_to_mode_reg to force_reg.
> (movdi): Limit the arguments to make register allocation easier.
> (movdf): Likewise.
> (movdi_internal): New.
> (movdf_internal): New.
> (addsi3_pid): New. Handling UNSPEC_PID_ADDR.
> (addsi3_lra): New. alternative addptrsi3.
> (ashlsi3_lra): likewise.
>
> Signed-off-by: Yoshinori Sato <[email protected]>
Thanks, this mostly looks really good to me. Just a couple of minor
comments below:
> ---
> gcc/config/rx/rx-protos.h | 2 +
> gcc/config/rx/rx.cc | 163 +++++++++++++++++++++++++++++---------
> gcc/config/rx/rx.h | 5 +-
> gcc/config/rx/rx.md | 122 ++++++++++++++++++++++------
> 4 files changed, 224 insertions(+), 68 deletions(-)
>
> [...]
> diff --git a/gcc/config/rx/rx.cc b/gcc/config/rx/rx.cc
> index 902e756a34e..637988168e0 100644
> --- a/gcc/config/rx/rx.cc
> +++ b/gcc/config/rx/rx.cc
> @@ -148,17 +148,28 @@ rx_legitimize_address (rtx x,
> rtx oldx ATTRIBUTE_UNUSED,
> machine_mode mode ATTRIBUTE_UNUSED)
> {
> + rtx op0 = XEXP (x, 0);
> + rtx op1 = XEXP (x, 1);
> +
This needs to be inside...
> if (rx_pid_data_operand (x) == PID_UNENCODED)
> {
> rtx rv = gen_pid_addr (gen_rtx_REG (SImode, rx_pid_base_regnum ()), x);
> return rv;
> }
>
> - if (GET_CODE (x) == PLUS
> - && GET_CODE (XEXP (x, 0)) == PLUS
> - && REG_P (XEXP (XEXP (x, 0), 0))
> - && REG_P (XEXP (x, 1)))
> - return force_reg (SImode, x);
> + if (GET_CODE (x) == PLUS)
> + {
...this if statement. XEXP (x, 0) and XEXP (x, 1) would lead to RTL
checking failures if x is not a binary or ternary operator.
You might see this if you configure with --enable-checking=yes,extra,rtl
> + if (GET_CODE (op0) == PLUS
> + && REG_P (XEXP (op0, 0))
> + && CONST_INT_P (op1))
> + {
> + rtx base = XEXP (op0, 0);
> + rtx index = XEXP (op0, 1);
> +
> + rtx new_base = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, op1));
> + return simplify_gen_binary (PLUS, Pmode, new_base, index);
> + }
> + }
>
> return x;
> }
> @@ -194,6 +205,29 @@ rx_is_legitimate_address (machine_mode mode, rtx x,
> Post-increment Register Indirect. */
> return RTX_OK_FOR_BASE (XEXP (x, 0), strict);
>
> + if (GET_MODE_SIZE (mode) == 8)
Usual GCC style would be to use UNITS_PER_WORD * 2 rather than 8,
to make it more obvious where the value comes from.
> + {
> + /*
> + Since double-word memory access is split into multiple parts,
> + only simple indirect addresses are accepted.
> + */
Formatting nit, but: the GCC style is:
/* Since double-word memory accesses are split into multiple parts,
only simple indirect addresses are accepted. */
> + if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_INC)
> + return false;
> +
> + if (GET_CODE (x) == PLUS)
> + {
> + rtx base = XEXP (x, 0);
> + rtx index = XEXP (x, 1);
> +
> + if (!REG_P (base) || !CONST_INT_P (index))
> + return false;
> +
> + if (!IN_RANGE (INTVAL (index), 0, (0x10000 * 4) - 1 - 4)
> + || (INTVAL (index) % 4) != 0)
> + return false;
> + }
> + }
> +
> switch (rx_pid_data_operand (x))
> {
> case PID_UNENCODED:
> [...]
> @@ -3627,7 +3649,9 @@ rx_hard_regno_mode_ok (unsigned int regno, machine_mode)
> static bool
> rx_modes_tieable_p (machine_mode mode1, machine_mode mode2)
> {
> - return ((GET_MODE_CLASS (mode1) == MODE_FLOAT
> + return (GET_MODE_CLASS (mode1) == MODE_INT
> + && GET_MODE_CLASS (mode2) == MODE_INT)
> + || ((GET_MODE_CLASS (mode1) == MODE_FLOAT
> || GET_MODE_CLASS (mode1) == MODE_COMPLEX_FLOAT)
> == (GET_MODE_CLASS (mode2) == MODE_FLOAT
> || GET_MODE_CLASS (mode2) == MODE_COMPLEX_FLOAT));
This change still looks redundant to me.
> @@ -3644,7 +3668,68 @@ rx_c_mode_for_floating_type (enum tree_index ti)
> return TARGET_64BIT_DOUBLES ? DFmode : SFmode;
> return default_mode_for_floating_type (ti);
> }
> +
There should be a comment above the function, such as:
/* Return one word of doubleword value OP, where OP has mode MODE.
A REG_OFFSET of 0 selects the low word and a REG_OFFSET of 1
selects the high word. */
> +static rtx
> +rx_get_subword (rtx op, machine_mode mode, int reg_offset)
> +{
> + unsigned int mem_offset = reg_offset * 4;
> + if (TARGET_BIG_ENDIAN_DATA)
> + mem_offset = 4 - mem_offset;
> +
> + if (MEM_P (op))
> + return adjust_address (op, SImode, mem_offset);
> + else
> + return simplify_gen_subreg (SImode, op, mode, mem_offset);
> +}
> +
Similarly here:
/* Split a doubleword move with operands OPERANDS. Both operands have
mode MODE. */
Yeah, it's kind-of obvious in this case, but still :)
> +void
> +rx_split_double_move (rtx * operands, machine_mode mode)
> +{
> + rtx dest = operands[0];
> + rtx src = operands[1];
> +
> + rtx dest_low, dest_high, src_low, src_high;
> +
> + src_low = rx_get_subword (src, mode, 0);
> + src_high = rx_get_subword (src, mode, 1);
> +
> + dest_low = rx_get_subword (dest, mode, 0);
> + dest_high = rx_get_subword (dest, mode, 1);
> +
> + if (REG_P (operands[0]) && reg_overlap_mentioned_p (dest_low, operands[1]))
> + {
> + emit_move_insn (dest_high, src_high);
> + emit_move_insn (dest_low, src_low);
> + }
> + else
> + {
> + emit_move_insn (dest_low, src_low);
> + emit_move_insn (dest_high, src_high);
> + }
> +}
> +
And here:
/* Adjust the operands of a doubleword move. OPERANDS gives the operands
and MODE gives the mode of the operands. */
> +void
> +rx_relax_double_operands(rtx * operands, machine_mode mode)
> +{
> + if (MEM_P (operands[0]) && !rx_restricted_mem_operand (operands[0], mode))
> + {
> + rtx addr = XEXP (operands[0], 0);
> + addr = force_reg (Pmode, addr);
> + operands[0] = replace_equiv_address (operands[0], addr);
> + }
> +
> + if (MEM_P (operands[1]) && !rx_restricted_mem_operand (operands[1], mode))
> + {
> + rtx addr = XEXP (operands[1], 0);
> + addr = force_reg (Pmode, addr);
> + operands[1] = replace_equiv_address (operands[1], addr);
> + }
This two "if" statements shouldn't be needed now that memory_operand
enforces the necessary rules. (Or, alternatively, if there are cases
where this code is still needed, there might need to be some more tweaks
to rx_is_legitimate_address.)
> +
> + if (MEM_P (operands[0]) && !REG_P (operands[1]))
> + operands[1] = force_reg (mode, operands[1]);
Formatting nit, sorry, but: the statement should only be indented
2 columns beyond "if", rather than 4.
> +}
>
> +
> #undef TARGET_NARROW_VOLATILE_BITFIELD
> #define TARGET_NARROW_VOLATILE_BITFIELD
> rx_narrow_volatile_bitfield
>
> [...]
> @@ -603,6 +597,58 @@
> (set_attr "timings" "11,11,11,11,11,12,11,11,11,11,11,11")]
> )
>
> +(define_expand "movdi"
> + [(set (match_operand:DI 0 "nonimmediate_operand" "")
> + (match_operand:DI 1 "general_operand" ""))]
> + ""
> + {
> + rx_relax_double_operands(operands, DImode);
> +
> + emit_insn (gen_movdi_internal (operands[0], operands[1]));
> + DONE;
I didn't notice last time, sorry, but: the emit_insn and DONE shouldn't
be necessary. Just falling through would achieve the same thing.
> + }
> +)
> +
> +(define_insn_and_split "movdi_internal"
> + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m")
> + (match_operand:DI 1 "general_operand" "ri,m,r"))]
> + ""
> + "#"
> + "reload_completed"
> + [(const_int 0)]
> + {
> + rx_split_double_move (operands, DImode);
> + DONE;
> + }
> + [(set_attr "length" "8")]
> +)
> +
> +(define_expand "movdf"
> + [(set (match_operand:DF 0 "nonimmediate_operand" "")
> + (match_operand:DF 1 "general_operand" ""))]
> + ""
> + {
> + rx_relax_double_operands(operands, DFmode);
> +
> + emit_insn (gen_movdf_internal (operands[0], operands[1]));
> + DONE;
> + }
> +)
Similarly here.
> +
> +(define_insn_and_split "movdf_internal"
> + [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m")
> + (match_operand:DF 1 "general_operand" "rF,m,r"))]
> + ""
> + "#"
> + "reload_completed"
> + [(const_int 0)]
> + {
> + rx_split_double_move (operands, DFmode);
> + DONE;
> + }
> + [(set_attr "length" "8")]
> +)
> +
> (define_insn "extend<small_int_modes:mode>si2"
> [(set (match_operand:SI 0 "register_operand" "=r,r")
> (sign_extend:SI (match_operand:small_int_modes
> [...]
> @@ -2872,20 +2931,33 @@
> ""
> )
>
> -(define_insn "movdi"
> - [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
> - (match_operand:DI 1 "general_operand" "rmi"))]
> - "TARGET_ENABLE_LRA"
> - { return rx_gen_move_template (operands, false); }
> - [(set_attr "length" "16")
> - (set_attr "timings" "22")]
> -)
> -
> -(define_insn "movdf"
> - [(set (match_operand:DF 0 "nonimmediate_operand" "=rm")
> - (match_operand:DF 1 "general_operand" "rmi"))]
> - "TARGET_ENABLE_LRA"
> - { return rx_gen_move_template (operands, false); }
> - [(set_attr "length" "16")
> - (set_attr "timings" "22")]
> +;; RX does not allow addition without destroying CC.
> +;; As an alternative to addptrsi3, we define addsi3, which hides changes to
> CC.
> +(define_insn_and_split "*addsi3_lra"
> + [(set (match_operand:SI 0 "register_operand" "=r,r")
> + (plus:SI (match_operand:SI 1 "register_operand" "0,r")
> + (match_operand:SI 2 "rx_source_operand" "ri,ri")))]
> + "!post_ra_split_completed"
> + "#"
> + "&& 1"
> + [(parallel [
> + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))
> + (clobber (reg:CC 16))
> + ])]
> +)
> +
> +(define_insn_and_split "*ashlsi3_lra"
> + [(set (match_operand:SI 0 "register_operand" "=r,r,r")
> + (ashift:SI (match_operand:SI 1 "register_operand" "%0,0,r")
> + (match_operand:SI 2 "rx_shift_operand" "r,i,i")))]
> + "!post_ra_split_completed"
> + "@
> + shll\t%2, %0
> + shll\t%2, %0
> + shll\t%2, %1, %0"
This asm template should just be "#", as for the other new
define_insn_and_splits. The asm for this instruction should
never be used directly, given the !post_ra_split_completed
condition.
Thanks,
Richard
> + "&& 1"
> + [(parallel [
> + (set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
> + (clobber (reg:CC CC_REG))
> + ])]
> )