Sandra Loosemore <[email protected]> writes:
> On 08/27/2012 10:36 AM, Richard Sandiford wrote:
>> Sandra Loosemore<[email protected]> writes:
>>> On 08/19/2012 11:22 AM, Richard Sandiford wrote:
>>>>
>>>> Not sure whether a peephole is the right choice here. In practice,
>>>> I'd imagine these opportunities would only come from a DImode move of
>>>> $0 into a doubleword register, so we could simply emit the pattern in
>>>> mips_split_doubleword_move.
>>>>
>>>> That would also allow us to use it for plain HI and LO. It wasn't
>>>> obvious from the patch why it was restricted to the DSP extension
>>>> registers.
>>>>
>>>> Please also add a scan-assembler test.
>>>
>>> How is this version of the fix?
>>
>> Just to say that I've not forgotten about this. I'd still like to
>> remove the !TARGET_64BIT and ISA_HAS_DSP_MULT tests, because the
>> idea isn't specific to either.
>>
>> Also, reviewing the patch made me realise that it might be better
>> to keep the move intact and simply use "mult" in the output code.
>> That's my fault for suggesting the wrong thing, though, so I was
>> hoping to find time this weekend to try it myself. The testsuite
>> stuff ended up taking up all the available time instead.
>
> Richard,
>
> Have you had time to think about this some more? I am not sure I can
> guess how you'd like me to fix this patch now without some more specific
> review and/or suggestions about where the optimization should happen and
> what cases it should be extended to detect in addition to the dsp
> accumulator multiplies.
The patch below is the one I've been testing. But I got sidetracked
by looking into the possibility of removing the MD0_REG and MD1_REG
classes, in order to get more sensible costs. I think that was needed
for the madd-9.c test to pass.
Richard
Index: gcc/config/mips/mips-protos.h
===================================================================
--- gcc/config/mips/mips-protos.h 2012-09-03 07:49:57.319188985 +0100
+++ gcc/config/mips/mips-protos.h 2012-09-04 20:15:10.240130458 +0100
@@ -212,8 +212,8 @@ extern int m16_simm8_8 (rtx, enum machin
extern int m16_nsimm8_8 (rtx, enum machine_mode);
extern rtx mips_subword (rtx, bool);
-extern bool mips_split_64bit_move_p (rtx, rtx);
-extern void mips_split_doubleword_move (rtx, rtx);
+extern bool mips_split_move_p (rtx, rtx);
+extern void mips_split_move (rtx, rtx);
extern const char *mips_output_move (rtx, rtx);
extern bool mips_cfun_has_cprestore_slot_p (void);
extern bool mips_cprestore_address_p (rtx, bool);
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c 2012-09-04 20:15:08.191130518 +0100
+++ gcc/config/mips/mips.c 2012-09-04 20:15:17.173130256 +0100
@@ -2395,11 +2395,11 @@ mips_load_store_insns (rtx mem, rtx insn
mode = GET_MODE (mem);
/* Try to prove that INSN does not need to be split. */
- might_split_p = true;
- if (GET_MODE_BITSIZE (mode) == 64)
+ might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD;
+ if (might_split_p)
{
set = single_set (insn);
- if (set && !mips_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
+ if (set && !mips_split_move_p (SET_DEST (set), SET_SRC (set)))
might_split_p = false;
}
@@ -4105,39 +4105,55 @@ mips_subword (rtx op, bool high_p)
return simplify_gen_subreg (word_mode, op, mode, byte);
}
-/* Return true if a 64-bit move from SRC to DEST should be split into two. */
+/* Return true if SRC can be moved into DEST using MULT $0, $0. */
+
+static bool
+mips_mult_move_p (rtx dest, rtx src)
+{
+ return (src == const0_rtx
+ && REG_P (dest)
+ && GET_MODE_SIZE (GET_MODE (dest)) == 2 * UNITS_PER_WORD
+ && (ISA_HAS_DSP_MULT
+ ? ACC_REG_P (REGNO (dest))
+ : MD_REG_P (REGNO (dest))));
+}
+
+/* Return true if a move from SRC to DEST should be split into two. */
bool
-mips_split_64bit_move_p (rtx dest, rtx src)
+mips_split_move_p (rtx dest, rtx src)
{
- if (TARGET_64BIT)
+ /* Check whether the move can be done using some variant of MULT $0,$0. */
+ if (mips_mult_move_p (dest, src))
return false;
/* FPR-to-FPR moves can be done in a single instruction, if they're
allowed at all. */
- if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
+ unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
+ if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
return false;
/* Check for floating-point loads and stores. */
- if (ISA_HAS_LDC1_SDC1)
+ if (size == 8 && ISA_HAS_LDC1_SDC1)
{
if (FP_REG_RTX_P (dest) && MEM_P (src))
return false;
if (FP_REG_RTX_P (src) && MEM_P (dest))
return false;
}
- return true;
+
+ /* Otherwise split all multiword moves. */
+ return size > UNITS_PER_WORD;
}
-/* Split a doubleword move from SRC to DEST. On 32-bit targets,
- this function handles 64-bit moves for which mips_split_64bit_move_p
- holds. For 64-bit targets, this function handles 128-bit moves. */
+/* Split a move from SRC to DEST, given that mips_split_move_p holds. */
void
-mips_split_doubleword_move (rtx dest, rtx src)
+mips_split_move (rtx dest, rtx src)
{
rtx low_dest;
+ gcc_assert (mips_split_move_p (dest, src));
if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
{
if (!TARGET_64BIT && GET_MODE (dest) == DImode)
@@ -4209,7 +4225,7 @@ mips_output_move (rtx dest, rtx src)
mode = GET_MODE (dest);
dbl_p = (GET_MODE_SIZE (mode) == 8);
- if (dbl_p && mips_split_64bit_move_p (dest, src))
+ if (mips_split_move_p (dest, src))
return "#";
if ((src_code == REG && GP_REG_P (REGNO (src)))
@@ -4220,6 +4236,14 @@ mips_output_move (rtx dest, rtx src)
if (GP_REG_P (REGNO (dest)))
return "move\t%0,%z1";
+ if (mips_mult_move_p (dest, src))
+ {
+ if (ISA_HAS_DSP_MULT)
+ return "mult\t%q0,%.,%.";
+ else
+ return "mult\t%.,%.";
+ }
+
/* Moves to HI are handled by special .md insns. */
if (REGNO (dest) == LO_REGNUM)
return "mtlo\t%z1";
@@ -10430,8 +10454,8 @@ mips_save_reg (rtx reg, rtx mem)
{
rtx x1, x2;
- if (mips_split_64bit_move_p (mem, reg))
- mips_split_doubleword_move (mem, reg);
+ if (mips_split_move_p (mem, reg))
+ mips_split_move (mem, reg);
else
mips_emit_move (mem, reg);
Index: gcc/config/mips/mips.md
===================================================================
--- gcc/config/mips/mips.md 2012-09-03 07:49:57.318188985 +0100
+++ gcc/config/mips/mips.md 2012-09-04 20:15:10.293130456 +0100
@@ -204,7 +204,7 @@ (define_attr "jal_macro" "no,yes"
;; the split instructions; in some cases, it is more appropriate for the
;; scheduling type to be "multi" instead.
(define_attr "move_type"
- "unknown,load,fpload,store,fpstore,mtc,mfc,mtlo,mflo,move,fmove,
+ "unknown,load,fpload,store,fpstore,mtc,mfc,mtlo,mflo,imul,move,fmove,
const,constN,signext,ext_ins,logical,arith,sll0,andi,loadpool,
shift_shift"
(const_string "unknown"))
@@ -369,6 +369,7 @@ (define_attr "type"
(eq_attr "move_type" "mflo") (const_string "mflo")
;; These types of move are always single insns.
+ (eq_attr "move_type" "imul") (const_string "imul")
(eq_attr "move_type" "fmove") (const_string "fmove")
(eq_attr "move_type" "loadpool") (const_string "load")
(eq_attr "move_type" "signext") (const_string "signext")
@@ -4254,13 +4255,13 @@ (define_insn "*mov<mode>_ra"
(set_attr "mode" "<MODE>")])
(define_insn "*movdi_32bit"
- [(set (match_operand:DI 0 "nonimmediate_operand"
"=d,d,d,m,*a,*d,*f,*f,*d,*m,*B*C*D,*B*C*D,*d,*m")
- (match_operand:DI 1 "move_operand"
"d,i,m,d,*J*d,*a,*J*d,*m,*f,*f,*d,*m,*B*C*D,*B*C*D"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand"
"=d,d,d,m,*a,*a,*d,*f,*f,*d,*m,*B*C*D,*B*C*D,*d,*m")
+ (match_operand:DI 1 "move_operand"
"d,i,m,d,*J,*d,*a,*J*d,*m,*f,*f,*d,*m,*B*C*D,*B*C*D"))]
"!TARGET_64BIT && !TARGET_MIPS16
&& (register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))"
{ return mips_output_move (operands[0], operands[1]); }
- [(set_attr "move_type"
"move,const,load,store,mtlo,mflo,mtc,fpload,mfc,fpstore,mtc,fpload,mfc,fpstore")
+ [(set_attr "move_type"
"move,const,load,store,imul,mtlo,mflo,mtc,fpload,mfc,fpstore,mtc,fpload,mfc,fpstore")
(set_attr "mode" "DI")])
(define_insn "*movdi_32bit_mips16"
@@ -4707,14 +4708,14 @@ (define_expand "movti"
})
(define_insn "*movti"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=d,d,d,m,*a,*d")
- (match_operand:TI 1 "move_operand" "d,i,m,dJ,*d*J,*a"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=d,d,d,m,*a,*a,*d")
+ (match_operand:TI 1 "move_operand" "d,i,m,dJ,*J,*d,*a"))]
"TARGET_64BIT
&& !TARGET_MIPS16
&& (register_operand (operands[0], TImode)
|| reg_or_0_operand (operands[1], TImode))"
- "#"
- [(set_attr "move_type" "move,const,load,store,mtlo,mflo")
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "move_type" "move,const,load,store,imul,mtlo,mflo")
(set_attr "mode" "TI")])
(define_insn "*movti_mips16"
@@ -4765,21 +4766,20 @@ (define_insn "*movtf_mips16"
(define_split
[(set (match_operand:MOVE64 0 "nonimmediate_operand")
(match_operand:MOVE64 1 "move_operand"))]
- "reload_completed && !TARGET_64BIT
- && mips_split_64bit_move_p (operands[0], operands[1])"
+ "reload_completed && mips_split_move_p (operands[0], operands[1])"
[(const_int 0)]
{
- mips_split_doubleword_move (operands[0], operands[1]);
+ mips_split_move (operands[0], operands[1]);
DONE;
})
(define_split
[(set (match_operand:MOVE128 0 "nonimmediate_operand")
(match_operand:MOVE128 1 "move_operand"))]
- "TARGET_64BIT && reload_completed"
+ "reload_completed && mips_split_move_p (operands[0], operands[1])"
[(const_int 0)]
{
- mips_split_doubleword_move (operands[0], operands[1]);
+ mips_split_move (operands[0], operands[1]);
DONE;
})
Index: gcc/testsuite/gcc.target/mips/madd-9.c
===================================================================
--- gcc/testsuite/gcc.target/mips/madd-9.c 2012-09-03 07:49:57.318188985
+0100
+++ gcc/testsuite/gcc.target/mips/madd-9.c 2012-09-04 21:21:17.132015119
+0100
@@ -1,7 +1,13 @@
/* { dg-do compile } */
/* { dg-options "isa_rev>=1 -mgp32" } */
-/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+/* References to X within the loop need to have a higher frequency than
+ references to X outside the loop, otherwise there is no reason
+ to prefer multiply/accumulator registers over GPRs. */
+/* { dg-skip-if "requires register frequencies" { *-*-* } { "-O0" "-Os" } { ""
} } */
/* { dg-final { scan-assembler-not "\tmul\t" } } */
+/* { dg-final { scan-assembler-not "\tmthi" } } */
+/* { dg-final { scan-assembler-not "\tmtlo" } } */
+/* { dg-final { scan-assembler "\tmult\t" } } */
/* { dg-final { scan-assembler "\tmadd\t" } } */
NOMIPS16 long long
Index: gcc/testsuite/gcc.target/mips/mips32-dsp-accinit.c
===================================================================
--- /dev/null 2012-08-19 20:42:12.842999468 +0100
+++ gcc/testsuite/gcc.target/mips/mips32-dsp-accinit.c 2012-09-04
21:04:52.697043742 +0100
@@ -0,0 +1,20 @@
+/* { dg-options "-mdspr2 -mgp32" } */
+/* References to RESULT within the loop need to have a higher frequency than
+ references to RESULT outside the loop, otherwise there is no reason
+ to prefer multiply/accumulator registers over GPRs. */
+/* { dg-skip-if "requires register frequencies" { *-*-* } { "-O0" "-Os" } { ""
} } */
+
+/* Check that the zero-initialization of the accumulator feeding into
+ the madd is done by means of a mult instruction instead of mthi/mtlo. */
+
+NOMIPS16 long long f (int n, int *v, int m)
+{
+ long long result = 0;
+ int i;
+
+ for (i = 0; i < n; i++)
+ result = __builtin_mips_madd (result, v[i], m);
+ return result;
+}
+
+/* { dg-final { scan-assembler "\tmult\t\\\$ac.,\\\$0,\\\$0" } } */