Re: PING Re: [PATCH, MIPS] add new peephole for 74k dspr2

Richard Sandiford Tue, 18 Sep 2012 11:17:21 -0700

Sandra Loosemore <[email protected]> writes:
> On 08/27/2012 10:36 AM, Richard Sandiford wrote:
>> Sandra Loosemore<[email protected]>  writes:
>>> On 08/19/2012 11:22 AM, Richard Sandiford wrote:
>>>>
>>>> Not sure whether a peephole is the right choice here.  In practice,
>>>> I'd imagine these opportunities would only come from a DImode move of
>>>> $0 into a doubleword register, so we could simply emit the pattern in
>>>> mips_split_doubleword_move.
>>>>
>>>> That would also allow us to use it for plain HI and LO.  It wasn't
>>>> obvious from the patch why it was restricted to the DSP extension
>>>> registers.
>>>>
>>>> Please also add a scan-assembler test.
>>>
>>> How is this version of the fix?
>>
>> Just to say that I've not forgotten about this.  I'd still like to
>> remove the !TARGET_64BIT and ISA_HAS_DSP_MULT tests, because the
>> idea isn't specific to either.
>>
>> Also, reviewing the patch made me realise that it might be better
>> to keep the move intact and simply use "mult" in the output code.
>> That's my fault for suggesting the wrong thing, though, so I was
>> hoping to find time this weekend to try it myself.  The testsuite
>> stuff ended up taking up all the available time instead.
>
> Richard,
>
> Have you had time to think about this some more?  I am not sure I can 
> guess how you'd like me to fix this patch now without some more specific 
> review and/or suggestions about where the optimization should happen and 
> what cases it should be extended to detect in addition to the dsp 
> accumulator multiplies.


The patch below is the one I've been testing.  But I got sidetracked
by looking into the possibility of removing the MD0_REG and MD1_REG
classes, in order to get more sensible costs.  I think that was needed
for the madd-9.c test to pass.

Richard


Index: gcc/config/mips/mips-protos.h
===================================================================
--- gcc/config/mips/mips-protos.h       2012-09-03 07:49:57.319188985 +0100
+++ gcc/config/mips/mips-protos.h       2012-09-04 20:15:10.240130458 +0100
@@ -212,8 +212,8 @@ extern int m16_simm8_8 (rtx, enum machin
 extern int m16_nsimm8_8 (rtx, enum machine_mode);
 
 extern rtx mips_subword (rtx, bool);
-extern bool mips_split_64bit_move_p (rtx, rtx);
-extern void mips_split_doubleword_move (rtx, rtx);
+extern bool mips_split_move_p (rtx, rtx);
+extern void mips_split_move (rtx, rtx);
 extern const char *mips_output_move (rtx, rtx);
 extern bool mips_cfun_has_cprestore_slot_p (void);
 extern bool mips_cprestore_address_p (rtx, bool);
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c      2012-09-04 20:15:08.191130518 +0100
+++ gcc/config/mips/mips.c      2012-09-04 20:15:17.173130256 +0100
@@ -2395,11 +2395,11 @@ mips_load_store_insns (rtx mem, rtx insn
   mode = GET_MODE (mem);
 
   /* Try to prove that INSN does not need to be split.  */
-  might_split_p = true;
-  if (GET_MODE_BITSIZE (mode) == 64)
+  might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD;
+  if (might_split_p)
     {
       set = single_set (insn);
-      if (set && !mips_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
+      if (set && !mips_split_move_p (SET_DEST (set), SET_SRC (set)))
        might_split_p = false;
     }
 
@@ -4105,39 +4105,55 @@ mips_subword (rtx op, bool high_p)
   return simplify_gen_subreg (word_mode, op, mode, byte);
 }
 
-/* Return true if a 64-bit move from SRC to DEST should be split into two.  */
+/* Return true if SRC can be moved into DEST using MULT $0, $0.  */
+
+static bool
+mips_mult_move_p (rtx dest, rtx src)
+{
+  return (src == const0_rtx
+         && REG_P (dest)
+         && GET_MODE_SIZE (GET_MODE (dest)) == 2 * UNITS_PER_WORD
+         && (ISA_HAS_DSP_MULT
+             ? ACC_REG_P (REGNO (dest))
+             : MD_REG_P (REGNO (dest))));
+}
+
+/* Return true if a move from SRC to DEST should be split into two.  */
 
 bool
-mips_split_64bit_move_p (rtx dest, rtx src)
+mips_split_move_p (rtx dest, rtx src)
 {
-  if (TARGET_64BIT)
+  /* Check whether the move can be done using some variant of MULT $0,$0.  */
+  if (mips_mult_move_p (dest, src))
     return false;
 
   /* FPR-to-FPR moves can be done in a single instruction, if they're
      allowed at all.  */
-  if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
+  unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
+  if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
     return false;
 
   /* Check for floating-point loads and stores.  */
-  if (ISA_HAS_LDC1_SDC1)
+  if (size == 8 && ISA_HAS_LDC1_SDC1)
     {
       if (FP_REG_RTX_P (dest) && MEM_P (src))
        return false;
       if (FP_REG_RTX_P (src) && MEM_P (dest))
        return false;
     }
-  return true;
+
+  /* Otherwise split all multiword moves.  */
+  return size > UNITS_PER_WORD;
 }
 
-/* Split a doubleword move from SRC to DEST.  On 32-bit targets,
-   this function handles 64-bit moves for which mips_split_64bit_move_p
-   holds.  For 64-bit targets, this function handles 128-bit moves.  */
+/* Split a move from SRC to DEST, given that mips_split_move_p holds.  */
 
 void
-mips_split_doubleword_move (rtx dest, rtx src)
+mips_split_move (rtx dest, rtx src)
 {
   rtx low_dest;
 
+  gcc_assert (mips_split_move_p (dest, src));
   if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
     {
       if (!TARGET_64BIT && GET_MODE (dest) == DImode)
@@ -4209,7 +4225,7 @@ mips_output_move (rtx dest, rtx src)
   mode = GET_MODE (dest);
   dbl_p = (GET_MODE_SIZE (mode) == 8);
 
-  if (dbl_p && mips_split_64bit_move_p (dest, src))
+  if (mips_split_move_p (dest, src))
     return "#";
 
   if ((src_code == REG && GP_REG_P (REGNO (src)))
@@ -4220,6 +4236,14 @@ mips_output_move (rtx dest, rtx src)
          if (GP_REG_P (REGNO (dest)))
            return "move\t%0,%z1";
 
+         if (mips_mult_move_p (dest, src))
+           {
+             if (ISA_HAS_DSP_MULT)
+               return "mult\t%q0,%.,%.";
+             else
+               return "mult\t%.,%.";
+           }
+
          /* Moves to HI are handled by special .md insns.  */
          if (REGNO (dest) == LO_REGNUM)
            return "mtlo\t%z1";
@@ -10430,8 +10454,8 @@ mips_save_reg (rtx reg, rtx mem)
     {
       rtx x1, x2;
 
-      if (mips_split_64bit_move_p (mem, reg))
-       mips_split_doubleword_move (mem, reg);
+      if (mips_split_move_p (mem, reg))
+       mips_split_move (mem, reg);
       else
        mips_emit_move (mem, reg);
 
Index: gcc/config/mips/mips.md
===================================================================
--- gcc/config/mips/mips.md     2012-09-03 07:49:57.318188985 +0100
+++ gcc/config/mips/mips.md     2012-09-04 20:15:10.293130456 +0100
@@ -204,7 +204,7 @@ (define_attr "jal_macro" "no,yes"
 ;; the split instructions; in some cases, it is more appropriate for the
 ;; scheduling type to be "multi" instead.
 (define_attr "move_type"
-  "unknown,load,fpload,store,fpstore,mtc,mfc,mtlo,mflo,move,fmove,
+  "unknown,load,fpload,store,fpstore,mtc,mfc,mtlo,mflo,imul,move,fmove,
    const,constN,signext,ext_ins,logical,arith,sll0,andi,loadpool,
    shift_shift"
   (const_string "unknown"))
@@ -369,6 +369,7 @@ (define_attr "type"
         (eq_attr "move_type" "mflo") (const_string "mflo")
 
         ;; These types of move are always single insns.
+        (eq_attr "move_type" "imul") (const_string "imul")
         (eq_attr "move_type" "fmove") (const_string "fmove")
         (eq_attr "move_type" "loadpool") (const_string "load")
         (eq_attr "move_type" "signext") (const_string "signext")
@@ -4254,13 +4255,13 @@ (define_insn "*mov<mode>_ra"
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*movdi_32bit"
-  [(set (match_operand:DI 0 "nonimmediate_operand" 
"=d,d,d,m,*a,*d,*f,*f,*d,*m,*B*C*D,*B*C*D,*d,*m")
-       (match_operand:DI 1 "move_operand" 
"d,i,m,d,*J*d,*a,*J*d,*m,*f,*f,*d,*m,*B*C*D,*B*C*D"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" 
"=d,d,d,m,*a,*a,*d,*f,*f,*d,*m,*B*C*D,*B*C*D,*d,*m")
+       (match_operand:DI 1 "move_operand" 
"d,i,m,d,*J,*d,*a,*J*d,*m,*f,*f,*d,*m,*B*C*D,*B*C*D"))]
   "!TARGET_64BIT && !TARGET_MIPS16
    && (register_operand (operands[0], DImode)
        || reg_or_0_operand (operands[1], DImode))"
   { return mips_output_move (operands[0], operands[1]); }
-  [(set_attr "move_type" 
"move,const,load,store,mtlo,mflo,mtc,fpload,mfc,fpstore,mtc,fpload,mfc,fpstore")
+  [(set_attr "move_type" 
"move,const,load,store,imul,mtlo,mflo,mtc,fpload,mfc,fpstore,mtc,fpload,mfc,fpstore")
    (set_attr "mode" "DI")])
 
 (define_insn "*movdi_32bit_mips16"
@@ -4707,14 +4708,14 @@ (define_expand "movti"
 })
 
 (define_insn "*movti"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=d,d,d,m,*a,*d")
-       (match_operand:TI 1 "move_operand" "d,i,m,dJ,*d*J,*a"))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=d,d,d,m,*a,*a,*d")
+       (match_operand:TI 1 "move_operand" "d,i,m,dJ,*J,*d,*a"))]
   "TARGET_64BIT
    && !TARGET_MIPS16
    && (register_operand (operands[0], TImode)
        || reg_or_0_operand (operands[1], TImode))"
-  "#"
-  [(set_attr "move_type" "move,const,load,store,mtlo,mflo")
+  { return mips_output_move (operands[0], operands[1]); }
+  [(set_attr "move_type" "move,const,load,store,imul,mtlo,mflo")
    (set_attr "mode" "TI")])
 
 (define_insn "*movti_mips16"
@@ -4765,21 +4766,20 @@ (define_insn "*movtf_mips16"
 (define_split
   [(set (match_operand:MOVE64 0 "nonimmediate_operand")
        (match_operand:MOVE64 1 "move_operand"))]
-  "reload_completed && !TARGET_64BIT
-   && mips_split_64bit_move_p (operands[0], operands[1])"
+  "reload_completed && mips_split_move_p (operands[0], operands[1])"
   [(const_int 0)]
 {
-  mips_split_doubleword_move (operands[0], operands[1]);
+  mips_split_move (operands[0], operands[1]);
   DONE;
 })
 
 (define_split
   [(set (match_operand:MOVE128 0 "nonimmediate_operand")
        (match_operand:MOVE128 1 "move_operand"))]
-  "TARGET_64BIT && reload_completed"
+  "reload_completed && mips_split_move_p (operands[0], operands[1])"
   [(const_int 0)]
 {
-  mips_split_doubleword_move (operands[0], operands[1]);
+  mips_split_move (operands[0], operands[1]);
   DONE;
 })
 
Index: gcc/testsuite/gcc.target/mips/madd-9.c
===================================================================
--- gcc/testsuite/gcc.target/mips/madd-9.c      2012-09-03 07:49:57.318188985 
+0100
+++ gcc/testsuite/gcc.target/mips/madd-9.c      2012-09-04 21:21:17.132015119 
+0100
@@ -1,7 +1,13 @@
 /* { dg-do compile } */
 /* { dg-options "isa_rev>=1 -mgp32" } */
-/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+/* References to X within the loop need to have a higher frequency than
+   references to X outside the loop, otherwise there is no reason
+   to prefer multiply/accumulator registers over GPRs.  */
+/* { dg-skip-if "requires register frequencies" { *-*-* } { "-O0" "-Os" } { "" 
} } */
 /* { dg-final { scan-assembler-not "\tmul\t" } } */
+/* { dg-final { scan-assembler-not "\tmthi" } } */
+/* { dg-final { scan-assembler-not "\tmtlo" } } */
+/* { dg-final { scan-assembler "\tmult\t" } } */
 /* { dg-final { scan-assembler "\tmadd\t" } } */
 
 NOMIPS16 long long
Index: gcc/testsuite/gcc.target/mips/mips32-dsp-accinit.c
===================================================================
--- /dev/null   2012-08-19 20:42:12.842999468 +0100
+++ gcc/testsuite/gcc.target/mips/mips32-dsp-accinit.c  2012-09-04 
21:04:52.697043742 +0100
@@ -0,0 +1,20 @@
+/* { dg-options "-mdspr2 -mgp32" } */
+/* References to RESULT within the loop need to have a higher frequency than
+   references to RESULT outside the loop, otherwise there is no reason
+   to prefer multiply/accumulator registers over GPRs.  */
+/* { dg-skip-if "requires register frequencies" { *-*-* } { "-O0" "-Os" } { "" 
} } */
+
+/* Check that the zero-initialization of the accumulator feeding into
+   the madd is done by means of a mult instruction instead of mthi/mtlo.  */
+
+NOMIPS16 long long f (int n, int *v, int m)
+{
+  long long result = 0;
+  int i;
+
+  for (i = 0; i < n; i++)
+    result = __builtin_mips_madd (result, v[i], m);
+  return result;
+}
+
+/* { dg-final { scan-assembler "\tmult\t\\\$ac.,\\\$0,\\\$0" } } */

Re: PING Re: [PATCH, MIPS] add new peephole for 74k dspr2

Reply via email to