After fixing the constraint problem, we hit an "insn does not satisfy
its constraints" with -mno-lra on the following insn, a vector load
from mem which has an invalid offset:
(insn 631 630 1122 12 (set (reg:SF 108 31 [orig:260 pretmp_44 ] [260])
        (mem:SF (plus:DI (reg:DI 30 30 [orig:338 ivtmp.141 ] [338])
                (const_int 2 [0x2])) [5 MEM[base: _1, offset: 2B]+0 S4 A32])) 
470 {movsf_hardfloat}
     (nil))

Here are the reload costs for the various alternatives of
movsf_hardfloat:
"=!r, !r,  m,  f, ww, ww, !r,  f, wb,  m, wY, wu,  Z,?wn, ?r,*c*l, !r, *h"
  "r,  m,  r,  f, ww,  j,  j,  m, wY,  f, wb,  Z, wu,  r, wn,   r, *h,  0"
 617 609  17  17   8   8 617   9   8  17  17   8  17  23  23   17 617  17

Notice that the cost for a vector<-vector move (ww,ww) is the same as
the cost for a vector<-mem move (wb,wY or wu,Z).  Since the
vector<-vector move comes first, it is chosen and the mem part of the
insn reloaded.  That just gives another copy of insn 631.

Bootstrapped and regression tested powerpc64le-linux.  powerpc64-linux
regression tests still in progress.  OK assuming all goes well?

Segher, this one differs from the previous attempt (that regressed
gcc.target/powerpc/sd-pwr6.c and gcc.target/powerpc/pr39902-2.c), in
that the r,m and m,r alternatives are before other mem alternatives.
Ordered the other way we get no progress on trying to do secondary mem
reloads.

        PR target/72802
        * config/rs6000/rs6000.md (mov<mode>_hardflow): Sort
        alternatives.  Put reg/reg moves within same class last,
        same class loads before stores.  Delete attr length.
testsuite/
        * gcc.c-torture/compile/pr72802.c: New.

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index f4c00d2..e52f759 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6689,32 +6689,37 @@
 }")
 
 (define_insn "mov<mode>_hardfloat"
-  [(set (match_operand:FMOVE32 0 "nonimmediate_operand" 
"=!r,!r,m,f,<f32_vsx>,<f32_vsx>,!r,<f32_lr>,<f32_lr2>,<f32_sm>,<f32_sm2>,<f32_av>,Z,?<f32_dm>,?r,*c*l,!r,*h")
-       (match_operand:FMOVE32 1 "input_operand" 
"r,m,r,f,<f32_vsx>,<zero_fp>,<zero_fp>,<f32_lm>,<f32_lm2>,<f32_sr>,<f32_sr2>,Z,<f32_av>,r,<f32_dm>,r,h,0"))]
+  [(set (match_operand:FMOVE32 0 "nonimmediate_operand"
+        "=<f32_vsx>, !r,         !r,         m,          <f32_lr>,   <f32_lr2>,
+         <f32_av>,   <f32_sm>,   <f32_sm2>,  Z,          ?<f32_dm>,  ?r,
+         *c*l,       !r,         f,          <f32_vsx>,  !r,         *h")
+       (match_operand:FMOVE32 1 "input_operand"
+        "<zero_fp>,  <zero_fp>,  m,          r,          <f32_lm>,   <f32_lm2>,
+         Z,          <f32_sr>,   <f32_sr2>,  <f32_av>,   r,          <f32_dm>,
+         r,          *h,         f,          <f32_vsx>,  r,          0"))]
   "(gpc_reg_operand (operands[0], <MODE>mode)
    || gpc_reg_operand (operands[1], <MODE>mode))
    && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
   "@
-   mr %0,%1
-   lwz%U1%X1 %0,%1
-   stw%U0%X0 %1,%0
-   fmr %0,%1
-   xscpsgndp %x0,%x1,%x1
    xxlxor %x0,%x0,%x0
    li %0,0
+   lwz%U1%X1 %0,%1
+   stw%U0%X0 %1,%0
    <f32_li>
    <f32_li2>
+   <f32_lv>
    <f32_si>
    <f32_si2>
-   <f32_lv>
    <f32_sv>
    mtvsrwz %x0,%1
    mfvsrwz %0,%x1
    mt%0 %1
    mf%1 %0
+   fmr %0,%1
+   xscpsgndp %x0,%x1,%x1
+   mr %0,%1
    nop"
-  [(set_attr "type" 
"*,load,store,fpsimple,fpsimple,veclogical,integer,fpload,fpload,fpstore,fpstore,fpload,fpstore,mffgpr,mftgpr,mtjmpr,mfjmpr,*")
-   (set_attr "length" "4")])
+  [(set_attr "type" 
"veclogical,integer,load,store,fpload,fpload,fpload,fpstore,fpstore,fpstore,mffgpr,mftgpr,mtjmpr,mfjmpr,fpsimple,fpsimple,*,*")])
 
 (define_insn "*mov<mode>_softfloat"
   [(set (match_operand:FMOVE32 0 "nonimmediate_operand" 
"=r,cl,r,r,m,r,r,r,r,*h")
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr72802.c 
b/gcc/testsuite/gcc.c-torture/compile/pr72802.c
new file mode 100644
index 0000000..b9e1ab3
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr72802.c
@@ -0,0 +1,211 @@
+static a[];
+static b, h, m, n, o, p, q, t, u, v, t5, t6, t16, t17, t18, t25;
+c;
+static volatile d;
+static volatile e;
+static volatile f;
+static volatile g;
+j;
+static volatile k;
+static volatile l;
+static volatile r;
+const volatile s;
+static volatile w;
+static volatile x;
+const volatile y;
+static volatile z;
+static volatile t1;
+static volatile t2;
+const t3;
+t4;
+const volatile t8;
+const volatile t9;
+const volatile t10;
+static volatile t11;
+static volatile t12;
+static volatile t13;
+static volatile t14;
+const volatile t15;
+static volatile t19;
+static volatile t20;
+const volatile t21;
+static volatile t22;
+static volatile t23;
+const volatile t24;
+*t29;
+fn1() { b = 5; }
+fn2(long);
+#pragma pack(1)
+struct S0 {
+  short f3;
+  float f4;
+  signed f5
+};
+const struct S0 t7[] = {};
+static fn3() {
+  int t26[] = {};
+  int t27[10] = {};
+  --t25;
+  if (fn4()) {
+    t5++;
+    fn5();
+    int t28[] = {t26, t27};
+    return;
+  }
+}
+fn6() {
+  int i, t30 = 0;
+  if (fn6 == 2)
+    t30 = 1;
+  {
+    int t31, i = 0;
+    for (; i < 256; i++) {
+      t31 = i;
+      if (i & 1)
+        t31 = 0;
+      a[i] = t31;
+    }
+    i = 0;
+    for (; i < 3; i++)
+      t29[i] = t6;
+    fn7();
+    fn3();
+    t4 = c = j = 0;
+  }
+  fn2(h);
+  if (t30)
+    printf(b);
+  g;
+  fn2(g);
+  printf(b);
+  f;
+  fn2(f);
+  if (t30)
+    printf(b);
+  e;
+  fn2(e);
+  printf(b);
+  fn8();
+  d;
+  fn2(d);
+  if (t30)
+    printf(b);
+  l;
+  fn2(l);
+  printf(b);
+  k;
+  fn2(k);
+  if (t30)
+    printf(b);
+  printf(b);
+  for (; i; i++) {
+    y;
+    fn2(y);
+    printf(b);
+    x;
+    fn2(x);
+    if (t30)
+      printf(b);
+    w;
+    fn2(w);
+    printf(b);
+    fn2(v);
+    printf(b);
+    fn2(u);
+    if (t30)
+      printf(b);
+    fn2(t);
+    printf(b);
+    s;
+    fn2(s);
+    if (t30)
+      printf(b);
+    r;
+    fn2(r);
+    printf(b);
+    fn2(q);
+    if (t30)
+      printf(b);
+    fn2(p);
+    printf("", b);
+    fn2(o);
+    printf(b);
+    fn2(n);
+    if (t30)
+      printf(b);
+    fn2(m);
+    printf(b);
+  }
+  fn2(z);
+  if (t30)
+    printf(b);
+  printf("", t3);
+  t2;
+  fn2(t2);
+  printf(b);
+  t1;
+  fn2(t1);
+  if (t30)
+    printf(b);
+  for (; i < 6; i++) {
+    t10;
+    fn2(t10);
+    printf(b);
+    t9;
+    fn2(t9);
+    if (t30)
+      printf(b);
+    t8;
+    fn2(t8);
+    printf(b);
+    fn2(t7[i].f3);
+    if (t30)
+      printf(b);
+    fn2(t7[i].f4);
+    printf(b);
+    fn2(t7[i].f5);
+    if (t30)
+      printf(b);
+    t15;
+    fn2(t15);
+    printf(b);
+    t14;
+    fn2(t14);
+    if (t30)
+      printf(b);
+    t13;
+    fn2(t13);
+    printf(b);
+    t12;
+    fn2(t12);
+    if (t30)
+      printf(b);
+    t11;
+    fn2(t11);
+    printf(b);
+    t21;
+    fn2(t21);
+    if (t30)
+      printf(b);
+    t20;
+    fn2(t20);
+    fn2(t19);
+    if (t30)
+      printf(b);
+    fn2(t18);
+    printf(b);
+    fn2(t17);
+    printf(b);
+    fn2(t16);
+    printf(b);
+  }
+  t24;
+  t24;
+  if (t30)
+    printf(b);
+  printf(t23);
+  t22;
+  t22;
+  if (t30)
+    printf(b);
+}

-- 
Alan Modra
Australia Development Lab, IBM

Reply via email to