When I submitted my previous lra-remat patch, I mentioned I had some
concerns about the way we dealt with register number comparisons, but I
didn't want to change things blindly without a testcase. PR70123 has now
provided such a testcase where we are trying to rematerialize a hard
register (r6). While scanning we encounter an instruction of the form
(set (reg 285) (reg 272))
i.e. involving only pseudos, but reg_renumber[285] is r6. Since we only
compare register numbers, we do not notice that the hard reg is clobbered.
The following patch modifies the function input_regno_present_p, and
also renames it so that its purpose is more obvious to someone familiar
with other parts of gcc. I've made it look at reg_renumber, and also try
to deal with multi-word hard registers properly.
I'm not entirely sure this is a fully safe approach however, since I
can't yet answer the question of whether LRA could change another pseudo
to reside in hard register 6, thereby making the rematerialization
invalid after the fact. Therefore the patch also includes a change to
just disable candidates if they involve hard registers. I haven't
observed that making any difference in code generation (on x86_64),
beyond fixing the testcase on s390.
Bootstrapped and tested on x86_64-linux; Jakub verified that the
testcase works afterwards. Ok for trunk and 5-branch, either for one or
for both parts? I'm hoping the testcase in gcc.dg/torture will get
exercised in the right way on s390, but I haven't run tests on that machine.
Bernd
PR target/70123
* lra-remat.c (operand_to_remat): Disallow hard regs in the value to
be rematerialized.
(reg_overlap_for_remat_p): Renamed from input_regno_present_p.
Arguments swapped. All callers changed. Take reg_renumber into
account, and calculate and compare register ranges for hard regs.
PR target/70123
* gcc.dg/torture/pr70123.c: New test.
Index: gcc/lra-remat.c
===================================================================
--- gcc/lra-remat.c (revision 234025)
+++ gcc/lra-remat.c (working copy)
@@ -413,6 +413,10 @@ operand_to_remat (rtx_insn *insn)
if (reg->regno >= FIRST_PSEUDO_REGISTER
&& bitmap_bit_p (&subreg_regs, reg->regno))
return -1;
+
+ /* Don't allow hard registers to be rematerialized. */
+ if (reg->regno < FIRST_PSEUDO_REGISTER)
+ return -1;
}
if (found_reg == NULL)
return -1;
@@ -718,21 +722,46 @@ calculate_local_reg_remat_bb_data (void)
-/* Return true if REGNO is an input operand of INSN. */
+/* Return true if REG overlaps an input operand of INSN. */
static bool
-input_regno_present_p (rtx_insn *insn, int regno)
+reg_overlap_for_remat_p (lra_insn_reg *reg, rtx_insn *insn)
{
int iter;
lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
struct lra_static_insn_data *static_id = id->insn_static_data;
- struct lra_insn_reg *reg;
-
+ unsigned regno = reg->regno;
+ int nregs;
+
+ if (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0)
+ regno = reg_renumber[regno];
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ nregs = 1;
+ else
+ nregs = hard_regno_nregs[regno][reg->biggest_mode];
+
+ struct lra_insn_reg *reg2;
+
for (iter = 0; iter < 2; iter++)
- for (reg = (iter == 0 ? id->regs : static_id->hard_regs);
- reg != NULL;
- reg = reg->next)
- if (reg->type == OP_IN && reg->regno == regno)
- return true;
+ for (reg2 = (iter == 0 ? id->regs : static_id->hard_regs);
+ reg2 != NULL;
+ reg2 = reg2->next)
+ {
+ if (reg2->type != OP_IN)
+ continue;
+ unsigned regno2 = reg2->regno;
+ int nregs2;
+
+ if (regno2 >= FIRST_PSEUDO_REGISTER && reg_renumber[regno2] >= 0)
+ regno2 = reg_renumber[regno2];
+ if (regno2 >= FIRST_PSEUDO_REGISTER)
+ nregs2 = 1;
+ else
+ nregs2 = hard_regno_nregs[regno2][reg2->biggest_mode];
+
+ if ((regno2 + nregs2 - 1 >= regno && regno2 < regno + nregs)
+ || (regno + nregs - 1 >= regno2 && regno < regno2 + nregs2))
+ return true;
+ }
return false;
}
@@ -833,7 +862,7 @@ calculate_gen_cands (void)
&& dst_regno == cand->regno)
continue;
if (cand->regno == reg->regno
- || input_regno_present_p (insn2, reg->regno))
+ || reg_overlap_for_remat_p (reg, insn2))
{
bitmap_clear_bit (gen_cands, cand->index);
bitmap_set_bit (&temp_bitmap, uid);
@@ -1219,7 +1248,7 @@ do_remat (void)
&& dst_regno == cand->regno)
continue;
if (cand->regno == reg->regno
- || input_regno_present_p (cand->insn, reg->regno))
+ || reg_overlap_for_remat_p (reg, cand->insn))
bitmap_set_bit (&temp_bitmap, cand->index);
}
Index: gcc/testsuite/gcc.dg/torture/pr70123.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr70123.c (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr70123.c (working copy)
@@ -0,0 +1,198 @@
+/* { dg-do run } */
+/* { dg-options "-fPIC" { target fpic } } */
+__attribute__ ((noinline, noclone)) int
+bar (int flag, const char *__restrict format, ...)
+{
+ asm volatile ("" : : "r" (flag), "r" (format) : "memory");
+ return 0;
+}
+
+extern inline __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) int
+baz (const char *__restrict fmt, ...)
+{
+ return bar (1, fmt, __builtin_va_arg_pack ());
+}
+
+__attribute__ ((noinline, noclone)) int
+f1 (void **a, const char *b, int *c)
+{
+ *a = 0;
+ *c = 0;
+ asm volatile ("" : : "r" (&a), "r" (b), "r" (&c) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f2 (void *a, int b, int c, long d[], int *e)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d), "r" (e) : "memory");
+ return 1;
+}
+
+__attribute__ ((noinline, noclone)) int
+f3 (void *a, int *b)
+{
+ asm volatile ("" : : "r" (a), "r" (b) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f4 (void *a, const char *b, int c, int d, double *e, int f, char **g, int *h)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f), "r" (g), "r" (h) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f5 (void *a, long long b, int c, char **d, char **e, char **f, const char *g, long long h, int *i)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f), "r" (g), "r" (h) : "memory");
+ asm volatile ("" : : "r" (i) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f6 (void *a, int b, int *c, int *d)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f7 (void *a, int b, long long c, long long d, long long e, double *f, int *g)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f), "r" (g) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f8 (void *a, int b, long long c, long long d, long long e, char *f, const char **g, int *h, int *i)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f), "r" (g), "r" (h) : "memory");
+ asm volatile ("" : : "r" (i) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f9 (void *a, int b, long long c, long long d, long long e, char *f, int *g)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f), "r" (g) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f10 (void *a, int b, long long c, long long d, long long e, unsigned char f, unsigned char *g, int *h, int *i)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f), "r" (g), "r" (h) : "memory");
+ asm volatile ("" : : "r" (i) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f11 (void *a, int b, long long c, long long d, long long e, long f, long *g, int *h, int *i)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f), "r" (g), "r" (h) : "memory");
+ asm volatile ("" : : "r" (i) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f12 (void *a, int b, long long c, long long d, long long e, float f, float *g, int *h, int *i)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f), "r" (g), "r" (h) : "memory");
+ asm volatile ("" : : "r" (i) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f13 (void *a, int b, long long c, long *d, long *e, int *f)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ asm volatile ("" : : "r" (e), "r" (f) : "memory");
+ return 0;
+}
+
+__attribute__ ((noinline, noclone)) int
+f14 (void *a, int b, int *c, int *d)
+{
+ asm volatile ("" : : "r" (a), "r" (b), "r" (c), "r" (d) : "memory");
+ return 0;
+}
+
+volatile int a;
+
+int
+main ()
+{
+ int b, c, d = 0, e, f = 0;
+ long g, h;
+ int j = 0;
+ long k, l;
+ int m;
+ unsigned char n[21];
+ long o[21];
+ float p[21];
+ double q[21];
+ long r[3], s = 0;
+ char t[42];
+ char u[21];
+ char *v[3];
+ const char *w[21];
+ double x[3] = { 15.1515151515151515, 16.1616161616161616, 17.1717171717171717 };
+ char y[40], z[81];
+ void *a2;
+ char *b2[10], *c2[10], *d2[10];
+ char e2[] = "abcdefghijklm";
+ for (g = 0; g < 21; g++)
+ w[g] = "";
+ f1 (&a2, y, &b);
+ if (b)
+ goto lab;
+ c = 32;
+ if (f2 (a2, c, d, r, &b) > 0)
+ __builtin_strcpy (z, "12345678901234567890123478901234567");
+ if (f3 (a2, &b) > 0)
+ goto lab;
+ if (f4 (a2, "abcdefg", 1, j, x, 14, v, &b) > 0)
+ goto lab;
+ for (g = 0; g < a; g++)
+ goto lab;
+ f5 (a2, s, f, b2, c2, d2, e2, 0L, &b);
+ if (f6 (a2, -1, &e, &b) > 0)
+ goto lab;
+ if (b > 0)
+ goto lab;
+ if (f6 (a2, 1, &e, &b) > 0)
+ goto lab;
+ f7 (a2, 8, g, 1, g, q, &b);
+ for (g = 1; g <= 20; g++)
+ {
+ for (h = 0; h < g; h++)
+ {
+ t[h] = 0;
+ q[h] = 0;
+ }
+ f8 (a2, 1, g, 1, 1, u, w, &m, &b);
+ for (h = 0; h < g; h++)
+ baz (" %2d", t[h]);
+ baz (" %d\nX", b);
+ f9 (a2, 3, g, 1, g, t, &b);
+ for (h = 0; h < g; h++)
+ baz (" %2d", t[h]);
+ f10 (a2, 4, g, 1, g, 99, n, &m, &b);
+ f11 (a2, 6, g, 1, g, 99, o, &m, &b);
+ f12 (a2, 7, g, 1, g, 99., p, &m, &b);
+ f13 (a2, 8, g, &k, &l, &b);
+ }
+ f14 (a2, 1, &e, &b);
+lab:
+ return 0;
+}