On 8/7/23 08:33, Manolis Tsamis wrote:
This is a new RTL pass that tries to optimize memory offset calculations
by moving them from add immediate instructions to the memory loads/stores.
For example it can transform this:

   addi t4,sp,16
   add  t2,a6,t4
   shl  t3,t2,1
   ld   a2,0(t3)
   addi a2,1
   sd   a2,8(t2)

into the following (one instruction less):

   add  t2,a6,sp
   shl  t3,t2,1
   ld   a2,32(t3)
   addi a2,1
   sd   a2,24(t2)

Although there are places where this is done already, this pass is more
powerful and can handle the more difficult cases that are currently not
optimized. Also, it runs late enough and can optimize away unnecessary
stack pointer calculations.

gcc/ChangeLog:

        * Makefile.in: Add fold-mem-offsets.o.
        * passes.def: Schedule a new pass.
        * tree-pass.h (make_pass_fold_mem_offsets): Declare.
        * common.opt: New options.
        * doc/invoke.texi: Document new option.
        * fold-mem-offsets.cc: New file.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/fold-mem-offsets-1.c: New test.
        * gcc.target/riscv/fold-mem-offsets-2.c: New test.
        * gcc.target/riscv/fold-mem-offsets-3.c: New test.
We still have the m68k issue to deal with.

We've got these key insns going into fold-mem-offsets:

(insn 39 38 41 3 (set (reg:SI 13 %a5 [orig:36 _14 ] [36])
        (plus:SI (reg:SI 10 %a2 [47])
            (const_int 1 [0x1]))) "j.c":19:3 157 {*addsi3_internal}
     (nil))
(insn 41 39 42 3 (set (reg:SI 8 %a0 [61])
        (plus:SI (reg/f:SI 12 %a4 [52])
            (reg:SI 13 %a5 [orig:36 _14 ] [36]))) "j.c":19:3 discrim 1 157 
{*addsi3_internal}
     (nil))
(insn 42 41 43 3 (set (mem:SI (reg:SI 8 %a0 [61]) [0 MEM <char[1:5]> [(void 
*)_15]+0 S4 A8])
        (const_int 1633837924 [0x61626364])) "j.c":19:3 discrim 1 55 
{*movsi_m68k2}
     (nil))
(insn 43 42 45 3 (set (mem:QI (plus:SI (reg:SI 8 %a0 [61])
                (const_int 4 [0x4])) [0 MEM <char[1:5]> [(void *)_15]+4 S1 A8])
        (const_int 101 [0x65])) "j.c":19:3 discrim 1 62 {*m68k.md:1130}
     (expr_list:REG_DEAD (reg:SI 8 %a0 [61])
        (nil)))

[ ... ]
(insn 58 57 59 3 (set (reg:SI 8 %a0 [72])
        (plus:SI (plus:SI (reg:SI 13 %a5 [orig:36 _14 ] [36])
                (reg:SI 11 %a3 [49]))
            (const_int 5 [0x5]))) "j.c":24:3 421 {*lea}
     (expr_list:REG_DEAD (reg:SI 13 %a5 [orig:36 _14 ] [36])
        (expr_list:REG_DEAD (reg:SI 11 %a3 [49])
            (nil))))


f-m-o will propagate the (const_int 1) from insn 39 into insns 42 & 43 and modifies insn 39 into a simple copy. Note carefully that turning insn 39 into a simple copy changes the value in a5.

As a result insn 58 computes the wrong value and the test (strlenopt-2) fails on the m68k.

In fold_offsets we have this code:

     /* We only fold through instructions that are transitively used as
         memory addresses and do not have other uses.  Use the same logic
         from offset calculation to visit instructions that can propagate
         offsets and keep track of them in CAN_FOLD_INSNS.  */
      bool success;
      struct df_link *uses = get_uses (def, dest, &success), *ref_link;

      if (!success)
        return 0;

      for (ref_link = uses; ref_link; ref_link = ref_link->next)
        {
          rtx_insn* use = DF_REF_INSN (ref_link->ref);

          if (DEBUG_INSN_P (use))
            continue;

          /* Punt if the use is anything more complicated than a set
             (clobber, use, etc).  */
          if (!NONJUMP_INSN_P (use) || GET_CODE (PATTERN (use)) != SET)
            return 0;

          /* This use affects instructions outside of CAN_FOLD_INSNS.  */
          if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use)))
            return 0;

          rtx use_set = PATTERN (use);

          /* Special case: A foldable memory store is not foldable if it
             mentions DEST outside of the address calculation.  */
          if (use_set && MEM_P (SET_DEST (use_set))
              && reg_mentioned_p (dest, SET_SRC (use_set)))
            return 0;
        }

AFAICT that code is supposed to detect if there are uses outside of the set of insns that can be optimized. In our case DEF is a0 (the base of the memory references) and its def is insn 41. The assignment of a0 in insn 41 only reaches insns 42 and 43 which are marked in can_fold_insns.

That's find and good, but insufficient for correctness. ISTM we must look at the uses of any insn where we change the result of the computation, not just the def of the base register in the memory reference.

In this particular case we're going to modify insn 39, so we need to look at the uses of a5 (the value defined by insn 39). If we did that we'd see the use of a5 at insn 38 and insn 38 and reject the optimization.



Testcase below. You should be able to see the incorrect transformation with a m68k-linux-gnu cross compiler with -O2.



typedef unsigned int size_t;
extern void abort (void);
void *calloc (size_t, size_t);
void *malloc (size_t);
void free (void *);
char *strdup (const char *);
size_t strlen (const char *);
size_t strnlen (const char *, size_t);
void *memcpy (void *__restrict, const void *__restrict, size_t);
void *memmove (void *, const void *, size_t);
char *strcpy (char *__restrict, const char *__restrict);
char *strcat (char *__restrict, const char *__restrict);
char *strchr (const char *, int);
int strcmp (const char *, const char *);
int strncmp (const char *, const char *, size_t);
void *memset (void *, int, size_t);
int memcmp (const void *, const void *, size_t);
int strcmp (const char *, const char *);
int sprintf (char * __restrict, const char *__restrict, ...);
int snprintf (char * __restrict, size_t, const char *__restrict, ...);
__attribute__((noinline, noclone)) char *
foo (char *p, char *r)
{
  char buf[26];
  if (strlen (p) + strlen (r) + 9 > 26)
    return ((void *) 0);
  strcpy (buf, p);
  strcat (buf, "/");
  strcat (buf, "abcde");
  strcat (buf, r);
  strcat (buf, "fg");
  return strdup (buf);
}
int
main ()
{
  char *volatile p = "string1";
  char *volatile r = "string2";
  char *q = foo (p, r);
  if (q != ((void *) 0))
    {
      if (strcmp (q, "string1/abcdestring2fg"))
 abort ();
      free (q);
    }
  return 0;
}
~


Reply via email to