The BPF backend inline memmove expansion was broken for certain
constructs.  This patch addresses the two underlying issues:
1. Off-by-one in the "backwards" unrolled move loop offset.
2. Poor use of a temporary register in the generated move loop, which
   could result in some of the loads performing the move being optimized
   away when the source and destination of the memmove are based on
   the same pointer.

Tested on x86_64-linux-gnu host for bpf-unknown-none target.
Also tested with Linux kernel bpf selftests, no regressions found.

gcc/

        PR target/122140
        * config/bpf/bpf.cc (bpf_expand_cpymem): Fix off-by-one offset
        in backwards loop.  Improve src and dest addrs used for the
        branch condition.
        (emit_move_loop): Improve emitted set insns and remove the
        explicit temporary register.

---
 gcc/config/bpf/bpf.cc | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index a28018b3367..39168f689ac 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -1252,13 +1252,11 @@ static void
 emit_move_loop (rtx src, rtx dst, machine_mode mode, int offset, int inc,
                unsigned iters, unsigned remainder)
 {
-  rtx reg = gen_reg_rtx (mode);
-
   /* First copy in chunks as large as alignment permits.  */
   for (unsigned int i = 0; i < iters; i++)
     {
-      emit_move_insn (reg, adjust_address (src, mode, offset));
-      emit_move_insn (adjust_address (dst, mode, offset), reg);
+      emit_insn (gen_rtx_SET (adjust_address (dst, mode, offset),
+                             adjust_address (src, mode, offset)));
       offset += inc;
     }
 
@@ -1266,22 +1264,22 @@ emit_move_loop (rtx src, rtx dst, machine_mode mode, int offset, int inc,
      used above.  */
   if (remainder & 4)
     {
-      emit_move_insn (reg, adjust_address (src, SImode, offset));
-      emit_move_insn (adjust_address (dst, SImode, offset), reg);
+      emit_insn (gen_rtx_SET (adjust_address (dst, SImode, offset),
+                             adjust_address (src, SImode, offset)));
       offset += (inc < 0 ? -4 : 4);
       remainder -= 4;
     }
   if (remainder & 2)
     {
-      emit_move_insn (reg, adjust_address (src, HImode, offset));
-      emit_move_insn (adjust_address (dst, HImode, offset), reg);
+      emit_insn (gen_rtx_SET (adjust_address (dst, HImode, offset),
+                             adjust_address (src, HImode, offset)));
       offset += (inc < 0 ? -2 : 2);
       remainder -= 2;
     }
   if (remainder & 1)
     {
-      emit_move_insn (reg, adjust_address (src, QImode, offset));
-      emit_move_insn (adjust_address (dst, QImode, offset), reg);
+      emit_insn (gen_rtx_SET (adjust_address (dst, QImode, offset),
+                             adjust_address (src, QImode, offset)));
     }
 }
 
@@ -1351,13 +1349,13 @@ bpf_expand_cpymem (rtx *operands, bool is_move)
       fwd_label = gen_label_rtx ();
       done_label = gen_label_rtx ();
 
-      rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (dst, 0));
-      rtx src_addr = copy_to_mode_reg (Pmode, XEXP (src, 0));
+      rtx src_addr = force_operand (XEXP (src, 0), NULL_RTX);
+      rtx dst_addr = force_operand (XEXP (dst, 0), NULL_RTX);
       emit_cmp_and_jump_insns (src_addr, dst_addr, GEU, NULL_RTX, Pmode,
                               true, fwd_label, profile_probability::even ());
 
       /* Emit the "backwards" unrolled loop.  */
-      emit_move_loop (src, dst, mode, size_bytes, -inc, iters, remainder);
+      emit_move_loop (src, dst, mode, (size_bytes - 1), -inc, iters, remainder);
       emit_jump_insn (gen_jump (done_label));
       emit_barrier ();
 
-- 
2.51.0

Reply via email to