This is the third piece of my effort to improve inline expansion of memmove. The
first two parts I posted back in June fixed the names of the optab entries
involved so that optab cpymem is used for memcpy() and optab movmem is used for
memmove(). This piece adds support for actually attempting to invoke the movmem
optab to do inline expansion of __builtin_memmove().

Because what needs to be done for memmove() is very similar to what is done
for memcpy(), I have just added a bool parm "might_overlap" to several of the
functions involved so the same functions can handle both. The name
might_overlap comes from the fact
that if we still have a memmove() call at expand, this means
gimple_fold_builtin_memory_op() was not able to prove that the source and
destination do not overlap.

There are a few places where might_overlap gets used to keep us from trying to
use the by-pieces infrastructure or generate a copy loop, as neither of those
things will work correctly if source and destination overlap.

I've restructured things slightly in emit_block_move_hints() so that we can
try the pattern first if we already know that by-pieces won't work. This way
we can bail out immediately in the might_overlap case.

Bootstrap/regtest passed on ppc64le, in progress on x86_64. If everything
passes, is this ok for trunk?


2019-09-27  Aaron Sawdey  <acsaw...@linux.ibm.com>

        * builtins.c (expand_builtin_memory_copy_args): Add might_overlap parm.
        (expand_builtin_memcpy): Use might_overlap parm.
        (expand_builtin_mempcpy_args): Use might_overlap parm.
        (expand_builtin_memmove): Call expand_builtin_memory_copy_args.
        * expr.c (emit_block_move_via_cpymem): Rename to
        emit_block_move_via_pattern, add might_overlap parm, use cpymem
        or movmem optab as appropriate.
        (emit_block_move_hints): Add might_overlap parm, do the right
        thing for might_overlap==true.
        * expr.h (emit_block_move_hints): Update prototype.




Index: gcc/builtins.c
===================================================================
--- gcc/builtins.c      (revision 276131)
+++ gcc/builtins.c      (working copy)
@@ -127,7 +127,8 @@
 static rtx expand_builtin_memcpy (tree, rtx);
 static rtx expand_builtin_memory_copy_args (tree dest, tree src, tree len,
                                            rtx target, tree exp,
-                                           memop_ret retmode);
+                                           memop_ret retmode,
+                                           bool might_overlap);
 static rtx expand_builtin_memmove (tree, rtx);
 static rtx expand_builtin_mempcpy (tree, rtx);
 static rtx expand_builtin_mempcpy_args (tree, tree, tree, rtx, tree, memop_ret);
@@ -3790,7 +3791,7 @@
   check_memop_access (exp, dest, src, len);

   return expand_builtin_memory_copy_args (dest, src, len, target, exp,
-                                         /*retmode=*/ RETURN_BEGIN);
+                                         /*retmode=*/ RETURN_BEGIN, false);
 }

 /* Check a call EXP to the memmove built-in for validity.
@@ -3797,7 +3798,7 @@
    Return NULL_RTX on both success and failure.  */

 static rtx
-expand_builtin_memmove (tree exp, rtx)
+expand_builtin_memmove (tree exp, rtx target)
 {
   if (!validate_arglist (exp,
                         POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
@@ -3809,7 +3810,8 @@

   check_memop_access (exp, dest, src, len);

-  return NULL_RTX;
+  return expand_builtin_memory_copy_args (dest, src, len, target, exp,
+                                         /*retmode=*/ RETURN_BEGIN, true);
 }

 /* Expand a call EXP to the mempcpy builtin.
@@ -3858,7 +3860,8 @@

 static rtx
 expand_builtin_memory_copy_args (tree dest, tree src, tree len,
-                                rtx target, tree exp, memop_ret retmode)
+                                rtx target, tree exp, memop_ret retmode,
+                                bool might_overlap)
 {
   const char *src_str;
   unsigned int src_align = get_pointer_alignment (src);
@@ -3894,10 +3897,11 @@
                        &probable_max_size);
   src_str = c_getstr (src);

-  /* If SRC is a string constant and block move would be done
-     by pieces, we can avoid loading the string from memory
-     and only stored the computed constants.  */
-  if (src_str
+  /* If SRC is a string constant and block move would be done by
+     pieces, we can avoid loading the string from memory and only
+     store the computed constants.  The by-pieces method is not safe
+     if SRC and DEST might overlap, so avoid that case.  */
+  if (src_str && !might_overlap
       && CONST_INT_P (len_rtx)
       && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
       && can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str,
@@ -3922,7 +3926,7 @@
       && (retmode == RETURN_BEGIN || target == const0_rtx))
     method = BLOCK_OP_TAILCALL;
   bool use_mempcpy_call = (targetm.libc_has_fast_function (BUILT_IN_MEMPCPY)
-                          && retmode == RETURN_END
+                          && retmode == RETURN_END && !might_overlap
                           && target != const0_rtx);
   if (use_mempcpy_call)
     method = BLOCK_OP_NO_LIBCALL_RET;
@@ -3929,7 +3933,7 @@
   dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, method,
                                     expected_align, expected_size,
                                     min_size, max_size, probable_max_size,
-                                    use_mempcpy_call, &is_move_done);
+                                    use_mempcpy_call, &is_move_done, might_overlap);

   /* Bail out when a mempcpy call would be expanded as libcall and when
      we have a target that provides a fast implementation
@@ -3962,7 +3966,7 @@
                             rtx target, tree orig_exp, memop_ret retmode)
 {
   return expand_builtin_memory_copy_args (dest, src, len, target, orig_exp,
-                                         retmode);
+                                         retmode, false);
 }

 /* Expand into a movstr instruction, if one is available.  Return NULL_RTX if
Index: gcc/expr.c
===================================================================
--- gcc/expr.c  (revision 276131)
+++ gcc/expr.c  (working copy)
@@ -73,9 +73,10 @@
 int cse_not_expected;

 static bool block_move_libcall_safe_for_call_parm (void);
-static bool emit_block_move_via_cpymem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT,
-                                       unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
-                                       unsigned HOST_WIDE_INT);
+static bool emit_block_move_via_pattern (rtx, rtx, rtx, unsigned, unsigned,
+                                        HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+                                        unsigned HOST_WIDE_INT,
+                                        unsigned HOST_WIDE_INT, bool);
 static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
 static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
 static rtx_insn *compress_float_constant (rtx, rtx);
@@ -1562,7 +1563,8 @@
                       unsigned HOST_WIDE_INT min_size,
                       unsigned HOST_WIDE_INT max_size,
                       unsigned HOST_WIDE_INT probable_max_size,
-                      bool bail_out_libcall, bool *is_move_done)
+                      bool bail_out_libcall, bool *is_move_done,
+                      bool might_overlap)
 {
   int may_use_call;
   rtx retval = 0;
@@ -1622,13 +1624,29 @@
       set_mem_size (y, const_size);
     }

-  if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align))
+  bool pieces_ok = can_move_by_pieces (INTVAL (size), align);
+  bool pattern_ok = false;
+
+  if (!CONST_INT_P (size) || !pieces_ok || might_overlap)
+    {
+      pattern_ok
+       = emit_block_move_via_pattern (x, y, size, align,
+                                      expected_align, expected_size,
+                                      min_size, max_size, probable_max_size,
+                                      might_overlap);
+      if (!pattern_ok && might_overlap)
+       {
+         /* Do not try any of the other methods below as they are not safe
+            for overlapping moves.  */
+         *is_move_done = false;
+         return retval;
+       }
+    }
+
+  if (pattern_ok) ;
+  else if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align))
     move_by_pieces (x, y, INTVAL (size), align, RETURN_BEGIN);
-  else if (emit_block_move_via_cpymem (x, y, size, align,
-                                      expected_align, expected_size,
-                                      min_size, max_size, probable_max_size))
-    ;
-  else if (may_use_call
+  else if (may_use_call && !might_overlap
           && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
           && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y)))
     {
@@ -1645,7 +1663,8 @@
       retval = emit_block_copy_via_libcall (x, y, size,
                                            method == BLOCK_OP_TAILCALL);
     }
-
+  else if (might_overlap)
+    *is_move_done = false;
   else
     emit_block_move_via_loop (x, y, size, align);

@@ -1721,15 +1740,25 @@
   return true;
 }

-/* A subroutine of emit_block_move.  Expand a cpymem pattern;
-   return true if successful.  */
+/* A subroutine of emit_block_move.  Expand a cpymem or movmem pattern;
+   return true if successful.
+
+   X is the destination of the copy or move.
+   Y is the source of the copy or move.
+   SIZE is the size of the block to be moved.

+   MIGHT_OVERLAP indicates this originated with expansion of a
+   builtin_memmove() and the source and destination blocks may
+   overlap; in that case the movmem optab is used instead of
+   cpymem.  */
+
 static bool
-emit_block_move_via_cpymem (rtx x, rtx y, rtx size, unsigned int align,
-                           unsigned int expected_align, HOST_WIDE_INT expected_size,
-                           unsigned HOST_WIDE_INT min_size,
-                           unsigned HOST_WIDE_INT max_size,
-                           unsigned HOST_WIDE_INT probable_max_size)
+emit_block_move_via_pattern (rtx x, rtx y, rtx size, unsigned int align,
+                            unsigned int expected_align, HOST_WIDE_INT expected_size,
+                            unsigned HOST_WIDE_INT min_size,
+                            unsigned HOST_WIDE_INT max_size,
+                            unsigned HOST_WIDE_INT probable_max_size,
+                            bool might_overlap)
 {
   if (expected_align < align)
     expected_align = align;
@@ -1752,7 +1781,11 @@
   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
     {
       scalar_int_mode mode = mode_iter.require ();
-      enum insn_code code = direct_optab_handler (cpymem_optab, mode);
+      enum insn_code code;
+      if (might_overlap)
+       code = direct_optab_handler (movmem_optab, mode);
+      else
+       code = direct_optab_handler (cpymem_optab, mode);

       if (code != CODE_FOR_nothing
          /* We don't need MODE to be narrower than BITS_PER_HOST_WIDE_INT
Index: gcc/expr.h
===================================================================
--- gcc/expr.h  (revision 276131)
+++ gcc/expr.h  (working copy)
@@ -116,7 +116,8 @@
                                  unsigned HOST_WIDE_INT,
                                  unsigned HOST_WIDE_INT,
                                  bool bail_out_libcall = false,
-                                 bool *is_move_done = NULL);
+                                 bool *is_move_done = NULL,
+                                 bool might_overlap = false);
 extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool,
                                 by_pieces_constfn, void *);
 extern bool emit_storent_insn (rtx to, rtx from);

-- 
Aaron Sawdey, Ph.D.  acsaw...@linux.vnet.ibm.com
050-2/C113  (507) 253-7520 home: 507/263-0782
IBM Linux Technology Center - PPC Toolchain

Reply via email to