On Thu, 26 Feb 2026 at 9:18 Richard Biener <[email protected]>
wrote:

> On Wed, Feb 25, 2026 at 11:03 PM Netanel Komm <[email protected]>
> wrote:
> >
> > This patch allows the GIMPLE folder to transform __builtin_mempcpy into
> > __builtin_memcpy in cases where the return value is ignored. This is
> > beneficial because most targets have an efficient implementation for
> > memcpy.
> >
> > Existing tests that relied on the unfolded mempcpy have been duplicated -
> > one version now takes the folded mempcpy into account, and the other
> > intentionally prevents the folding from happening.
> >
> > Bootstrapped and regression tested on x86_64-linux-gnu.
>
> LGTM, but this has to wait for stage1.  One nit below
>
> >         PR tree-optimization/93556
> >
> > gcc/ChangeLog:
> >
> >         * gimple-fold.cc (gimple_fold_builtin_mempcpy): New function.
> >         (gimple_fold_builtin): Handle BUILT_IN_MEMPCPY.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.dg/pr79223.c: Rename to gcc.dg/pr79223-1.c and update
> scans.
> >         * gcc.dg/tree-prof/val-prof-7.c: Rename to
> gcc.dg/tree-prof/val-prof-7-1.c and update scans.
> >         * gcc.dg/tree-ssa/builtins-folding-gimple-3.c: Update scans.
> >         * gcc.dg/builtin-mempcpy-1.c: New test.
> >         * gcc.dg/builtin-mempcpy-2.c: New test.
> >         * gcc.dg/pr79223-2.c: New test.
> >         * gcc.dg/tree-prof/val-prof-7-2.c: New test.
> >         * gcc.dg/tree-ssa/builtins-folding-gimple-4.c: New test.
> >
> > Signed-off-by: Netanel Komm <[email protected]>
> > ---
> >  gcc/gimple-fold.cc                            | 24 +++++-
> >  gcc/testsuite/gcc.dg/builtin-mempcpy-1.c      |  9 ++
> >  gcc/testsuite/gcc.dg/builtin-mempcpy-2.c      | 39 +++++++++
> >  .../gcc.dg/{pr79223.c => pr79223-1.c}         |  2 +-
> >  gcc/testsuite/gcc.dg/pr79223-2.c              | 38 +++++++++
> >  gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c | 82 +++++++++++++++++++
> >  .../{val-prof-7.c => val-prof-7-2.c}          |  2 +-
> >  .../tree-ssa/builtins-folding-gimple-3.c      |  5 +-
> >  .../tree-ssa/builtins-folding-gimple-4.c      | 45 ++++++++++
> >  9 files changed, 240 insertions(+), 6 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/builtin-mempcpy-1.c
> >  create mode 100644 gcc/testsuite/gcc.dg/builtin-mempcpy-2.c
> >  rename gcc/testsuite/gcc.dg/{pr79223.c => pr79223-1.c} (86%)
> >  create mode 100644 gcc/testsuite/gcc.dg/pr79223-2.c
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c
> >  rename gcc/testsuite/gcc.dg/tree-prof/{val-prof-7.c => val-prof-7-2.c}
> (97%)
> >  create mode 100644
> gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c
> >
> > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
> > index bc8540a8c5c..f870a7feb51 100644
> > --- a/gcc/gimple-fold.cc
> > +++ b/gcc/gimple-fold.cc
> > @@ -3339,6 +3339,24 @@ gimple_fold_builtin_stpcpy (gimple_stmt_iterator
> *gsi)
> >    return true;
> >  }
> >
> > +static bool
> > +gimple_fold_builtin_mempcpy (gimple_stmt_iterator *gsi)
> > +{
> > +  gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
> > +
> > +  if (gimple_call_lhs (stmt) == NULL_TREE)
> > +    {
> > +      tree fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
>
> I think you can use builtin_decl_explicit as we require memcpy
> to be available.
>
> > +      if (!fn)
> > +       return false;
> > +      gimple_call_set_fndecl (stmt, fn);
> > +      fold_stmt (gsi);
> > +      return true;
> > +    }
> > +
> > +  return false;
> > +}
> > +
> >  /* Fold a call EXP to {,v}snprintf having NARGS passed as ARGS.  Return
> >     NULL_TREE if a normal call should be emitted rather than expanding
> >     the function inline.  FCODE is either BUILT_IN_SNPRINTF_CHK or
> > @@ -5387,8 +5405,12 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >        return gimple_fold_builtin_memset (gsi,
> >                                          gimple_call_arg (stmt, 1),
> >                                          gimple_call_arg (stmt, 2));
> > -    case BUILT_IN_MEMCPY:
> >      case BUILT_IN_MEMPCPY:
> > +      if (gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, 0),
> > +                                           gimple_call_arg (stmt, 1),
> fcode))
> > +       return true;
> > +      return gimple_fold_builtin_mempcpy (gsi);
> > +    case BUILT_IN_MEMCPY:
> >      case BUILT_IN_MEMMOVE:
> >        return gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt,
> 0),
> >                                             gimple_call_arg (stmt, 1),
> fcode);
> > diff --git a/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c
> b/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c
> > new file mode 100644
> > index 00000000000..19d9a224657
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c
> > @@ -0,0 +1,9 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-fdump-tree-lower" } */
> > +
> > +/* Basic MRE from bug report */
> > +void test_bare (void *d, const void *s, __SIZE_TYPE__ n) {
> > +  __builtin_mempcpy (d, s, n);
> > +}
> > +
> > +/* { dg-final { scan-tree-dump "__builtin_memcpy" "lower" } } */
> > diff --git a/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c
> b/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c
> > new file mode 100644
> > index 00000000000..65f80f97f4f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c
> > @@ -0,0 +1,39 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O1 -fdump-tree-optimized" } */
> > +
> > +/* Indirectly unused result */
> > +void test_unused_indirect (void *d, const void *s, __SIZE_TYPE__ n) {
> > +  void *a = __builtin_mempcpy (d, s, n);
> > +  void *b = a;
> > +}
> > +
> > +/* Simple used result (in statement) */
> > +void *test_used_simple (void *d, const void *s, __SIZE_TYPE__ n) {
> > +  return __builtin_mempcpy (d, s, n);
> > +}
> > +
> > +/* More complicated used result (in expression) */
> > +__SIZE_TYPE__ test_used_in_expr (char *d, const char *s, __SIZE_TYPE__
> n) {
> > +  return (char *)__builtin_mempcpy (d, s, n) - d;
> > +}
> > +
> > +/* Unused in all paths */
> > +void *test_unused_indirect2 (void *d, const void *s, __SIZE_TYPE__ n) {
> > +  void *a = __builtin_mempcpy (d, s, n);
> > +  if (n > 20) {
> > +       return (void *)20;
> > +  }
> > +  return (void *)7;
> > +}
> > +
> > +/* Used in at least one path */
> > +void *test_maybe_used (void *d, const void *s, __SIZE_TYPE__ n) {
> > +  void *a = __builtin_mempcpy (d, s, n);
> > +  if (n > 20) {
> > +    return a;
> > +  }
> > +  return (void *)0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "__builtin_memcpy" 2 "optimized" }
> } */
> > +/* { dg-final { scan-tree-dump-times "__builtin_mempcpy" 3 "optimized"
> } } */
> > diff --git a/gcc/testsuite/gcc.dg/pr79223.c
> b/gcc/testsuite/gcc.dg/pr79223-1.c
> > similarity index 86%
> > rename from gcc/testsuite/gcc.dg/pr79223.c
> > rename to gcc/testsuite/gcc.dg/pr79223-1.c
> > index ef0dd1b7bc5..099d18333e8 100644
> > --- a/gcc/testsuite/gcc.dg/pr79223.c
> > +++ b/gcc/testsuite/gcc.dg/pr79223-1.c
> > @@ -28,7 +28,7 @@ void test_memcpy (void)
> >
> >  void test_mempcpy (void)
> >  {
> > -  mempcpy (d, s, range ());   /* { dg-warning ".mempcpy. writing 4 or
> more bytes into a region of size 3 overflows the destination" } */
> > +  mempcpy (d, s, range ());   /* { dg-warning ".memcpy. writing 4 or
> more bytes into a region of size 3 overflows the destination" } */
> >  }
> >
> >  void test_memmove (void)
> > diff --git a/gcc/testsuite/gcc.dg/pr79223-2.c
> b/gcc/testsuite/gcc.dg/pr79223-2.c
> > new file mode 100644
> > index 00000000000..6e78b326671
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr79223-2.c
> > @@ -0,0 +1,38 @@
> > +/* PR middle-end/79223 - missing -Wstringop-overflow on a memmove
> overflow
> > +   { dg-do compile }
> > +   { dg-additional-options "-O2 -Wall -Wno-array-bounds -std=gnu99" }
> */
> > +
> > +typedef __SIZE_TYPE__ size_t;
> > +
> > +extern void* memcpy (void*, const void*, size_t);
> > +extern void* mempcpy (void*, const void*, size_t);
> > +extern void* memmove (void*, const void*, size_t);
> > +
> > +char d[3];
> > +char s[4];
> > +void *sink;
> > +
> > +size_t range (void)
> > +{
> > +  extern size_t size ();
> > +  size_t n = size ();
> > +  if (n <= sizeof d)
> > +    return sizeof d + 1;
> > +
> > +  return n;
> > +}
> > +
> > +void test_memcpy (void)
> > +{
> > +  memcpy (d, s, range ());   /* { dg-warning ".memcpy. writing 4 or
> more bytes into a region of size 3 overflows the destination" } */
> > +}
> > +
> > +void test_mempcpy (void)
> > +{
> > +  sink = mempcpy (d, s, range ());   /* { dg-warning ".mempcpy. writing
> 4 or more bytes into a region of size 3 overflows the destination" } */
> > +}
> > +
> > +void test_memmove (void)
> > +{
> > +  memmove (d + 1, d, range ());   /* { dg-warning ".memmove. writing 4
> or more bytes into a region of size 2 overflows the destination" } */
> > +}
> > diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c
> b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c
> > new file mode 100644
> > index 00000000000..321eefa8207
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c
> > @@ -0,0 +1,82 @@
> > +/* { dg-options "-O2 -fdump-ipa-profile-optimized -mtune=core2" } */
> > +/* { dg-skip-if "" { ! { i?86-*-* x86_64-*-* } } } */
> > +
> > +char *buffer1;
> > +char *buffer2;
> > +
> > +/* Bzero is not tested because it gets transformed into memset.  */
> > +
> > +#define DEFINE_TEST(N) \
> > +__attribute__((noinline)) \
> > +void memcpy_test_ ## N (int len) \
> > +{ \
> > +  __builtin_memcpy (buffer1, buffer2, len); \
> > +} \
> > + \
> > +__attribute__((noinline)) \
> > +void mempcpy_test_ ## N (int len) \
> > +{ \
> > +  __builtin_mempcpy (buffer1, buffer2, len); \
> > +} \
> > + \
> > +__attribute__((noinline)) \
> > +void memset_test_ ## N (int len) \
> > +{ \
> > +  __builtin_memset (buffer1, 'c', len); \
> > +} \
> > +__attribute__((noinline)) \
> > +void memmove_test_ ## N (int len) \
> > +{ \
> > +  __builtin_memmove (buffer1, buffer2, len); \
> > +} \
> > + \
> > +void test_stringops_ ## N(int len) \
> > +{ \
> > +  memcpy_test_## N (len); \
> > +  mempcpy_test_ ## N (len); \
> > +  memset_test_ ## N (len); \
> > +  memmove_test_ ## N (len); \
> > +} \
> > + \
> > +void test_stringops_with_values_ ## N (int common, int not_common) \
> > +{ \
> > +  int i; \
> > +  for (i = 0; i < 1000; i++) \
> > +    { \
> > +      if (i > 990) \
> > +       test_stringops_ ## N (not_common); \
> > +      else \
> > +       test_stringops_ ## N (common); \
> > +    } \
> > +}
> > +
> > +DEFINE_TEST(0);
> > +DEFINE_TEST(1);
> > +DEFINE_TEST(2);
> > +
> > +int main() {
> > +  buffer1 = __builtin_malloc (1000);
> > +  buffer2 = __builtin_malloc (1000);
> > +
> > +  test_stringops_with_values_0 (8, 55);
> > +  test_stringops_with_values_1 (55, 55);
> > +  test_stringops_with_values_2 (257, 55);
> > +
> > +  return 0;
> > +}
> > +
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done:
> single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done:
> single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation
> done: single value 32 stringop for BUILT_IN_MEMCPY" 0 "profile" } } */
> > +
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done:
> single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done:
> single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation
> done: single value 32 stringop for BUILT_IN_MEMPCPY" 0 "profile" } } */
> > +
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done:
> single value 8 stringop for BUILT_IN_MEMSET" "profile" } } */
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done:
> single value 55 stringop for BUILT_IN_MEMSET" "profile" } } */
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation
> done: single value 32 stringop for BUILT_IN_MEMSET" 0 "profile" } } */
> > +
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done:
> single value 8 stringop for BUILT_IN_MEMMOVE" "profile" } } */
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done:
> single value 55 stringop for BUILT_IN_MEMMOVE" "profile" } } */
> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation
> done: single value 32 stringop for BUILT_IN_MEMMOVE" 0 "profile" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
> b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c
> > similarity index 97%
> > rename from gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
> > rename to gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c
> > index 5ddb1a88c29..1084a852fc3 100644
> > --- a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
> > +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c
> > @@ -16,7 +16,7 @@ void memcpy_test_ ## N (int len) \
> >  __attribute__((noinline)) \
> >  void mempcpy_test_ ## N (int len) \
> >  { \
> > -  __builtin_mempcpy (buffer1, buffer2, len); \
> > +  void * volatile res = __builtin_mempcpy (buffer1, buffer2, len); \
> >  } \
> >   \
> >  __attribute__((noinline)) \
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c
> b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c
> > index 716be5b8094..0872cc52a7f 100644
> > --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c
> > @@ -38,6 +38,5 @@ AR (memmove, int,   2, INT_MAX, 1);
> >  AR (mempcpy, short, 2, SHRT_MAX, 1);
> >  AR (mempcpy, int,   2, INT_MAX, 1);
> >
> > -/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } }
> > -   { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } }
> > -   { dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" }
> }  */
> > +/* { dg-final { scan-tree-dump-times "builtin_memcpy" 10 "optimized" } }
> > +   { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" }
> }  */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c
> b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c
> > new file mode 100644
> > index 00000000000..0f0c577c64a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c
> > @@ -0,0 +1,45 @@
> > +/* PR 81908 - FAIL: gfortran.dg/alloc_comp_auto_array_2.f90 -O3 -g -m32
> > +   Test to verify that calls to memcpy et al. where the size is in a
> range
> > +   with more than one valid value are not eliminated (this test
> complements
> > +   builtins-folding-gimple-2.c).
> > +   { dg-do compile }
> > +   { dg-options "-O2 -Wall -fdump-tree-optimized" } */
> > +
> > +#define SHRT_MAX   __SHRT_MAX__
> > +#define SHRT_MIN   (-SHRT_MAX - 1)
> > +#define INT_MAX    __INT_MAX__
> > +#define INT_MIN    (-INT_MAX - 1)
> > +
> > +#define UNIQUE_FUNCNAME(func, line) test_ ## func ## _ ## line
> > +#define FUNCNAME(func, line)        UNIQUE_FUNCNAME (func, line)
> > +
> > +void *sink;
> > +
> > +#define AR(func, type, min, max, val)                                  \
> > +  void __attribute__ ((noclone, noinline))                             \
> > +  FUNCNAME (func, __LINE__) (char *d, const char *s, type n)           \
> > +  {                                                                    \
> > +    if ((type)min <= n && n <= (type)max)                              \
> > +      n = val;                                                         \
> > +    sink = __builtin_ ## func (d, s, n);
>        \
> > +  } typedef void DummyType
> > +
> > +AR (memcpy, short, SHRT_MIN, 0, 1);
> > +AR (memcpy, short, SHRT_MIN, 1, 2);
> > +AR (memcpy, short, 2, SHRT_MAX, 1);
> > +
> > +AR (memcpy, int, INT_MIN, 0, 1);
> > +AR (memcpy, int, INT_MIN, 1, 2);
> > +AR (memcpy, int, INT_MIN, 2, 3);
> > +AR (memcpy, int, 2, INT_MAX, 1);
> > +AR (memcpy, int, 2, INT_MAX, 1);
> > +
> > +AR (memmove, short, 2, SHRT_MAX, 1);
> > +AR (memmove, int,   2, INT_MAX, 1);
> > +
> > +AR (mempcpy, short, 2, SHRT_MAX, 1);
> > +AR (mempcpy, int,   2, INT_MAX, 1);
> > +
> > +/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } }
> > +   { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } }
> > +   { dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" }
> }  */
> > --
> > 2.30.2
> >


Thanks for the review. I'll change to builtin_decl_explicit and send a v2
when stage 1 opens.


Netanel

Reply via email to