On Thu, Feb 26, 2026 at 5:09 AM Netanel Komm <[email protected]> wrote:
>
>
>
> On Thu, 26 Feb 2026 at 9:18 Richard Biener <[email protected]> wrote:
>>
>> On Wed, Feb 25, 2026 at 11:03 PM Netanel Komm <[email protected]> wrote:
>> >
>> > This patch allows the GIMPLE folder to transform __builtin_mempcpy into 
>> > __builtin_memcpy
>> > in cases where the return value is ignored. This is beneficial because 
>> > most targets have
>> > an efficient implementation for memcpy.
>> >
>> > Existing tests that relied on the unfolded mempcpy have been duplicated - 
>> > one version now
>> > takes the folded mempcpy into account, and the other intentionally 
>> > prevents the folding
>> > from happening.
>> >
>> > Bootstrapped and regression tested on x86_64-linux-gnu.
>>
>> LGTM, but this has to wait for stage1.  One nit below

I have one more nit below too.

>>
>> >         PR tree-optimization/93556
>> >
>> > gcc/ChangeLog:
>> >
>> >         * gimple-fold.cc (gimple_fold_builtin_mempcpy): New function.
>> >         (gimple_fold_builtin): Handle BUILT_IN_MEMPCPY.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> >         * gcc.dg/pr79223.c: Rename to gcc.dg/pr79223-1.c and update scans.
>> >         * gcc.dg/tree-prof/val-prof-7.c: Rename to 
>> > gcc.dg/tree-prof/val-prof-7-1.c and update scans.
>> >         * gcc.dg/tree-ssa/builtins-folding-gimple-3.c: Update scans.
>> >         * gcc.dg/builtin-mempcpy-1.c: New test.
>> >         * gcc.dg/builtin-mempcpy-2.c: New test.
>> >         * gcc.dg/pr79223-2.c: New test.
>> >         * gcc.dg/tree-prof/val-prof-7-2.c: New test.
>> >         * gcc.dg/tree-ssa/builtins-folding-gimple-4.c: New test.
>> >
>> > Signed-off-by: Netanel Komm <[email protected]>
>> > ---
>> >  gcc/gimple-fold.cc                            | 24 +++++-
>> >  gcc/testsuite/gcc.dg/builtin-mempcpy-1.c      |  9 ++
>> >  gcc/testsuite/gcc.dg/builtin-mempcpy-2.c      | 39 +++++++++
>> >  .../gcc.dg/{pr79223.c => pr79223-1.c}         |  2 +-
>> >  gcc/testsuite/gcc.dg/pr79223-2.c              | 38 +++++++++
>> >  gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c | 82 +++++++++++++++++++
>> >  .../{val-prof-7.c => val-prof-7-2.c}          |  2 +-
>> >  .../tree-ssa/builtins-folding-gimple-3.c      |  5 +-
>> >  .../tree-ssa/builtins-folding-gimple-4.c      | 45 ++++++++++
>> >  9 files changed, 240 insertions(+), 6 deletions(-)
>> >  create mode 100644 gcc/testsuite/gcc.dg/builtin-mempcpy-1.c
>> >  create mode 100644 gcc/testsuite/gcc.dg/builtin-mempcpy-2.c
>> >  rename gcc/testsuite/gcc.dg/{pr79223.c => pr79223-1.c} (86%)
>> >  create mode 100644 gcc/testsuite/gcc.dg/pr79223-2.c
>> >  create mode 100644 gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c
>> >  rename gcc/testsuite/gcc.dg/tree-prof/{val-prof-7.c => val-prof-7-2.c} 
>> > (97%)
>> >  create mode 100644 
>> > gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c
>> >
>> > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
>> > index bc8540a8c5c..f870a7feb51 100644
>> > --- a/gcc/gimple-fold.cc
>> > +++ b/gcc/gimple-fold.cc
>> > @@ -3339,6 +3339,24 @@ gimple_fold_builtin_stpcpy (gimple_stmt_iterator 
>> > *gsi)
>> >    return true;
>> >  }
>> >
>> > +static bool
>> > +gimple_fold_builtin_mempcpy (gimple_stmt_iterator *gsi)

All functions should have a comment in front of them saying what they do
and what they return under which conditions.
So this could be something like:
/* Simplify mempcpy call stmt at GSI returning true if simplified.
   Currently only handling mempcpy->memcpy when the return value is ignored.  */


Thanks,
Andrew Pinski

>> > +{
>> > +  gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
>> > +
>> > +  if (gimple_call_lhs (stmt) == NULL_TREE)
>> > +    {
>> > +      tree fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
>>
>> I think you can use builtin_decl_explicit as we require memcpy
>> to be available.
>>
>> > +      if (!fn)
>> > +       return false;
>> > +      gimple_call_set_fndecl (stmt, fn);
>> > +      fold_stmt (gsi);
>> > +      return true;
>> > +    }
>> > +
>> > +  return false;
>> > +}
>> > +
>> >  /* Fold a call EXP to {,v}snprintf having NARGS passed as ARGS.  Return
>> >     NULL_TREE if a normal call should be emitted rather than expanding
>> >     the function inline.  FCODE is either BUILT_IN_SNPRINTF_CHK or
>> > @@ -5387,8 +5405,12 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi)
>> >        return gimple_fold_builtin_memset (gsi,
>> >                                          gimple_call_arg (stmt, 1),
>> >                                          gimple_call_arg (stmt, 2));
>> > -    case BUILT_IN_MEMCPY:
>> >      case BUILT_IN_MEMPCPY:
>> > +      if (gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, 0),
>> > +                                           gimple_call_arg (stmt, 1), 
>> > fcode))
>> > +       return true;
>> > +      return gimple_fold_builtin_mempcpy (gsi);
>> > +    case BUILT_IN_MEMCPY:
>> >      case BUILT_IN_MEMMOVE:
>> >        return gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, 
>> > 0),
>> >                                             gimple_call_arg (stmt, 1), 
>> > fcode);
>> > diff --git a/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c 
>> > b/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c
>> > new file mode 100644
>> > index 00000000000..19d9a224657
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c
>> > @@ -0,0 +1,9 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-fdump-tree-lower" } */
>> > +
>> > +/* Basic MRE from bug report */
>> > +void test_bare (void *d, const void *s, __SIZE_TYPE__ n) {
>> > +  __builtin_mempcpy (d, s, n);
>> > +}
>> > +
>> > +/* { dg-final { scan-tree-dump "__builtin_memcpy" "lower" } } */
>> > diff --git a/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c 
>> > b/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c
>> > new file mode 100644
>> > index 00000000000..65f80f97f4f
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c
>> > @@ -0,0 +1,39 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-options "-O1 -fdump-tree-optimized" } */
>> > +
>> > +/* Indirectly unused result */
>> > +void test_unused_indirect (void *d, const void *s, __SIZE_TYPE__ n) {
>> > +  void *a = __builtin_mempcpy (d, s, n);
>> > +  void *b = a;
>> > +}
>> > +
>> > +/* Simple used result (in statement) */
>> > +void *test_used_simple (void *d, const void *s, __SIZE_TYPE__ n) {
>> > +  return __builtin_mempcpy (d, s, n);
>> > +}
>> > +
>> > +/* More complicated used result (in expression) */
>> > +__SIZE_TYPE__ test_used_in_expr (char *d, const char *s, __SIZE_TYPE__ n) 
>> > {
>> > +  return (char *)__builtin_mempcpy (d, s, n) - d;
>> > +}
>> > +
>> > +/* Unused in all paths */
>> > +void *test_unused_indirect2 (void *d, const void *s, __SIZE_TYPE__ n) {
>> > +  void *a = __builtin_mempcpy (d, s, n);
>> > +  if (n > 20) {
>> > +       return (void *)20;
>> > +  }
>> > +  return (void *)7;
>> > +}
>> > +
>> > +/* Used in at least one path */
>> > +void *test_maybe_used (void *d, const void *s, __SIZE_TYPE__ n) {
>> > +  void *a = __builtin_mempcpy (d, s, n);
>> > +  if (n > 20) {
>> > +    return a;
>> > +  }
>> > +  return (void *)0;
>> > +}
>> > +
>> > +/* { dg-final { scan-tree-dump-times "__builtin_memcpy" 2 "optimized" } } 
>> > */
>> > +/* { dg-final { scan-tree-dump-times "__builtin_mempcpy" 3 "optimized" } 
>> > } */
>> > diff --git a/gcc/testsuite/gcc.dg/pr79223.c 
>> > b/gcc/testsuite/gcc.dg/pr79223-1.c
>> > similarity index 86%
>> > rename from gcc/testsuite/gcc.dg/pr79223.c
>> > rename to gcc/testsuite/gcc.dg/pr79223-1.c
>> > index ef0dd1b7bc5..099d18333e8 100644
>> > --- a/gcc/testsuite/gcc.dg/pr79223.c
>> > +++ b/gcc/testsuite/gcc.dg/pr79223-1.c
>> > @@ -28,7 +28,7 @@ void test_memcpy (void)
>> >
>> >  void test_mempcpy (void)
>> >  {
>> > -  mempcpy (d, s, range ());   /* { dg-warning ".mempcpy. writing 4 or 
>> > more bytes into a region of size 3 overflows the destination" } */
>> > +  mempcpy (d, s, range ());   /* { dg-warning ".memcpy. writing 4 or more 
>> > bytes into a region of size 3 overflows the destination" } */
>> >  }
>> >
>> >  void test_memmove (void)
>> > diff --git a/gcc/testsuite/gcc.dg/pr79223-2.c 
>> > b/gcc/testsuite/gcc.dg/pr79223-2.c
>> > new file mode 100644
>> > index 00000000000..6e78b326671
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/pr79223-2.c
>> > @@ -0,0 +1,38 @@
>> > +/* PR middle-end/79223 - missing -Wstringop-overflow on a memmove overflow
>> > +   { dg-do compile }
>> > +   { dg-additional-options "-O2 -Wall -Wno-array-bounds -std=gnu99" }  */
>> > +
>> > +typedef __SIZE_TYPE__ size_t;
>> > +
>> > +extern void* memcpy (void*, const void*, size_t);
>> > +extern void* mempcpy (void*, const void*, size_t);
>> > +extern void* memmove (void*, const void*, size_t);
>> > +
>> > +char d[3];
>> > +char s[4];
>> > +void *sink;
>> > +
>> > +size_t range (void)
>> > +{
>> > +  extern size_t size ();
>> > +  size_t n = size ();
>> > +  if (n <= sizeof d)
>> > +    return sizeof d + 1;
>> > +
>> > +  return n;
>> > +}
>> > +
>> > +void test_memcpy (void)
>> > +{
>> > +  memcpy (d, s, range ());   /* { dg-warning ".memcpy. writing 4 or more 
>> > bytes into a region of size 3 overflows the destination" } */
>> > +}
>> > +
>> > +void test_mempcpy (void)
>> > +{
>> > +  sink = mempcpy (d, s, range ());   /* { dg-warning ".mempcpy. writing 4 
>> > or more bytes into a region of size 3 overflows the destination" } */
>> > +}
>> > +
>> > +void test_memmove (void)
>> > +{
>> > +  memmove (d + 1, d, range ());   /* { dg-warning ".memmove. writing 4 or 
>> > more bytes into a region of size 2 overflows the destination" } */
>> > +}
>> > diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c 
>> > b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c
>> > new file mode 100644
>> > index 00000000000..321eefa8207
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c
>> > @@ -0,0 +1,82 @@
>> > +/* { dg-options "-O2 -fdump-ipa-profile-optimized -mtune=core2" } */
>> > +/* { dg-skip-if "" { ! { i?86-*-* x86_64-*-* } } } */
>> > +
>> > +char *buffer1;
>> > +char *buffer2;
>> > +
>> > +/* Bzero is not tested because it gets transformed into memset.  */
>> > +
>> > +#define DEFINE_TEST(N) \
>> > +__attribute__((noinline)) \
>> > +void memcpy_test_ ## N (int len) \
>> > +{ \
>> > +  __builtin_memcpy (buffer1, buffer2, len); \
>> > +} \
>> > + \
>> > +__attribute__((noinline)) \
>> > +void mempcpy_test_ ## N (int len) \
>> > +{ \
>> > +  __builtin_mempcpy (buffer1, buffer2, len); \
>> > +} \
>> > + \
>> > +__attribute__((noinline)) \
>> > +void memset_test_ ## N (int len) \
>> > +{ \
>> > +  __builtin_memset (buffer1, 'c', len); \
>> > +} \
>> > +__attribute__((noinline)) \
>> > +void memmove_test_ ## N (int len) \
>> > +{ \
>> > +  __builtin_memmove (buffer1, buffer2, len); \
>> > +} \
>> > + \
>> > +void test_stringops_ ## N(int len) \
>> > +{ \
>> > +  memcpy_test_## N (len); \
>> > +  mempcpy_test_ ## N (len); \
>> > +  memset_test_ ## N (len); \
>> > +  memmove_test_ ## N (len); \
>> > +} \
>> > + \
>> > +void test_stringops_with_values_ ## N (int common, int not_common) \
>> > +{ \
>> > +  int i; \
>> > +  for (i = 0; i < 1000; i++) \
>> > +    { \
>> > +      if (i > 990) \
>> > +       test_stringops_ ## N (not_common); \
>> > +      else \
>> > +       test_stringops_ ## N (common); \
>> > +    } \
>> > +}
>> > +
>> > +DEFINE_TEST(0);
>> > +DEFINE_TEST(1);
>> > +DEFINE_TEST(2);
>> > +
>> > +int main() {
>> > +  buffer1 = __builtin_malloc (1000);
>> > +  buffer2 = __builtin_malloc (1000);
>> > +
>> > +  test_stringops_with_values_0 (8, 55);
>> > +  test_stringops_with_values_1 (55, 55);
>> > +  test_stringops_with_values_2 (257, 55);
>> > +
>> > +  return 0;
>> > +}
>> > +
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: 
>> > single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: 
>> > single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: 
>> > single value 32 stringop for BUILT_IN_MEMCPY" 0 "profile" } } */
>> > +
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: 
>> > single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: 
>> > single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: 
>> > single value 32 stringop for BUILT_IN_MEMPCPY" 0 "profile" } } */
>> > +
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: 
>> > single value 8 stringop for BUILT_IN_MEMSET" "profile" } } */
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: 
>> > single value 55 stringop for BUILT_IN_MEMSET" "profile" } } */
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: 
>> > single value 32 stringop for BUILT_IN_MEMSET" 0 "profile" } } */
>> > +
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: 
>> > single value 8 stringop for BUILT_IN_MEMMOVE" "profile" } } */
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: 
>> > single value 55 stringop for BUILT_IN_MEMMOVE" "profile" } } */
>> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: 
>> > single value 32 stringop for BUILT_IN_MEMMOVE" 0 "profile" } } */
>> > diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c 
>> > b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c
>> > similarity index 97%
>> > rename from gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
>> > rename to gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c
>> > index 5ddb1a88c29..1084a852fc3 100644
>> > --- a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c
>> > +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c
>> > @@ -16,7 +16,7 @@ void memcpy_test_ ## N (int len) \
>> >  __attribute__((noinline)) \
>> >  void mempcpy_test_ ## N (int len) \
>> >  { \
>> > -  __builtin_mempcpy (buffer1, buffer2, len); \
>> > +  void * volatile res = __builtin_mempcpy (buffer1, buffer2, len); \
>> >  } \
>> >   \
>> >  __attribute__((noinline)) \
>> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c 
>> > b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c
>> > index 716be5b8094..0872cc52a7f 100644
>> > --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c
>> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c
>> > @@ -38,6 +38,5 @@ AR (memmove, int,   2, INT_MAX, 1);
>> >  AR (mempcpy, short, 2, SHRT_MAX, 1);
>> >  AR (mempcpy, int,   2, INT_MAX, 1);
>> >
>> > -/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } }
>> > -   { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } }
>> > -   { dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" } }  
>> > */
>> > +/* { dg-final { scan-tree-dump-times "builtin_memcpy" 10 "optimized" } }
>> > +   { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } }  
>> > */
>> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c 
>> > b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c
>> > new file mode 100644
>> > index 00000000000..0f0c577c64a
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c
>> > @@ -0,0 +1,45 @@
>> > +/* PR 81908 - FAIL: gfortran.dg/alloc_comp_auto_array_2.f90 -O3 -g -m32
>> > +   Test to verify that calls to memcpy et al. where the size is in a range
>> > +   with more than one valid value are not eliminated (this test 
>> > complements
>> > +   builtins-folding-gimple-2.c).
>> > +   { dg-do compile }
>> > +   { dg-options "-O2 -Wall -fdump-tree-optimized" } */
>> > +
>> > +#define SHRT_MAX   __SHRT_MAX__
>> > +#define SHRT_MIN   (-SHRT_MAX - 1)
>> > +#define INT_MAX    __INT_MAX__
>> > +#define INT_MIN    (-INT_MAX - 1)
>> > +
>> > +#define UNIQUE_FUNCNAME(func, line) test_ ## func ## _ ## line
>> > +#define FUNCNAME(func, line)        UNIQUE_FUNCNAME (func, line)
>> > +
>> > +void *sink;
>> > +
>> > +#define AR(func, type, min, max, val)                                  \
>> > +  void __attribute__ ((noclone, noinline))                             \
>> > +  FUNCNAME (func, __LINE__) (char *d, const char *s, type n)           \
>> > +  {                                                                    \
>> > +    if ((type)min <= n && n <= (type)max)                              \
>> > +      n = val;                                                         \
>> > +    sink = __builtin_ ## func (d, s, n);                                  
>> >      \
>> > +  } typedef void DummyType
>> > +
>> > +AR (memcpy, short, SHRT_MIN, 0, 1);
>> > +AR (memcpy, short, SHRT_MIN, 1, 2);
>> > +AR (memcpy, short, 2, SHRT_MAX, 1);
>> > +
>> > +AR (memcpy, int, INT_MIN, 0, 1);
>> > +AR (memcpy, int, INT_MIN, 1, 2);
>> > +AR (memcpy, int, INT_MIN, 2, 3);
>> > +AR (memcpy, int, 2, INT_MAX, 1);
>> > +AR (memcpy, int, 2, INT_MAX, 1);
>> > +
>> > +AR (memmove, short, 2, SHRT_MAX, 1);
>> > +AR (memmove, int,   2, INT_MAX, 1);
>> > +
>> > +AR (mempcpy, short, 2, SHRT_MAX, 1);
>> > +AR (mempcpy, int,   2, INT_MAX, 1);
>> > +
>> > +/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } }
>> > +   { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } }
>> > +   { dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" } }  
>> > */
>> > --
>> > 2.30.2
>> >
>
>
> Thanks for the review, I'll change to builtin_decl_explicit and send a V2 
> when Stage 1 opens.
>
>
> Netanel

Reply via email to