On Thu, 26 Feb 2026 at 9:18 Richard Biener <[email protected]> wrote:
> On Wed, Feb 25, 2026 at 11:03 PM Netanel Komm <[email protected]> > wrote: > > > > This patch allows the GIMPLE folder to transform __builtin_mempcpy into > __builtin_memcpy > > in cases where the return value is ignored. This is beneficial because > most targets have > > an efficient implementation for memcpy. > > > > Existing tests that relied on the unfolded mempcpy have been duplicated > - one version now > > takes the folded mempcpy into account, and the other intentionally > prevents the folding > > from happening. > > > > Bootstrapped and regression tested on x86_64-linux-gnu. > > LGTM, but this has to wait for stage1. One nit below > > > PR tree-optimization/93556 > > > > gcc/ChangeLog: > > > > * gimple-fold.cc (gimple_fold_builtin_mempcpy): New function. > > (gimple_fold_builtin): Handle BUILT_IN_MEMPCPY. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/pr79223.c: Rename to gcc.dg/pr79223-1.c and update > scans. > > * gcc.dg/tree-prof/val-prof-7.c: Rename to > gcc.dg/tree-prof/val-prof-7-1.c and update scans. > > * gcc.dg/tree-ssa/builtins-folding-gimple-3.c: Update scans. > > * gcc.dg/builtin-mempcpy-1.c: New test. > > * gcc.dg/builtin-mempcpy-2.c: New test. > > * gcc.dg/pr79223-2.c: New test. > > * gcc.dg/tree-prof/val-prof-7-2.c: New test. > > * gcc.dg/tree-ssa/builtins-folding-gimple-4.c: New test. > > > > Signed-off-by: Netanel Komm <[email protected]> > > --- > > gcc/gimple-fold.cc | 24 +++++- > > gcc/testsuite/gcc.dg/builtin-mempcpy-1.c | 9 ++ > > gcc/testsuite/gcc.dg/builtin-mempcpy-2.c | 39 +++++++++ > > .../gcc.dg/{pr79223.c => pr79223-1.c} | 2 +- > > gcc/testsuite/gcc.dg/pr79223-2.c | 38 +++++++++ > > gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c | 82 +++++++++++++++++++ > > .../{val-prof-7.c => val-prof-7-2.c} | 2 +- > > .../tree-ssa/builtins-folding-gimple-3.c | 5 +- > > .../tree-ssa/builtins-folding-gimple-4.c | 45 ++++++++++ > > 9 files changed, 240 insertions(+), 6 deletions(-) > > create mode 100644 gcc/testsuite/gcc.dg/builtin-mempcpy-1.c > > create mode 100644 gcc/testsuite/gcc.dg/builtin-mempcpy-2.c > > rename gcc/testsuite/gcc.dg/{pr79223.c => pr79223-1.c} (86%) > > create mode 100644 gcc/testsuite/gcc.dg/pr79223-2.c > > create mode 100644 gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c > > rename gcc/testsuite/gcc.dg/tree-prof/{val-prof-7.c => val-prof-7-2.c} > (97%) > > create mode 100644 > gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c > > > > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc > > index bc8540a8c5c..f870a7feb51 100644 > > --- a/gcc/gimple-fold.cc > > +++ b/gcc/gimple-fold.cc > > @@ -3339,6 +3339,24 @@ gimple_fold_builtin_stpcpy (gimple_stmt_iterator > *gsi) > > return true; > > } > > > > +static bool > > +gimple_fold_builtin_mempcpy (gimple_stmt_iterator *gsi) > > +{ > > + gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi)); > > + > > + if (gimple_call_lhs (stmt) == NULL_TREE) > > + { > > + tree fn = builtin_decl_implicit (BUILT_IN_MEMCPY); > > I think you can use builtin_decl_explicit as we require memcpy > to be available. > > > + if (!fn) > > + return false; > > + gimple_call_set_fndecl (stmt, fn); > > + fold_stmt (gsi); > > + return true; > > + } > > + > > + return false; > > +} > > + > > /* Fold a call EXP to {,v}snprintf having NARGS passed as ARGS. Return > > NULL_TREE if a normal call should be emitted rather than expanding > > the function inline. FCODE is either BUILT_IN_SNPRINTF_CHK or > > @@ -5387,8 +5405,12 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi) > > return gimple_fold_builtin_memset (gsi, > > gimple_call_arg (stmt, 1), > > gimple_call_arg (stmt, 2)); > > - case BUILT_IN_MEMCPY: > > case BUILT_IN_MEMPCPY: > > + if (gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, 0), > > + gimple_call_arg (stmt, 1), > fcode)) > > + return true; > > + return gimple_fold_builtin_mempcpy (gsi); > > + case BUILT_IN_MEMCPY: > > case BUILT_IN_MEMMOVE: > > return gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, > 0), > > gimple_call_arg (stmt, 1), > fcode); > > diff --git a/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c > b/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c > > new file mode 100644 > > index 00000000000..19d9a224657 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c > > @@ -0,0 +1,9 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-fdump-tree-lower" } */ > > + > > +/* Basic MRE from bug report */ > > +void test_bare (void *d, const void *s, __SIZE_TYPE__ n) { > > + __builtin_mempcpy (d, s, n); > > +} > > + > > +/* { dg-final { scan-tree-dump "__builtin_memcpy" "lower" } } */ > > diff --git a/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c > b/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c > > new file mode 100644 > > index 00000000000..65f80f97f4f > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c > > @@ -0,0 +1,39 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O1 -fdump-tree-optimized" } */ > > + > > +/* Indirectly unused result */ > > +void test_unused_indirect (void *d, const void *s, __SIZE_TYPE__ n) { > > + void *a = __builtin_mempcpy (d, s, n); > > + void *b = a; > > +} > > + > > +/* Simple used result (in statement) */ > > +void *test_used_simple (void *d, const void *s, __SIZE_TYPE__ n) { > > + return __builtin_mempcpy (d, s, n); > > +} > > + > > +/* More complicated used result (in expression) */ > > +__SIZE_TYPE__ test_used_in_expr (char *d, const char *s, __SIZE_TYPE__ > n) { > > + return (char *)__builtin_mempcpy (d, s, n) - d; > > +} > > + > > +/* Unused in all paths */ > > +void *test_unused_indirect2 (void *d, const void *s, __SIZE_TYPE__ n) { > > + void *a = __builtin_mempcpy (d, s, n); > > + if (n > 20) { > > + return (void *)20; > > + } > > + return (void *)7; > > +} > > + > > +/* Used in at least one path */ > > +void *test_maybe_used (void *d, const void *s, __SIZE_TYPE__ n) { > > + void *a = __builtin_mempcpy (d, s, n); > > + if (n > 20) { > > + return a; > > + } > > + return (void *)0; > > +} > > + > > +/* { dg-final { scan-tree-dump-times "__builtin_memcpy" 2 "optimized" } > } */ > > +/* { dg-final { scan-tree-dump-times "__builtin_mempcpy" 3 "optimized" > } } */ > > diff --git a/gcc/testsuite/gcc.dg/pr79223.c > b/gcc/testsuite/gcc.dg/pr79223-1.c > > similarity index 86% > > rename from gcc/testsuite/gcc.dg/pr79223.c > > rename to gcc/testsuite/gcc.dg/pr79223-1.c > > index ef0dd1b7bc5..099d18333e8 100644 > > --- a/gcc/testsuite/gcc.dg/pr79223.c > > +++ b/gcc/testsuite/gcc.dg/pr79223-1.c > > @@ -28,7 +28,7 @@ void test_memcpy (void) > > > > void test_mempcpy (void) > > { > > - mempcpy (d, s, range ()); /* { dg-warning ".mempcpy. writing 4 or > more bytes into a region of size 3 overflows the destination" } */ > > + mempcpy (d, s, range ()); /* { dg-warning ".memcpy. writing 4 or > more bytes into a region of size 3 overflows the destination" } */ > > } > > > > void test_memmove (void) > > diff --git a/gcc/testsuite/gcc.dg/pr79223-2.c > b/gcc/testsuite/gcc.dg/pr79223-2.c > > new file mode 100644 > > index 00000000000..6e78b326671 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/pr79223-2.c > > @@ -0,0 +1,38 @@ > > +/* PR middle-end/79223 - missing -Wstringop-overflow on a memmove > overflow > > + { dg-do compile } > > + { dg-additional-options "-O2 -Wall -Wno-array-bounds -std=gnu99" } > */ > > + > > +typedef __SIZE_TYPE__ size_t; > > + > > +extern void* memcpy (void*, const void*, size_t); > > +extern void* mempcpy (void*, const void*, size_t); > > +extern void* memmove (void*, const void*, size_t); > > + > > +char d[3]; > > +char s[4]; > > +void *sink; > > + > > +size_t range (void) > > +{ > > + extern size_t size (); > > + size_t n = size (); > > + if (n <= sizeof d) > > + return sizeof d + 1; > > + > > + return n; > > +} > > + > > +void test_memcpy (void) > > +{ > > + memcpy (d, s, range ()); /* { dg-warning ".memcpy. writing 4 or > more bytes into a region of size 3 overflows the destination" } */ > > +} > > + > > +void test_mempcpy (void) > > +{ > > + sink = mempcpy (d, s, range ()); /* { dg-warning ".mempcpy. writing > 4 or more bytes into a region of size 3 overflows the destination" } */ > > +} > > + > > +void test_memmove (void) > > +{ > > + memmove (d + 1, d, range ()); /* { dg-warning ".memmove. writing 4 > or more bytes into a region of size 2 overflows the destination" } */ > > +} > > diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c > b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c > > new file mode 100644 > > index 00000000000..321eefa8207 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c > > @@ -0,0 +1,82 @@ > > +/* { dg-options "-O2 -fdump-ipa-profile-optimized -mtune=core2" } */ > > +/* { dg-skip-if "" { ! { i?86-*-* x86_64-*-* } } } */ > > + > > +char *buffer1; > > +char *buffer2; > > + > > +/* Bzero is not tested because it gets transformed into memset. */ > > + > > +#define DEFINE_TEST(N) \ > > +__attribute__((noinline)) \ > > +void memcpy_test_ ## N (int len) \ > > +{ \ > > + __builtin_memcpy (buffer1, buffer2, len); \ > > +} \ > > + \ > > +__attribute__((noinline)) \ > > +void mempcpy_test_ ## N (int len) \ > > +{ \ > > + __builtin_mempcpy (buffer1, buffer2, len); \ > > +} \ > > + \ > > +__attribute__((noinline)) \ > > +void memset_test_ ## N (int len) \ > > +{ \ > > + __builtin_memset (buffer1, 'c', len); \ > > +} \ > > +__attribute__((noinline)) \ > > +void memmove_test_ ## N (int len) \ > > +{ \ > > + __builtin_memmove (buffer1, buffer2, len); \ > > +} \ > > + \ > > +void test_stringops_ ## N(int len) \ > > +{ \ > > + memcpy_test_## N (len); \ > > + mempcpy_test_ ## N (len); \ > > + memset_test_ ## N (len); \ > > + memmove_test_ ## N (len); \ > > +} \ > > + \ > > +void test_stringops_with_values_ ## N (int common, int not_common) \ > > +{ \ > > + int i; \ > > + for (i = 0; i < 1000; i++) \ > > + { \ > > + if (i > 990) \ > > + test_stringops_ ## N (not_common); \ > > + else \ > > + test_stringops_ ## N (common); \ > > + } \ > > +} > > + > > +DEFINE_TEST(0); > > +DEFINE_TEST(1); > > +DEFINE_TEST(2); > > + > > +int main() { > > + buffer1 = __builtin_malloc (1000); > > + buffer2 = __builtin_malloc (1000); > > + > > + test_stringops_with_values_0 (8, 55); > > + test_stringops_with_values_1 (55, 55); > > + test_stringops_with_values_2 (257, 55); > > + > > + return 0; > > +} > > + > > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: > single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */ > > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: > single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */ > > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation > done: single value 32 stringop for BUILT_IN_MEMCPY" 0 "profile" } } */ > > + > > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: > single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */ > > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: > single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */ > > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation > done: single value 32 stringop for BUILT_IN_MEMPCPY" 0 "profile" } } */ > > + > > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: > single value 8 stringop for BUILT_IN_MEMSET" "profile" } } */ > > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: > single value 55 stringop for BUILT_IN_MEMSET" "profile" } } */ > > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation > done: single value 32 stringop for BUILT_IN_MEMSET" 0 "profile" } } */ > > + > > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: > single value 8 stringop for BUILT_IN_MEMMOVE" "profile" } } */ > > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: > single value 55 stringop for BUILT_IN_MEMMOVE" "profile" } } */ > > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation > done: single value 32 stringop for BUILT_IN_MEMMOVE" 0 "profile" } } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c > b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c > > similarity index 97% > > rename from gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c > > rename to gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c > > index 5ddb1a88c29..1084a852fc3 100644 > > --- a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c > > +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c > > @@ -16,7 +16,7 @@ void memcpy_test_ ## N (int len) \ > > __attribute__((noinline)) \ > > void mempcpy_test_ ## N (int len) \ > > { \ > > - __builtin_mempcpy (buffer1, buffer2, len); \ > > + void * volatile res = __builtin_mempcpy (buffer1, buffer2, len); \ > > } \ > > \ > > __attribute__((noinline)) \ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c > b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c > > index 716be5b8094..0872cc52a7f 100644 > > --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c > > @@ -38,6 +38,5 @@ AR (memmove, int, 2, INT_MAX, 1); > > AR (mempcpy, short, 2, SHRT_MAX, 1); > > AR (mempcpy, int, 2, INT_MAX, 1); > > > > -/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } } > > - { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } } > > - { dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" } > } */ > > +/* { dg-final { scan-tree-dump-times "builtin_memcpy" 10 "optimized" } } > > + { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } > } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c > b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c > > new file mode 100644 > > index 00000000000..0f0c577c64a > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c > > @@ -0,0 +1,45 @@ > > +/* PR 81908 - FAIL: gfortran.dg/alloc_comp_auto_array_2.f90 -O3 -g -m32 > > + Test to verify that calls to memcpy et al. where the size is in a > range > > + with more than one valid value are not eliminated (this test > complements > > + builtins-folding-gimple-2.c). > > + { dg-do compile } > > + { dg-options "-O2 -Wall -fdump-tree-optimized" } */ > > + > > +#define SHRT_MAX __SHRT_MAX__ > > +#define SHRT_MIN (-SHRT_MAX - 1) > > +#define INT_MAX __INT_MAX__ > > +#define INT_MIN (-INT_MAX - 1) > > + > > +#define UNIQUE_FUNCNAME(func, line) test_ ## func ## _ ## line > > +#define FUNCNAME(func, line) UNIQUE_FUNCNAME (func, line) > > + > > +void *sink; > > + > > +#define AR(func, type, min, max, val) \ > > + void __attribute__ ((noclone, noinline)) \ > > + FUNCNAME (func, __LINE__) (char *d, const char *s, type n) \ > > + { \ > > + if ((type)min <= n && n <= (type)max) \ > > + n = val; \ > > + sink = __builtin_ ## func (d, s, n); > \ > > + } typedef void DummyType > > + > > +AR (memcpy, short, SHRT_MIN, 0, 1); > > +AR (memcpy, short, SHRT_MIN, 1, 2); > > +AR (memcpy, short, 2, SHRT_MAX, 1); > > + > > +AR (memcpy, int, INT_MIN, 0, 1); > > +AR (memcpy, int, INT_MIN, 1, 2); > > +AR (memcpy, int, INT_MIN, 2, 3); > > +AR (memcpy, int, 2, INT_MAX, 1); > > +AR (memcpy, int, 2, INT_MAX, 1); > > + > > +AR (memmove, short, 2, SHRT_MAX, 1); > > +AR (memmove, int, 2, INT_MAX, 1); > > + > > +AR (mempcpy, short, 2, SHRT_MAX, 1); > > +AR (mempcpy, int, 2, INT_MAX, 1); > > + > > +/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } } > > + { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } } > > + { dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" } > } */ > > -- > > 2.30.2 > > Thanks for the review, I'll change to builtin_decl_explicit and send a V2 when Stage 1 opens. Netanel
