On Thu, Feb 26, 2026 at 5:09 AM Netanel Komm <[email protected]> wrote: > > > > On Thu, 26 Feb 2026 at 9:18 Richard Biener <[email protected]> wrote: >> >> On Wed, Feb 25, 2026 at 11:03 PM Netanel Komm <[email protected]> wrote: >> > >> > This patch allows the GIMPLE folder to transform __builtin_mempcpy into >> > __builtin_memcpy >> > in cases where the return value is ignored. This is beneficial because >> > most targets have >> > an efficient implementation for memcpy. >> > >> > Existing tests that relied on the unfolded mempcpy have been duplicated - >> > one version now >> > takes the folded mempcpy into account, and the other intentionally >> > prevents the folding >> > from happening. >> > >> > Bootstrapped and regression tested on x86_64-linux-gnu. >> >> LGTM, but this has to wait for stage1. One nit below
I have one more nit below too. >> >> > PR tree-optimization/93556 >> > >> > gcc/ChangeLog: >> > >> > * gimple-fold.cc (gimple_fold_builtin_mempcpy): New function. >> > (gimple_fold_builtin): Handle BUILT_IN_MEMPCPY. >> > >> > gcc/testsuite/ChangeLog: >> > >> > * gcc.dg/pr79223.c: Rename to gcc.dg/pr79223-1.c and update scans. >> > * gcc.dg/tree-prof/val-prof-7.c: Rename to >> > gcc.dg/tree-prof/val-prof-7-1.c and update scans. >> > * gcc.dg/tree-ssa/builtins-folding-gimple-3.c: Update scans. >> > * gcc.dg/builtin-mempcpy-1.c: New test. >> > * gcc.dg/builtin-mempcpy-2.c: New test. >> > * gcc.dg/pr79223-2.c: New test. >> > * gcc.dg/tree-prof/val-prof-7-2.c: New test. >> > * gcc.dg/tree-ssa/builtins-folding-gimple-4.c: New test. >> > >> > Signed-off-by: Netanel Komm <[email protected]> >> > --- >> > gcc/gimple-fold.cc | 24 +++++- >> > gcc/testsuite/gcc.dg/builtin-mempcpy-1.c | 9 ++ >> > gcc/testsuite/gcc.dg/builtin-mempcpy-2.c | 39 +++++++++ >> > .../gcc.dg/{pr79223.c => pr79223-1.c} | 2 +- >> > gcc/testsuite/gcc.dg/pr79223-2.c | 38 +++++++++ >> > gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c | 82 +++++++++++++++++++ >> > .../{val-prof-7.c => val-prof-7-2.c} | 2 +- >> > .../tree-ssa/builtins-folding-gimple-3.c | 5 +- >> > .../tree-ssa/builtins-folding-gimple-4.c | 45 ++++++++++ >> > 9 files changed, 240 insertions(+), 6 deletions(-) >> > create mode 100644 gcc/testsuite/gcc.dg/builtin-mempcpy-1.c >> > create mode 100644 gcc/testsuite/gcc.dg/builtin-mempcpy-2.c >> > rename gcc/testsuite/gcc.dg/{pr79223.c => pr79223-1.c} (86%) >> > create mode 100644 gcc/testsuite/gcc.dg/pr79223-2.c >> > create mode 100644 gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c >> > rename gcc/testsuite/gcc.dg/tree-prof/{val-prof-7.c => val-prof-7-2.c} >> > (97%) >> > create mode 100644 >> > gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c >> > >> > diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc >> > index bc8540a8c5c..f870a7feb51 100644 >> > --- a/gcc/gimple-fold.cc >> > +++ 
b/gcc/gimple-fold.cc >> > @@ -3339,6 +3339,24 @@ gimple_fold_builtin_stpcpy (gimple_stmt_iterator >> > *gsi) >> > return true; >> > } >> > >> > +static bool >> > +gimple_fold_builtin_mempcpy (gimple_stmt_iterator *gsi) Every function should have a comment in front of it saying what it does and what it returns when. So this could be something like: /* Simplify mempcpy call stmt at GSI returning true if simplified. Currently only handling mempcpy->memcpy when the return value is ignored. */ Thanks, Andrew Pinski >> > +{ >> > + gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi)); >> > + >> > + if (gimple_call_lhs (stmt) == NULL_TREE) >> > + { >> > + tree fn = builtin_decl_implicit (BUILT_IN_MEMCPY); >> >> I think you can use builtin_decl_explicit as we require memcpy >> to be available. >> >> > + if (!fn) >> > + return false; >> > + gimple_call_set_fndecl (stmt, fn); >> > + fold_stmt (gsi); >> > + return true; >> > + } >> > + >> > + return false; >> > +} >> > + >> > /* Fold a call EXP to {,v}snprintf having NARGS passed as ARGS. Return >> > NULL_TREE if a normal call should be emitted rather than expanding >> > the function inline. 
FCODE is either BUILT_IN_SNPRINTF_CHK or >> > @@ -5387,8 +5405,12 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi) >> > return gimple_fold_builtin_memset (gsi, >> > gimple_call_arg (stmt, 1), >> > gimple_call_arg (stmt, 2)); >> > - case BUILT_IN_MEMCPY: >> > case BUILT_IN_MEMPCPY: >> > + if (gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, 0), >> > + gimple_call_arg (stmt, 1), >> > fcode)) >> > + return true; >> > + return gimple_fold_builtin_mempcpy (gsi); >> > + case BUILT_IN_MEMCPY: >> > case BUILT_IN_MEMMOVE: >> > return gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, >> > 0), >> > gimple_call_arg (stmt, 1), >> > fcode); >> > diff --git a/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c >> > b/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c >> > new file mode 100644 >> > index 00000000000..19d9a224657 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.dg/builtin-mempcpy-1.c >> > @@ -0,0 +1,9 @@ >> > +/* { dg-do compile } */ >> > +/* { dg-options "-fdump-tree-lower" } */ >> > + >> > +/* Basic MRE from bug report */ >> > +void test_bare (void *d, const void *s, __SIZE_TYPE__ n) { >> > + __builtin_mempcpy (d, s, n); >> > +} >> > + >> > +/* { dg-final { scan-tree-dump "__builtin_memcpy" "lower" } } */ >> > diff --git a/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c >> > b/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c >> > new file mode 100644 >> > index 00000000000..65f80f97f4f >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.dg/builtin-mempcpy-2.c >> > @@ -0,0 +1,39 @@ >> > +/* { dg-do compile } */ >> > +/* { dg-options "-O1 -fdump-tree-optimized" } */ >> > + >> > +/* Indirectly unused result */ >> > +void test_unused_indirect (void *d, const void *s, __SIZE_TYPE__ n) { >> > + void *a = __builtin_mempcpy (d, s, n); >> > + void *b = a; >> > +} >> > + >> > +/* Simple used result (in statement) */ >> > +void *test_used_simple (void *d, const void *s, __SIZE_TYPE__ n) { >> > + return __builtin_mempcpy (d, s, n); >> > +} >> > + >> > +/* More complicated used result (in 
expression) */ >> > +__SIZE_TYPE__ test_used_in_expr (char *d, const char *s, __SIZE_TYPE__ n) >> > { >> > + return (char *)__builtin_mempcpy (d, s, n) - d; >> > +} >> > + >> > +/* Unused in all paths */ >> > +void *test_unused_indirect2 (void *d, const void *s, __SIZE_TYPE__ n) { >> > + void *a = __builtin_mempcpy (d, s, n); >> > + if (n > 20) { >> > + return (void *)20; >> > + } >> > + return (void *)7; >> > +} >> > + >> > +/* Used in at least one path */ >> > +void *test_maybe_used (void *d, const void *s, __SIZE_TYPE__ n) { >> > + void *a = __builtin_mempcpy (d, s, n); >> > + if (n > 20) { >> > + return a; >> > + } >> > + return (void *)0; >> > +} >> > + >> > +/* { dg-final { scan-tree-dump-times "__builtin_memcpy" 2 "optimized" } } >> > */ >> > +/* { dg-final { scan-tree-dump-times "__builtin_mempcpy" 3 "optimized" } >> > } */ >> > diff --git a/gcc/testsuite/gcc.dg/pr79223.c >> > b/gcc/testsuite/gcc.dg/pr79223-1.c >> > similarity index 86% >> > rename from gcc/testsuite/gcc.dg/pr79223.c >> > rename to gcc/testsuite/gcc.dg/pr79223-1.c >> > index ef0dd1b7bc5..099d18333e8 100644 >> > --- a/gcc/testsuite/gcc.dg/pr79223.c >> > +++ b/gcc/testsuite/gcc.dg/pr79223-1.c >> > @@ -28,7 +28,7 @@ void test_memcpy (void) >> > >> > void test_mempcpy (void) >> > { >> > - mempcpy (d, s, range ()); /* { dg-warning ".mempcpy. writing 4 or >> > more bytes into a region of size 3 overflows the destination" } */ >> > + mempcpy (d, s, range ()); /* { dg-warning ".memcpy. 
writing 4 or more >> > bytes into a region of size 3 overflows the destination" } */ >> > } >> > >> > void test_memmove (void) >> > diff --git a/gcc/testsuite/gcc.dg/pr79223-2.c >> > b/gcc/testsuite/gcc.dg/pr79223-2.c >> > new file mode 100644 >> > index 00000000000..6e78b326671 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.dg/pr79223-2.c >> > @@ -0,0 +1,38 @@ >> > +/* PR middle-end/79223 - missing -Wstringop-overflow on a memmove overflow >> > + { dg-do compile } >> > + { dg-additional-options "-O2 -Wall -Wno-array-bounds -std=gnu99" } */ >> > + >> > +typedef __SIZE_TYPE__ size_t; >> > + >> > +extern void* memcpy (void*, const void*, size_t); >> > +extern void* mempcpy (void*, const void*, size_t); >> > +extern void* memmove (void*, const void*, size_t); >> > + >> > +char d[3]; >> > +char s[4]; >> > +void *sink; >> > + >> > +size_t range (void) >> > +{ >> > + extern size_t size (); >> > + size_t n = size (); >> > + if (n <= sizeof d) >> > + return sizeof d + 1; >> > + >> > + return n; >> > +} >> > + >> > +void test_memcpy (void) >> > +{ >> > + memcpy (d, s, range ()); /* { dg-warning ".memcpy. writing 4 or more >> > bytes into a region of size 3 overflows the destination" } */ >> > +} >> > + >> > +void test_mempcpy (void) >> > +{ >> > + sink = mempcpy (d, s, range ()); /* { dg-warning ".mempcpy. writing 4 >> > or more bytes into a region of size 3 overflows the destination" } */ >> > +} >> > + >> > +void test_memmove (void) >> > +{ >> > + memmove (d + 1, d, range ()); /* { dg-warning ".memmove. writing 4 or >> > more bytes into a region of size 2 overflows the destination" } */ >> > +} >> > diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c >> > b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c >> > new file mode 100644 >> > index 00000000000..321eefa8207 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-1.c >> > @@ -0,0 +1,82 @@ >> > +/* { dg-options "-O2 -fdump-ipa-profile-optimized -mtune=core2" } */ >> > +/* { dg-skip-if "" { ! 
{ i?86-*-* x86_64-*-* } } } */ >> > + >> > +char *buffer1; >> > +char *buffer2; >> > + >> > +/* Bzero is not tested because it gets transformed into memset. */ >> > + >> > +#define DEFINE_TEST(N) \ >> > +__attribute__((noinline)) \ >> > +void memcpy_test_ ## N (int len) \ >> > +{ \ >> > + __builtin_memcpy (buffer1, buffer2, len); \ >> > +} \ >> > + \ >> > +__attribute__((noinline)) \ >> > +void mempcpy_test_ ## N (int len) \ >> > +{ \ >> > + __builtin_mempcpy (buffer1, buffer2, len); \ >> > +} \ >> > + \ >> > +__attribute__((noinline)) \ >> > +void memset_test_ ## N (int len) \ >> > +{ \ >> > + __builtin_memset (buffer1, 'c', len); \ >> > +} \ >> > +__attribute__((noinline)) \ >> > +void memmove_test_ ## N (int len) \ >> > +{ \ >> > + __builtin_memmove (buffer1, buffer2, len); \ >> > +} \ >> > + \ >> > +void test_stringops_ ## N(int len) \ >> > +{ \ >> > + memcpy_test_## N (len); \ >> > + mempcpy_test_ ## N (len); \ >> > + memset_test_ ## N (len); \ >> > + memmove_test_ ## N (len); \ >> > +} \ >> > + \ >> > +void test_stringops_with_values_ ## N (int common, int not_common) \ >> > +{ \ >> > + int i; \ >> > + for (i = 0; i < 1000; i++) \ >> > + { \ >> > + if (i > 990) \ >> > + test_stringops_ ## N (not_common); \ >> > + else \ >> > + test_stringops_ ## N (common); \ >> > + } \ >> > +} >> > + >> > +DEFINE_TEST(0); >> > +DEFINE_TEST(1); >> > +DEFINE_TEST(2); >> > + >> > +int main() { >> > + buffer1 = __builtin_malloc (1000); >> > + buffer2 = __builtin_malloc (1000); >> > + >> > + test_stringops_with_values_0 (8, 55); >> > + test_stringops_with_values_1 (55, 55); >> > + test_stringops_with_values_2 (257, 55); >> > + >> > + return 0; >> > +} >> > + >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: >> > single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */ >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: >> > single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */ >> > +/* { dg-final-use-not-autofdo { 
scan-ipa-dump-times "Transformation done: >> > single value 32 stringop for BUILT_IN_MEMCPY" 0 "profile" } } */ >> > + >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: >> > single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */ >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: >> > single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */ >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: >> > single value 32 stringop for BUILT_IN_MEMPCPY" 0 "profile" } } */ >> > + >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: >> > single value 8 stringop for BUILT_IN_MEMSET" "profile" } } */ >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: >> > single value 55 stringop for BUILT_IN_MEMSET" "profile" } } */ >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: >> > single value 32 stringop for BUILT_IN_MEMSET" 0 "profile" } } */ >> > + >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: >> > single value 8 stringop for BUILT_IN_MEMMOVE" "profile" } } */ >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: >> > single value 55 stringop for BUILT_IN_MEMMOVE" "profile" } } */ >> > +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: >> > single value 32 stringop for BUILT_IN_MEMMOVE" 0 "profile" } } */ >> > diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c >> > b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c >> > similarity index 97% >> > rename from gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c >> > rename to gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c >> > index 5ddb1a88c29..1084a852fc3 100644 >> > --- a/gcc/testsuite/gcc.dg/tree-prof/val-prof-7.c >> > +++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-7-2.c >> > @@ -16,7 +16,7 @@ void memcpy_test_ ## N (int len) \ >> > __attribute__((noinline)) \ >> > void mempcpy_test_ ## N (int len) \ >> > { \ >> > 
- __builtin_mempcpy (buffer1, buffer2, len); \ >> > + void * volatile res = __builtin_mempcpy (buffer1, buffer2, len); \ >> > } \ >> > \ >> > __attribute__((noinline)) \ >> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c >> > b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c >> > index 716be5b8094..0872cc52a7f 100644 >> > --- a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c >> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-3.c >> > @@ -38,6 +38,5 @@ AR (memmove, int, 2, INT_MAX, 1); >> > AR (mempcpy, short, 2, SHRT_MAX, 1); >> > AR (mempcpy, int, 2, INT_MAX, 1); >> > >> > -/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } } >> > - { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } } >> > - { dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" } } >> > */ >> > +/* { dg-final { scan-tree-dump-times "builtin_memcpy" 10 "optimized" } } >> > + { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } } >> > */ >> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c >> > b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c >> > new file mode 100644 >> > index 00000000000..0f0c577c64a >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtins-folding-gimple-4.c >> > @@ -0,0 +1,45 @@ >> > +/* PR 81908 - FAIL: gfortran.dg/alloc_comp_auto_array_2.f90 -O3 -g -m32 >> > + Test to verify that calls to memcpy et al. where the size is in a range >> > + with more than one valid value are not eliminated (this test >> > complements >> > + builtins-folding-gimple-2.c). 
>> > + { dg-do compile } >> > + { dg-options "-O2 -Wall -fdump-tree-optimized" } */ >> > + >> > +#define SHRT_MAX __SHRT_MAX__ >> > +#define SHRT_MIN (-SHRT_MAX - 1) >> > +#define INT_MAX __INT_MAX__ >> > +#define INT_MIN (-INT_MAX - 1) >> > + >> > +#define UNIQUE_FUNCNAME(func, line) test_ ## func ## _ ## line >> > +#define FUNCNAME(func, line) UNIQUE_FUNCNAME (func, line) >> > + >> > +void *sink; >> > + >> > +#define AR(func, type, min, max, val) \ >> > + void __attribute__ ((noclone, noinline)) \ >> > + FUNCNAME (func, __LINE__) (char *d, const char *s, type n) \ >> > + { \ >> > + if ((type)min <= n && n <= (type)max) \ >> > + n = val; \ >> > + sink = __builtin_ ## func (d, s, n); >> > \ >> > + } typedef void DummyType >> > + >> > +AR (memcpy, short, SHRT_MIN, 0, 1); >> > +AR (memcpy, short, SHRT_MIN, 1, 2); >> > +AR (memcpy, short, 2, SHRT_MAX, 1); >> > + >> > +AR (memcpy, int, INT_MIN, 0, 1); >> > +AR (memcpy, int, INT_MIN, 1, 2); >> > +AR (memcpy, int, INT_MIN, 2, 3); >> > +AR (memcpy, int, 2, INT_MAX, 1); >> > +AR (memcpy, int, 2, INT_MAX, 1); >> > + >> > +AR (memmove, short, 2, SHRT_MAX, 1); >> > +AR (memmove, int, 2, INT_MAX, 1); >> > + >> > +AR (mempcpy, short, 2, SHRT_MAX, 1); >> > +AR (mempcpy, int, 2, INT_MAX, 1); >> > + >> > +/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } } >> > + { dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } } >> > + { dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" } } >> > */ >> > -- >> > 2.30.2 >> > > > > Thanks for the review, I'll change to builtin_decl_explicit and send a V2 > when Stage 1 opens. > > > Netanel
