On Tue, Jul 22, 2025 at 7:16 PM Andrew Pinski <pins...@gmail.com> wrote:
>
> On Fri, Apr 18, 2025 at 5:08 PM Andrew Pinski <quic_apin...@quicinc.com> 
> wrote:
> >
> > This fixes a long standing (since GCC 5) issue where the 
> > malloc+memset->calloc
> > optimization would happen even if the memset was not always executed.
> > This is a varient of Nathan's patch: 
> > https://inbox.sourceware.org/gcc-patches/f4b5d106-8176-b7bd-709b-d43518878...@acm.org/
> > Jeff Law had suggested to look at probabilities of the basic blocks to see
> > if it is profitable or not; I am not totally convinced that is a good idea.
> > Though this is an extended version of Nathan's patch as it uses post 
> > domination to see
> > if the memset is always called after the condition of null-ness.
>
> Ping?
> https://gcc.gnu.org/pipermail/gcc-patches/2025-April/681439.html

Ping?

>
> Thanks,
> Andrew Pinski
>
> >
> >         PR tree-optimization/83022
> >
> > gcc/ChangeLog:
> >
> >         * tree-ssa-strlen.cc (last_stmt_ptr_check): New function.
> >         (allow_memset_malloc_to_calloc): New function.
> >         (strlen_pass::handle_builtin_memset): Check to see if it is a good
> >         idea to do the malloc+memset->calloc optimization.
> >         (printf_strlen_execute): Free post dom info.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.dg/tree-ssa/calloc-6.c: New test.
> >         * gcc.dg/tree-ssa/calloc-7.c: New test.
> >         * gcc.dg/tree-ssa/calloc-8.c: New test.
> >         * gcc.dg/tree-ssa/calloc-9.c: New test.
> >
> > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> > ---
> >  gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c | 31 +++++++++++
> >  gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c | 30 +++++++++++
> >  gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c | 21 ++++++++
> >  gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c | 20 +++++++
> >  gcc/tree-ssa-strlen.cc                   | 67 ++++++++++++++++++++++++
> >  5 files changed, 169 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c
> >  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c
> >
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c
> > new file mode 100644
> > index 00000000000..ca5968707a8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/calloc-6.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +/* PR tree-optimization/83022 */
> > +
> > +void* f(int n, int *q)
> > +{
> > +  int *p = __builtin_malloc (n);
> > +  for(int i = 0; i < n; i++)
> > +    q[i] = i;
> > +  if (p)
> > +    __builtin_memset (p, 0, n);
> > +  return p;
> > +}
> > +
> > +void* g(int n, int *q)
> > +{
> > +  int *p = __builtin_malloc (n);
> > +  for(int i = 0; i < n; i++)
> > +    q[i] = i;
> > +  __builtin_memset (p, 0, n);
> > +  return p;
> > +}
> > +
> > +/* These 2 should be converted to calloc as the memset is
> > +   always executed or is conditional on the ptr nullness.  */
> > +
> > +/* { dg-final { scan-tree-dump-times "calloc" 2 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
> > +/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c
> > new file mode 100644
> > index 00000000000..665ebe2a049
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/calloc-7.c
> > @@ -0,0 +1,30 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +/* PR tree-optimization/83022 */
> > +
> > +void* f(int n, int *q, int t)
> > +{
> > +  int *p = __builtin_malloc (n);
> > +  for(int i = 0; i < n; i++)
> > +    q[i] = i;
> > +  if (t)
> > +    __builtin_memset (p, 0, n);
> > +  return p;
> > +}
> > +
> > +void* g(int n, int *q, int t)
> > +{
> > +  int *p = __builtin_malloc (n);
> > +  if (t)
> > +    __builtin_memset (p, 0, n);
> > +  return p;
> > +}
> > +
> > +/* These 2 should not be converted into calloc as the memset is not
> > +   conditionalized on the pointer being checked for nullness. */
> > +
> > +/* { dg-final { scan-tree-dump-not "calloc" "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "malloc" 2 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "memset" 2 "optimized" } } */
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c
> > new file mode 100644
> > index 00000000000..7d7d504ff9e
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/calloc-8.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +/* PR tree-optimization/83022 */
> > +
> > +void *combine_alt (int s, bool c, bool *a)
> > +{
> > +  void *r = __builtin_malloc (s);
> > +  if (!r)  return 0;
> > +  if (a) *a = r != 0;
> > +  __builtin_memset (r, 0, s);
> > +  return r;
> > +}
> > +
> > +/* These one  should be converted to calloc as the memset is
> > +   is conditional on the ptr nullness.  */
> > +
> > +/* { dg-final { scan-tree-dump-times "calloc" 1 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
> > +/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c
> > new file mode 100644
> > index 00000000000..e43dc359082
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/calloc-9.c
> > @@ -0,0 +1,20 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +/* PR tree-optimization/83022 */
> > +
> > +void *m (int s, int c)
> > +{
> > +  void *r = __builtin_malloc (s);
> > +  if (r && c)
> > +    __builtin_memset (r, 0, s);
> > +  return r;
> > +}
> > +
> > +/* This one should not be converted into calloc as the memset is not
> > +   conditionalized on the pointer being checked for nullness. */
> > +
> > +/* { dg-final { scan-tree-dump-not "calloc" "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "malloc" 1 "optimized" } } */
> > +/* { dg-final { scan-tree-dump-times "memset" 1 "optimized" } } */
> > +
> > diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc
> > index c4d64132e53..e69ceeffb03 100644
> > --- a/gcc/tree-ssa-strlen.cc
> > +++ b/gcc/tree-ssa-strlen.cc
> > @@ -3785,6 +3785,70 @@ strlen_pass::handle_alloc_call (built_in_function 
> > bcode)
> >    si->dont_invalidate = true;
> >  }
> >
> > +/* Returns true of the last statement of the bb is a conditional
> > +   that checks ptr for null-ness. */
> > +static bool
> > +last_stmt_ptr_check (tree ptr, basic_block bb)
> > +{
> > +  gimple_stmt_iterator gsi = gsi_last_bb (bb);
> > +  gcond *cstmt = dyn_cast <gcond *>(gsi_stmt (gsi));
> > +  if (!cstmt)
> > +    return false;
> > +  if (gimple_cond_code (cstmt) != EQ_EXPR && gimple_cond_code (cstmt) != 
> > NE_EXPR)
> > +    return false;
> > +  if (!integer_zerop (gimple_cond_rhs (cstmt)))
> > +    return false;
> > +  if (!operand_equal_p (gimple_cond_lhs (cstmt), ptr))
> > +    return false;
> > +  return true;
> > +}
> > +
> > +/* Check if doing a malloc+memset to calloc is a good idea. PTR is the
> > +   return value of the malloc/where the memset happens. MALLOC_BB is
> > +   the basic block of the malloc. MEMSET_BB is basic block of the memset.  
> > */
> > +
> > +static bool
> > +allow_memset_malloc_to_calloc (tree ptr, basic_block malloc_bb,
> > +                              basic_block memset_bb)
> > +{
> > +  /* If the malloc and memset are in the same block, then always
> > +     allow the transformation. Don't need post dominator calculation. */
> > +  if (malloc_bb == memset_bb)
> > +    return true;
> > +
> > +  if (!dom_info_available_p (cfun, CDI_POST_DOMINATORS))
> > +    calculate_dominance_info (CDI_POST_DOMINATORS);
> > +
> > +  /* If the memset is always executed after the malloc, then allow
> > +      to optimize to calloc. */
> > +  if (dominated_by_p (CDI_POST_DOMINATORS, malloc_bb, memset_bb))
> > +    return true;
> > +
> > +  /* If the malloc bb ends in a ptr check, then we need to check if
> > +     either successor is post dominated by the memset bb.  */
> > +  if (last_stmt_ptr_check (ptr, malloc_bb))
> > +    {
> > +      if (dominated_by_p (CDI_POST_DOMINATORS, EDGE_SUCC (malloc_bb, 
> > 0)->dest, memset_bb))
> > +       return true;
> > +      if (dominated_by_p (CDI_POST_DOMINATORS, EDGE_SUCC (malloc_bb, 
> > 1)->dest, memset_bb))
> > +       return true;
> > +    }
> > +
> > +  /* At this point we want to only handle:
> > +     malloc();
> > +     ...
> > +     if (ptr)  goto memset_bb; */
> > +  if (!single_pred_p (memset_bb))
> > +    return false;
> > +
> > +  /* If the predecessor of the memset bb is not post dominated by malloc, 
> > then the memset is
> > +     conditionalized by something more than just the checking if ptr is 
> > non-null.  */
> > +  if (!dominated_by_p (CDI_POST_DOMINATORS, malloc_bb, single_pred_edge 
> > (memset_bb)->src))
> > +    return false;
> > +
> > +  return last_stmt_ptr_check (ptr, single_pred_edge (memset_bb)->src);
> > +}
> > +
> >  /* Handle a call to memset.
> >     After a call to calloc, memset(,0,) is unnecessary.
> >     memset(malloc(n),0,n) is calloc(n,1).
> > @@ -3870,6 +3934,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write)
> >    enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
> >    if (code1 == BUILT_IN_CALLOC)
> >      /* Not touching alloc_stmt */ ;
> > +  else if (!allow_memset_malloc_to_calloc (ptr, gimple_bb (si1->stmt), 
> > gimple_bb (memset_stmt)))
> > +     return false;
> >    else if (code1 == BUILT_IN_MALLOC
> >            && operand_equal_p (memset_size, alloc_size, 0))
> >      {
> > @@ -6005,6 +6071,7 @@ printf_strlen_execute (function *fun, bool warn_only)
> >    disable_ranger (fun);
> >    scev_finalize ();
> >    loop_optimizer_finalize ();
> > +  free_dominance_info (CDI_POST_DOMINATORS);
> >
> >    return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
> >  }
> > --
> > 2.43.0
> >

Reply via email to