Hi! As the testcase shows, we fold strlen (str) == 0 into *str == 0 before the strlen pass has a chance to optimize. The following patch optimizes loads from the known termination '\0' into lhs = '\0' and loads from the part of the string known to be non-zero results in ~[0, 0] range being recorded if we don't have anything better.
Note that unfortunately vrp2 doesn't consider any memory loads as interesting, even when they have non-varying ranges from other passes, but guess that is something to improve later. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2017-12-18 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/83444 * tree-ssa-strlen.c (strlen_check_and_optimize_stmt): Optimize character loads. * gcc.dg/strlenopt-36.c: New test. --- gcc/tree-ssa-strlen.c.jj 2017-12-18 14:57:20.000000000 +0100 +++ gcc/tree-ssa-strlen.c 2017-12-18 16:45:04.689862506 +0100 @@ -3104,6 +3104,64 @@ strlen_check_and_optimize_stmt (gimple_s else if (code == EQ_EXPR || code == NE_EXPR) fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt), gimple_assign_rhs2 (stmt), stmt); + else if (gimple_assign_load_p (stmt) + && TREE_CODE (TREE_TYPE (lhs)) == INTEGER_TYPE + && TYPE_MODE (TREE_TYPE (lhs)) == TYPE_MODE (char_type_node) + && (TYPE_PRECISION (TREE_TYPE (lhs)) + == TYPE_PRECISION (char_type_node)) + && !gimple_has_volatile_ops (stmt)) + { + tree off = integer_zero_node; + unsigned HOST_WIDE_INT coff = 0; + int idx = -1; + tree rhs1 = gimple_assign_rhs1 (stmt); + if (code == MEM_REF) + { + idx = get_stridx (TREE_OPERAND (rhs1, 0)); + off = TREE_OPERAND (rhs1, 1); + } + else + idx = get_addr_stridx (rhs1, NULL_TREE, &coff); + if (idx > 0) + { + strinfo *si = get_strinfo (idx); + if (si + && si->nonzero_chars + && TREE_CODE (si->nonzero_chars) == INTEGER_CST) + { + widest_int w1 = wi::to_widest (si->nonzero_chars); + widest_int w2 = wi::to_widest (off) + coff; + if (w1 == w2 + && si->full_string_p) + { + /* Reading the final '\0' character. */ + tree zero = build_int_cst (TREE_TYPE (lhs), 0); + gimple_set_vuse (stmt, NULL_TREE); + gimple_assign_set_rhs_from_tree (gsi, zero); + update_stmt (gsi_stmt (*gsi)); + } + else if (w1 > w2) + { + /* Reading a character before the final '\0' + character. Just set the value range to ~[0, 0] + if we don't have anything better. */ + wide_int min, max; + tree type = TREE_TYPE (lhs); + enum value_range_type vr + = get_range_info (lhs, &min, &max); + if (vr == VR_VARYING + || (vr == VR_RANGE + && min == wi::min_value (TYPE_PRECISION (type), + TYPE_SIGN (type)) + && max == wi::max_value (TYPE_PRECISION (type), + TYPE_SIGN (type)))) + set_range_info (lhs, VR_ANTI_RANGE, + wi::zero (TYPE_PRECISION (type)), + wi::zero (TYPE_PRECISION (type))); + } + } + } + } if (strlen_to_stridx) { --- gcc/testsuite/gcc.dg/strlenopt-36.c.jj 2017-12-18 16:49:48.357210798 +0100 +++ gcc/testsuite/gcc.dg/strlenopt-36.c 2017-12-18 17:02:41.804261717 +0100 @@ -0,0 +1,38 @@ +/* PR tree-optimization/83444 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-not "abort \\(\\)" "optimized" } } */ + +#include "strlenopt.h" + +void +foo (void) +{ + char a[5] = "012"; + strcpy (a, ""); + if (strlen (a) != 0) + abort (); +} + +void +bar (void) +{ + char a[5] = "012"; + char b[7] = ""; + strcpy (a, b); + if (strlen (a) != 0) + abort (); +} + +struct S { char a[4]; char b[5]; char c[7]; }; + +void +baz (void) +{ + struct S s; + strcpy (s.b, "012"); + strcpy (s.c, ""); + strcpy (s.b, s.c); + if (s.b[0] != 0) + abort (); +} Jakub