On Fri, Jul 8, 2022 at 5:54 AM Richard Biener <richard.guent...@gmail.com> wrote: > > On Thu, Jul 7, 2022 at 6:45 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > When memchr is applied on a constant string of no more than the bytes of > > a word, simplify memchr by checking each byte in the constant string. > > > > int f (int a) > > { > > return __builtin_memchr ("AE", a, 2) != 0; > > } > > > > is simplified to > > > > int f (int a) > > { > > return ((char) a == 'A' || (char) a == 'E') != 0; > > } > > > > gcc/ > > > > PR tree-optimization/103798 > > * tree-ssa-forwprop.cc: Include "tree-ssa-strlen.h". > > (simplify_builtin_call): Inline memchr with constant strings of > > no more than the bytes of a word. > > * tree-ssa-strlen.cc (use_in_zero_equality): Make it global. > > * tree-ssa-strlen.h (use_in_zero_equality): New. > > > > gcc/testsuite/ > > > > PR tree-optimization/103798 > > * c-c++-common/pr103798-1.c: New test. > > * c-c++-common/pr103798-2.c: Likewise. > > * c-c++-common/pr103798-3.c: Likewise. > > * c-c++-common/pr103798-4.c: Likewise. > > * c-c++-common/pr103798-5.c: Likewise. > > * c-c++-common/pr103798-6.c: Likewise. > > * c-c++-common/pr103798-7.c: Likewise. > > * c-c++-common/pr103798-8.c: Likewise. > > --- > > gcc/testsuite/c-c++-common/pr103798-1.c | 28 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-2.c | 30 ++++++++++++ > > gcc/testsuite/c-c++-common/pr103798-3.c | 28 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-4.c | 28 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-5.c | 26 ++++++++++ > > gcc/testsuite/c-c++-common/pr103798-6.c | 27 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-7.c | 27 +++++++++++ > > gcc/testsuite/c-c++-common/pr103798-8.c | 27 +++++++++++ > > gcc/tree-ssa-forwprop.cc | 64 +++++++++++++++++++++++++ > > gcc/tree-ssa-strlen.cc | 4 +- > > gcc/tree-ssa-strlen.h | 2 + > > 11 files changed, 289 insertions(+), 2 deletions(-) > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-1.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-2.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-3.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-4.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-5.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-6.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-7.c > > create mode 100644 gcc/testsuite/c-c++-common/pr103798-8.c > > > > diff --git a/gcc/testsuite/c-c++-common/pr103798-1.c > > b/gcc/testsuite/c-c++-common/pr103798-1.c > > new file mode 100644 > > index 00000000000..cd3edf569fc > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-1.c > > @@ -0,0 +1,28 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int > > +f (char a) > > +{ > > + return __builtin_memchr ("a", a, 1) == 0; > > +} > > + > > +__attribute__ ((weak)) > > +int > > +g (char a) > > +{ > > + return a != 'a'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-2.c > > b/gcc/testsuite/c-c++-common/pr103798-2.c > > new file mode 100644 > > index 00000000000..e7e99c3679e > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-2.c > > @@ -0,0 +1,30 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +#include <string.h> > > + > > +__attribute__ ((weak)) > > +int > > +f (int a) > > +{ > > + return memchr ("aE", a, 2) != NULL; > > +} > > + > > +__attribute__ ((weak)) > > +int > > +g (char a) > > +{ > > + return a == 'a' || a == 'E'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i + 256) != g (i + 256)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-3.c > > b/gcc/testsuite/c-c++-common/pr103798-3.c > > new file mode 100644 > > index 00000000000..ddcedc7e238 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-3.c > > @@ -0,0 +1,28 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int > > +f (char a) > > +{ > > + return __builtin_memchr ("aEgZ", a, 3) == 0; > > +} > > + > > +__attribute__ ((weak)) > > +int > > +g (char a) > > +{ > > + return a != 'a' && a != 'E' && a != 'g'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-4.c > > b/gcc/testsuite/c-c++-common/pr103798-4.c > > new file mode 100644 > > index 00000000000..00e8302a833 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-4.c > > @@ -0,0 +1,28 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int > > +f (char a) > > +{ > > + return __builtin_memchr ("aEgi", a, 4) != 0; > > +} > > + > > +__attribute__ ((weak)) > > +int > > +g (char a) > > +{ > > + return a == 'a' || a == 'E' || a == 'g' || a == 'i'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-5.c > > b/gcc/testsuite/c-c++-common/pr103798-5.c > > new file mode 100644 > > index 00000000000..0d6487a13df > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-5.c > > @@ -0,0 +1,26 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int f(char a) > > +{ > > + return __builtin_memchr ("aEgiH", a, 5) == 0; > > +} > > + > > +__attribute__ ((weak)) > > +int g(char a) > > +{ > > + return a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H'; > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-6.c > > b/gcc/testsuite/c-c++-common/pr103798-6.c > > new file mode 100644 > > index 00000000000..5ccb5ee66e0 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-6.c > > @@ -0,0 +1,27 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int f(char a) > > +{ > > + return __builtin_memchr ("aEgiHx", a, 6) != 0; > > +} > > + > > +__attribute__ ((weak)) > > +int g(char a) > > +{ > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > + || a == 'x'); > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-7.c > > b/gcc/testsuite/c-c++-common/pr103798-7.c > > new file mode 100644 > > index 00000000000..40fd38257d1 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-7.c > > @@ -0,0 +1,27 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int f(char a) > > +{ > > + return __builtin_memchr ("aEgiHjZ", a, 7) == 0; > > +} > > + > > +__attribute__ ((weak)) > > +int g(char a) > > +{ > > + return (a != 'a' && a != 'E' && a != 'g' && a != 'i' && a != 'H' > > + && a != 'j' && a != 'Z'); > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i) != g (i)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/testsuite/c-c++-common/pr103798-8.c > > b/gcc/testsuite/c-c++-common/pr103798-8.c > > new file mode 100644 > > index 00000000000..0841b18cea4 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/pr103798-8.c > > @@ -0,0 +1,27 @@ > > +/* { dg-do run { target int128 } } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -save-temps" } */ > > + > > +__attribute__ ((weak)) > > +int f(int a) > > +{ > > + return __builtin_memchr ("aEgiHx19ABC", a, 8) != 0; > > +} > > + > > +__attribute__ ((weak)) > > +int g(char a) > > +{ > > + return (a == 'a' || a == 'E' || a == 'g' || a == 'i' || a == 'H' > > + || a == 'x' || a == '1' || a == '9'); > > +} > > + > > +int > > +main () > > +{ > > + for (int i = 0; i < 255; i++) > > + if (f (i + 256) != g (i + 256)) > > + __builtin_abort (); > > + > > + return 0; > > +} > > + > > +/* { dg-final { scan-assembler-not "memchr" } } */ > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > index 69567ab3275..edd1277c23b 100644 > > --- a/gcc/tree-ssa-forwprop.cc > > +++ b/gcc/tree-ssa-forwprop.cc > > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see > > #include "tree-dfa.h" > > #include "tree-ssa-propagate.h" > > #include "tree-ssa-dom.h" > > +#include "tree-ssa-strlen.h" > > #include "builtins.h" > > #include "tree-cfgcleanup.h" > > #include "cfganal.h" > > @@ -1177,6 +1178,15 @@ constant_pointer_difference (tree p1, tree p2) > > memcpy (p, "abcd ", 7); > > call if the latter can be stored by pieces during expansion. > > > > + Optimize > > + memchr ("abcd", a, 4) == 0; > > + or > > + memchr ("abcd", a, 4) != 0; > > + to > > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 > > + or > > + (a == 'a' || a == 'b' || a == 'c' || a == 'd') != 0 > > + > > Also canonicalize __atomic_fetch_op (p, x, y) op x > > to __atomic_op_fetch (p, x, y) or > > __atomic_op_fetch (p, x, y) iop x > > @@ -1193,8 +1203,62 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, > > tree callee2) > > return false; > > stmt1 = SSA_NAME_DEF_STMT (vuse); > > > > + tree res; > > + > > switch (DECL_FUNCTION_CODE (callee2)) > > { > > + case BUILT_IN_MEMCHR: > > + if (gimple_call_num_args (stmt2) == 3 > > + && (res = gimple_call_lhs (stmt2)) != nullptr > > + && use_in_zero_equality (res) != nullptr > > + && CHAR_BIT == 8 > > + && BITS_PER_UNIT == 8) > > + { > > + tree ptr = gimple_call_arg (stmt2, 0); > > + if (TREE_CODE (ptr) != ADDR_EXPR > > + || TREE_CODE (TREE_OPERAND (ptr, 0)) != STRING_CST) > > + break; > > + unsigned HOST_WIDE_INT slen > > + = TREE_STRING_LENGTH (TREE_OPERAND (ptr, 0)); > > + /* It must be a non-empty string constant. */ > > + if (slen < 2) > > + break; > > + tree size = gimple_call_arg (stmt2, 2); > > + /* Size must be a constant which is <= UNITS_PER_WORD and > > + <= the string length. */ > > + if (TREE_CODE (size) != INTEGER_CST > > + || integer_zerop (size) > > + || wi::gtu_p (wi::to_wide (size), UNITS_PER_WORD) > > + || wi::geu_p (wi::to_wide (size), slen)) > > it might be convenient to check tree_fits_uhwi_p (size) and > do unsigned HOST_WIDE_INT sz = tree_to_uhwi (size); instead > of playing with wide-int here.
Fixed. > > + break; > > + tree ch = gimple_call_arg (stmt2, 1); > > + location_t loc = gimple_location (stmt2); > > + if (!useless_type_conversion_p (char_type_node, > > + TREE_TYPE (ch))) > > + ch = fold_convert_loc (loc, char_type_node, ch); > > + const char *p = TREE_STRING_POINTER (TREE_OPERAND (ptr, 0)); > > + unsigned int isize = TREE_INT_CST_LOW (size); > > + tree *op = new tree[isize]; > > + for (unsigned int i = 0; i < isize; i++) > > + { > > + op[i] = build_int_cst (char_type_node, p[i]); > > + op[i] = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, > > + op[i], ch); > > + } > > + for (unsigned int i = isize - 1; i >= 1; i--) > > + op[i - 1] = fold_convert_loc (loc, boolean_type_node, > > + fold_build2_loc (loc, > > + BIT_IOR_EXPR, > > + > > boolean_type_node, > > + op[i - 1], > > + op[i])); > > + res = fold_convert_loc (loc, TREE_TYPE (res), op[0]); > > + gimplify_and_update_call_from_tree (gsi_p, res); > > my worry about -Os remains, can you please add a check > like optimize_bb_for_speed (gimple_bb (stmt2)) || sz <= 2 Fixed. > (so we allow inline expanding two comparisons when not optimizing > for speed)? For UNITS_PER_WORD == 8 this is possibly a > lot of compares, don't we have some _BY_PIECEs limit we There are a few comparisons. Should I limit it to 4 bytes? > can use here? There's no COMPARE_RATIO it seems, > not sure what we use there. > > Since we only inline expand memchr(..) != 0 we're leaving > 'a' != x | 'b' != x | ... optimization to possibly memchr("abcd", a, 4) == 0 is optimized to (a == 'a' || a == 'b' || a == 'c' || a == 'd') == 0 by this patch. > 'ab...' != (x|x<<8|x<<12|...) for followup (I'm quite sure we don't It works only for memchr("aaaa", a, 4) == 0. It is optimized to x != 'a' by this patch. Thanks. > do anything like that). The x "splat" needs log2 (sz) operations > (but they might be slow?), but in the end it might pay off > for power-of-two (sz) (one could even do this in chunks)? > Anyway, this might be for a followup. > > Otherwise it looks OK. > > Thanks, > Richard. > > > + delete[] op; > > + return true; > > + } > > + break; > > + > > case BUILT_IN_MEMSET: > > if (gimple_call_num_args (stmt2) != 3 > > || gimple_call_lhs (stmt2) > > diff --git a/gcc/tree-ssa-strlen.cc b/gcc/tree-ssa-strlen.cc > > index 7b3e3899ea2..5afbae1b72e 100644 > > --- a/gcc/tree-ssa-strlen.cc > > +++ b/gcc/tree-ssa-strlen.cc > > @@ -3913,8 +3913,8 @@ strlen_pass::handle_builtin_memset (bool *zero_write) > > nonnull if and only RES is used in such expressions exclusively and > > in none other. */ > > > > -static gimple * > > -use_in_zero_equality (tree res, bool exclusive = true) > > +gimple * > > +use_in_zero_equality (tree res, bool exclusive) > > { > > gimple *first_use = NULL; > > > > diff --git a/gcc/tree-ssa-strlen.h b/gcc/tree-ssa-strlen.h > > index 8d155450db8..fdb4d9d7783 100644 > > --- a/gcc/tree-ssa-strlen.h > > +++ b/gcc/tree-ssa-strlen.h > > @@ -35,6 +35,8 @@ struct c_strlen_data; > > extern void get_range_strlen_dynamic (tree, gimple *, c_strlen_data *, > > pointer_query &); > > > > +extern gimple *use_in_zero_equality (tree, bool = true); > > + > > /* APIs internal to strlen pass. Defined in gimple-ssa-sprintf.cc. */ > > extern bool handle_printf_call (gimple_stmt_iterator *, pointer_query &); > > > > -- > > 2.36.1 > > -- H.J.