From e57f4ad19e3001f17bf779bd4419baf944d62d2d Mon Sep 17 00:00:00 2001
From: Jim Meyering <meyering@fb.com>
Date: Wed, 25 Nov 2020 16:49:51 -0800
Subject: [PATCH] grep: avoid performance regression with many patterns

* src/grep.c (hash_pattern): Switch from PJW to DJB2, to avoid an
O(N) to O(N^2) performance regression due to hash collisions with
patterns from, e.g., 'seq 500000|tr 0-9 A-J'.
Reported by Frank Heckenbach in https://bugs.gnu.org/44754

* configure.ac (GNULIB_TEST_WARN_CFLAGS): Disable three more
warning options when compiling the gnulib tests, along the lines
of what coreutils does:
-Wsuggest-attribute=format
-Wold-style-definition
-Wreturn-type (by adding -Wno-return-type)
---
 configure.ac | 3 +++
 src/grep.c   | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index bb20e39..e279886 100644
--- a/configure.ac
+++ b/configure.ac
@@ -154,9 +154,12 @@ if test "$gl_gcc_warnings" = yes; then
   # It's not worth being this picky about test programs.
   nw="$nw -Wsuggest-attribute=const"
   nw="$nw -Wsuggest-attribute=pure"
+  nw="$nw -Wsuggest-attribute=format"
   nw="$nw -Wformat-truncation=2"    # False alarm in strerror_r.c
+  nw="$nw -Wold-style-definition"
   gl_MANYWARN_COMPLEMENT([GNULIB_TEST_WARN_CFLAGS],
                          [$GNULIB_WARN_CFLAGS], [$nw])
+  gl_WARN_ADD([-Wno-return-type], [GNULIB_TEST_WARN_CFLAGS])
   AC_SUBST([GNULIB_TEST_WARN_CFLAGS])
 fi

diff --git a/src/grep.c b/src/grep.c
index cc2b962..74bddb7 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -128,8 +128,9 @@ hash_pattern (void const *pat, size_t n_buckets)
 {
   size_t h = 0;
   intptr_t pat_offset = (intptr_t) pat - 1;
-  for (char const *s = pattern_array + pat_offset; *s != '\n'; s++)
-    h = *s + ((h << 9) | (h >> (SIZE_WIDTH - 9)));
+  unsigned char const *s = (unsigned char const *) pattern_array + pat_offset;
+  for ( ; *s != '\n'; s++)
+    h = h * 33 ^ *s;
   return h % n_buckets;
 }
 static bool _GL_ATTRIBUTE_PURE
-- 
2.29.2.154.g7f7ebe054a

