Paul,

Thanks very much.  I checked the patch and added the following fixes
to it.

 1. Fixed warning.

    dfa.c: In function 'build_mbrtowc_cache':
    dfa.c:448: warning: pointer targets in passing argument 1 of
    'mbrtowc' differ in signedness

 2. Moved mbrtowc_cache into a new member of struct dfa.

    When more than one struct dfa is in use at the same time, a single
    shared mbrtowc cache may cause conflicts.  So, move mbrtowc_cache into
    a new member of struct dfa, so that each struct dfa has its own cache.

Norihiro
From 41bfd2f66a48efc0cdf1b865c2cc4cdb48d98ce0 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Sat, 29 Mar 2014 00:28:56 +0900
Subject: [PATCH] grep: take mbrtowc_cache into new member of struct dfa

When more than one struct dfa is in use at the same time, a single
shared mbrtowc cache may cause conflicts.  So, move mbrtowc_cache into
a new member of struct dfa, so that each struct dfa has its own cache.

* src/dfa.c (struct dfa): New member `mbrtowc_cache'.
(dfambcache): Rename from build_mbrtowc_cache.  Add dependency on struct dfa.
(mbs_to_wchar): Add dependency on struct dfa.
(FETCH_WC): Use it.
(prepare_wc_buf): Use it.  Add dependency on struct dfa.
(dfacomp): Call it.
(dfafree): Release it.
---
 src/dfa.c | 133 +++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 71 insertions(+), 62 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 626087e..1ca7f38 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -376,6 +376,14 @@ struct dfa
   size_t nmultibyte_prop;
   int *multibyte_prop;
 
+#if MBS_SUPPORT
+  /* A table indexed by byte values that contains the corresponding wide
+     character (if any) for that byte.  WEOF means the byte is the
+     leading byte of a multibyte character.  Invalid and null bytes are
+     mapped to themselves.  */
+  wint_t *mbrtowc_cache;
+#endif
+
   /* Array of the bracket expression in the DFA.  */
   struct mb_char_classes *mbcsets;
   size_t nmbcsets;
@@ -430,62 +438,6 @@ struct dfa
                                    the dfa.  */
 };
 
-/* A table indexed by byte values that contains the corresponding wide
-   character (if any) for that byte.  WEOF means the byte is the
-   leading byte of a multibyte character.  Invalid and null bytes are
-   mapped to themselves.  */
-static wint_t mbrtowc_cache[NOTCHAR];
-
-static void
-build_mbrtowc_cache (void)
-{
-  int i;
-  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
-    {
-      char c = i;
-      unsigned char uc = i;
-      mbstate_t s = { 0 };
-      switch (mbrtowc (&mbrtowc_cache[uc], &c, 1, &s))
-        {
-        case (size_t) -2: mbrtowc_cache[uc] = WEOF; break;
-        case (size_t) -1: mbrtowc_cache[uc] = uc; break;
-        }
-    }
-}
-
-/* Store into *PWC the result of converting the leading bytes of the
-   multibyte buffer S of length N bytes, updating the conversion state
-   in *MBS.  On conversion error, convert just a single byte as-is.
-   Return the number of bytes converted.
-
-   This differs from mbrtowc (PWC, S, N, MBS) as follows:
-
-   * N must be at least 1.
-   * S[N - 1] must be a sentinel byte.
-   * Shift encodings are not supported.
-   * The return value is always in the range 1..N.
-   * *MBS is always valid afterwards.
-   * *PWC is always set to something.
-   * This uses mbrtowc_cache for speed in the typical case.  */
-static size_t
-mbs_to_wchar (wchar_t *pwc, char const *s, size_t n, mbstate_t *mbs)
-{
-  unsigned char uc = s[0];
-  wint_t wc = mbrtowc_cache[uc];
-
-  if (wc == WEOF)
-    {
-      size_t nbytes = mbrtowc (pwc, s, n, mbs);
-      if (0 < nbytes && nbytes < (size_t) -2)
-        return nbytes;
-      memset (mbs, 0, sizeof *mbs);
-      wc = uc;
-    }
-
-  *pwc = wc;
-  return 1;
-}
-
 /* Some macros for user access to dfa internals.  */
 
 /* ACCEPTING returns true if s could possibly be an accepting state of r.  */
@@ -533,6 +485,60 @@ static void regexp (void);
     }                                                          \
   while (false)
 
+static void
+dfambcache (struct dfa *d)
+{
+#if MBS_SUPPORT
+  int i;
+  MALLOC (d->mbrtowc_cache, NOTCHAR);
+  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
+    {
+      char c = i;
+      unsigned char uc = i;
+      mbstate_t s = { 0 };
+      switch (mbrtowc ((wchar_t *) &d->mbrtowc_cache[uc], &c, 1, &s))
+        {
+        case (size_t) -2: d->mbrtowc_cache[uc] = WEOF; break;
+        case (size_t) -1: d->mbrtowc_cache[uc] = uc; break;
+        }
+    }
+#endif
+}
+
+#if MBS_SUPPORT
+/* Store into *PWC the result of converting the leading bytes of the
+   multibyte buffer S of length N bytes, updating the conversion state
+   in *MBS.  On conversion error, convert just a single byte as-is.
+   Return the number of bytes converted.
+
+   This differs from mbrtowc (PWC, S, N, MBS) as follows:
+
+   * N must be at least 1.
+   * S[N - 1] must be a sentinel byte.
+   * Shift encodings are not supported.
+   * The return value is always in the range 1..N.
+   * *MBS is always valid afterwards.
+   * *PWC is always set to something.
+   * This uses mbrtowc_cache for speed in the typical case.  */
+static size_t
+mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n, mbstate_t 
*mbs)
+{
+  unsigned char uc = s[0];
+  wint_t wc = d->mbrtowc_cache[uc];
+
+  if (wc == WEOF)
+    {
+      size_t nbytes = mbrtowc (pwc, s, n, mbs);
+      if (0 < nbytes && nbytes < (size_t) -2)
+        return nbytes;
+      memset (mbs, 0, sizeof *mbs);
+      wc = uc;
+    }
+
+  *pwc = wc;
+  return 1;
+}
+#endif
 
 #ifdef DEBUG
 
@@ -900,7 +906,7 @@ static unsigned char const *buf_end;    /* reference to end 
in dfaexec.  */
     else                                       \
       {                                                \
         wchar_t _wc;                           \
-        size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, &mbs); \
+        size_t nbytes = mbs_to_wchar (dfa, &_wc, lexptr, lexleft, &mbs); \
         cur_mb_len = nbytes;                   \
         (wc) = _wc;                            \
         (c) = nbytes == 1 ? to_uchar (*lexptr) : EOF;    \
@@ -3353,7 +3359,7 @@ transit_state (struct dfa *d, state_num s, unsigned char 
const **pp)
 /* Initialize mblen_buf and inputwcs with data from the next line.  */
 
 static void
-prepare_wc_buf (const char *begin, const char *end)
+prepare_wc_buf (struct dfa *d, const char *begin, const char *end)
 {
 #if MBS_SUPPORT
   unsigned char eol = eolbyte;
@@ -3364,7 +3370,7 @@ prepare_wc_buf (const char *begin, const char *end)
 
   for (i = 0; i < ilim; i++)
     {
-      size_t nbytes = mbs_to_wchar (inputwcs + i, begin + i, ilim - i, &mbs);
+      size_t nbytes = mbs_to_wchar (d, inputwcs + i, begin + i, ilim - i, 
&mbs);
       mblen_buf[i] = nbytes - (nbytes == 1);
       if (begin[i] == eol)
         break;
@@ -3419,7 +3425,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
       MALLOC (mblen_buf, end - begin + 2);
       MALLOC (inputwcs, end - begin + 2);
       memset (&mbs, 0, sizeof (mbstate_t));
-      prepare_wc_buf ((const char *) p, end);
+      prepare_wc_buf (d, (const char *) p, end);
     }
 
   for (;;)
@@ -3509,7 +3515,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
             ++*count;
 
           if (d->mb_cur_max > 1)
-            prepare_wc_buf ((const char *) p, end);
+            prepare_wc_buf (d, (const char *) p, end);
         }
 
       /* Check if we've run off the end of the buffer.  */
@@ -3628,7 +3634,7 @@ void
 dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
 {
   dfainit (d);
-  build_mbrtowc_cache ();
+  dfambcache (d);
   dfaparse (s, len, d);
   dfamust (d);
   dfaoptimize (d);
@@ -3647,6 +3653,9 @@ dfafree (struct dfa *d)
 
   if (d->mb_cur_max > 1)
     free_mbdata (d);
+#if MBS_SUPPORT
+  free (d->mbrtowc_cache);
+#endif
 
   for (i = 0; i < d->sindex; ++i)
     {
-- 
1.9.1

Reply via email to