Paul,
Thanks very much. I checked the patch and added the following fixes
to it.
1. Fixed a compiler warning:
dfa.c: In function 'build_mbrtowc_cache':
dfa.c:448: warning: pointer targets in passing argument 1 of
'mbrtowc' differ in signedness
2. Moved mbrtowc_cache into a new member of struct dfa.
When more than one struct dfa is in use at the same time, their mbrtowc
caches may conflict. So, move mbrtowc_cache into a new member of struct
dfa, so that each struct dfa has its own mbrtowc cache.
Norihiro
From 41bfd2f66a48efc0cdf1b865c2cc4cdb48d98ce0 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Sat, 29 Mar 2014 00:28:56 +0900
Subject: [PATCH] grep: take mbrtowc_cache into new member of struct dfa
When more than one struct dfa is in use at the same time, their mbrtowc
caches may conflict. So, move mbrtowc_cache into a new member of struct
dfa, so that each struct dfa has its own mbrtowc cache.
* src/dfa.c (struct dfa): New member `mbrtowc_cache'.
(dfambcache): Rename from build_mbrtowc_cache. Add dependency on struct dfa.
(mbs_to_wchar): Add dependency on struct dfa.
(FETCH_WC): Use it.
(prepare_wc_buf): Use it. Add dependency on struct dfa.
(dfacomp): Call it.
(dfafree): Release it.
---
src/dfa.c | 133 +++++++++++++++++++++++++++++++++-----------------------------
1 file changed, 71 insertions(+), 62 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 626087e..1ca7f38 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -376,6 +376,14 @@ struct dfa
size_t nmultibyte_prop;
int *multibyte_prop;
+#if MBS_SUPPORT
+ /* A table indexed by byte values that contains the corresponding wide
+ character (if any) for that byte. WEOF means the byte is the
+ leading byte of a multibyte character. Invalid and null bytes are
+ mapped to themselves. */
+ wint_t *mbrtowc_cache;
+#endif
+
/* Array of the bracket expression in the DFA. */
struct mb_char_classes *mbcsets;
size_t nmbcsets;
@@ -430,62 +438,6 @@ struct dfa
the dfa. */
};
-/* A table indexed by byte values that contains the corresponding wide
- character (if any) for that byte. WEOF means the byte is the
- leading byte of a multibyte character. Invalid and null bytes are
- mapped to themselves. */
-static wint_t mbrtowc_cache[NOTCHAR];
-
-static void
-build_mbrtowc_cache (void)
-{
- int i;
- for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
- {
- char c = i;
- unsigned char uc = i;
- mbstate_t s = { 0 };
- switch (mbrtowc (&mbrtowc_cache[uc], &c, 1, &s))
- {
- case (size_t) -2: mbrtowc_cache[uc] = WEOF; break;
- case (size_t) -1: mbrtowc_cache[uc] = uc; break;
- }
- }
-}
-
-/* Store into *PWC the result of converting the leading bytes of the
- multibyte buffer S of length N bytes, updating the conversion state
- in *MBS. On conversion error, convert just a single byte as-is.
- Return the number of bytes converted.
-
- This differs from mbrtowc (PWC, S, N, MBS) as follows:
-
- * N must be at least 1.
- * S[N - 1] must be a sentinel byte.
- * Shift encodings are not supported.
- * The return value is always in the range 1..N.
- * *MBS is always valid afterwards.
- * *PWC is always set to something.
- * This uses mbrtowc_cache for speed in the typical case. */
-static size_t
-mbs_to_wchar (wchar_t *pwc, char const *s, size_t n, mbstate_t *mbs)
-{
- unsigned char uc = s[0];
- wint_t wc = mbrtowc_cache[uc];
-
- if (wc == WEOF)
- {
- size_t nbytes = mbrtowc (pwc, s, n, mbs);
- if (0 < nbytes && nbytes < (size_t) -2)
- return nbytes;
- memset (mbs, 0, sizeof *mbs);
- wc = uc;
- }
-
- *pwc = wc;
- return 1;
-}
-
/* Some macros for user access to dfa internals. */
/* ACCEPTING returns true if s could possibly be an accepting state of r. */
@@ -533,6 +485,60 @@ static void regexp (void);
} \
while (false)
+static void
+dfambcache (struct dfa *d)
+{
+#if MBS_SUPPORT
+ int i;
+ MALLOC (d->mbrtowc_cache, NOTCHAR);
+ for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
+ {
+ char c = i;
+ unsigned char uc = i;
+ mbstate_t s = { 0 };
+ switch (mbrtowc ((wchar_t *) &d->mbrtowc_cache[uc], &c, 1, &s))
+ {
+ case (size_t) -2: d->mbrtowc_cache[uc] = WEOF; break;
+ case (size_t) -1: d->mbrtowc_cache[uc] = uc; break;
+ }
+ }
+#endif
+}
+
+#if MBS_SUPPORT
+/* Store into *PWC the result of converting the leading bytes of the
+ multibyte buffer S of length N bytes, updating the conversion state
+ in *MBS. On conversion error, convert just a single byte as-is.
+ Return the number of bytes converted.
+
+ This differs from mbrtowc (PWC, S, N, MBS) as follows:
+
+ * N must be at least 1.
+ * S[N - 1] must be a sentinel byte.
+ * Shift encodings are not supported.
+ * The return value is always in the range 1..N.
+ * *MBS is always valid afterwards.
+ * *PWC is always set to something.
+ * This uses mbrtowc_cache for speed in the typical case. */
+static size_t
+mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n, mbstate_t
*mbs)
+{
+ unsigned char uc = s[0];
+ wint_t wc = d->mbrtowc_cache[uc];
+
+ if (wc == WEOF)
+ {
+ size_t nbytes = mbrtowc (pwc, s, n, mbs);
+ if (0 < nbytes && nbytes < (size_t) -2)
+ return nbytes;
+ memset (mbs, 0, sizeof *mbs);
+ wc = uc;
+ }
+
+ *pwc = wc;
+ return 1;
+}
+#endif
#ifdef DEBUG
@@ -900,7 +906,7 @@ static unsigned char const *buf_end; /* reference to end
in dfaexec. */
else \
{ \
wchar_t _wc; \
- size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, &mbs); \
+ size_t nbytes = mbs_to_wchar (dfa, &_wc, lexptr, lexleft, &mbs); \
cur_mb_len = nbytes; \
(wc) = _wc; \
(c) = nbytes == 1 ? to_uchar (*lexptr) : EOF; \
@@ -3353,7 +3359,7 @@ transit_state (struct dfa *d, state_num s, unsigned char
const **pp)
/* Initialize mblen_buf and inputwcs with data from the next line. */
static void
-prepare_wc_buf (const char *begin, const char *end)
+prepare_wc_buf (struct dfa *d, const char *begin, const char *end)
{
#if MBS_SUPPORT
unsigned char eol = eolbyte;
@@ -3364,7 +3370,7 @@ prepare_wc_buf (const char *begin, const char *end)
for (i = 0; i < ilim; i++)
{
- size_t nbytes = mbs_to_wchar (inputwcs + i, begin + i, ilim - i, &mbs);
+ size_t nbytes = mbs_to_wchar (d, inputwcs + i, begin + i, ilim - i,
&mbs);
mblen_buf[i] = nbytes - (nbytes == 1);
if (begin[i] == eol)
break;
@@ -3419,7 +3425,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
MALLOC (mblen_buf, end - begin + 2);
MALLOC (inputwcs, end - begin + 2);
memset (&mbs, 0, sizeof (mbstate_t));
- prepare_wc_buf ((const char *) p, end);
+ prepare_wc_buf (d, (const char *) p, end);
}
for (;;)
@@ -3509,7 +3515,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
++*count;
if (d->mb_cur_max > 1)
- prepare_wc_buf ((const char *) p, end);
+ prepare_wc_buf (d, (const char *) p, end);
}
/* Check if we've run off the end of the buffer. */
@@ -3628,7 +3634,7 @@ void
dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
{
dfainit (d);
- build_mbrtowc_cache ();
+ dfambcache (d);
dfaparse (s, len, d);
dfamust (d);
dfaoptimize (d);
@@ -3647,6 +3653,9 @@ dfafree (struct dfa *d)
if (d->mb_cur_max > 1)
free_mbdata (d);
+#if MBS_SUPPORT
+ free (d->mbrtowc_cache);
+#endif
for (i = 0; i < d->sindex; ++i)
{
--
1.9.1