Thanks for the review and the fixes. I found a couple more things. First, it's not portable to cast wint_t * to wchar_t *, since the pointed-to types might be different sizes or representations. Second, we can put the cache directly in the struct dfa, saving the overhead of doing a separate malloc.

The attached further patch should address these problems. I pushed this, along with the earlier two patches in this sequence, and am marking this as done.


>From c51be43c00c8ae05a1a19d503865ef5f97ea6612 Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Fri, 28 Mar 2014 09:32:29 -0700
Subject: [PATCH 3/3] dfa: avoid an indirection and port wint_t usage

* src/dfa.c (struct dfa): Put mbrtowc_cache directly into struct dfa
rather than having a pointer; this saves a malloc and an indirection.
All uses changed.
(dfambcache): Port to hosts where wint_t * can't be cast to wchar_t *.
---
 src/dfa.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 1ca7f38..4ed2189 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -381,7 +381,7 @@ struct dfa
      character (if any) for that byte.  WEOF means the byte is the
      leading byte of a multibyte character.  Invalid and null bytes are
      mapped to themselves.  */
-  wint_t *mbrtowc_cache;
+  wint_t mbrtowc_cache[NOTCHAR];
 #endif
 
   /* Array of the bracket expression in the DFA.  */
@@ -490,38 +490,42 @@ dfambcache (struct dfa *d)
 {
 #if MBS_SUPPORT
   int i;
-  MALLOC (d->mbrtowc_cache, NOTCHAR);
   for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
     {
       char c = i;
       unsigned char uc = i;
       mbstate_t s = { 0 };
-      switch (mbrtowc ((wchar_t *) &d->mbrtowc_cache[uc], &c, 1, &s))
+      wchar_t wc;
+      wint_t wi;
+      switch (mbrtowc (&wc, &c, 1, &s))
         {
-        case (size_t) -2: d->mbrtowc_cache[uc] = WEOF; break;
-        case (size_t) -1: d->mbrtowc_cache[uc] = uc; break;
+        default: wi = wc; break;
+        case (size_t) -2: wi = WEOF; break;
+        case (size_t) -1: wi = uc; break;
         }
+      d->mbrtowc_cache[uc] = wi;
     }
 #endif
 }
 
 #if MBS_SUPPORT
-/* Store into *PWC the result of converting the leading bytes of the
-   multibyte buffer S of length N bytes, updating the conversion state
-   in *MBS.  On conversion error, convert just a single byte as-is.
-   Return the number of bytes converted.
+/* Given the dfa D, store into *PWC the result of converting the
+   leading bytes of the multibyte buffer S of length N bytes, updating
+   the conversion state in *MBS.  On conversion error, convert just a
+   single byte as-is.  Return the number of bytes converted.
 
    This differs from mbrtowc (PWC, S, N, MBS) as follows:
 
+   * Extra arg D, containing an mbrtowc_cache for speed.
    * N must be at least 1.
    * S[N - 1] must be a sentinel byte.
    * Shift encodings are not supported.
    * The return value is always in the range 1..N.
    * *MBS is always valid afterwards.
-   * *PWC is always set to something.
-   * This uses mbrtowc_cache for speed in the typical case.  */
+   * *PWC is always set to something.  */
 static size_t
-mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n, mbstate_t *mbs)
+mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n,
+              mbstate_t *mbs)
 {
   unsigned char uc = s[0];
   wint_t wc = d->mbrtowc_cache[uc];
@@ -3653,9 +3657,6 @@ dfafree (struct dfa *d)
 
   if (d->mb_cur_max > 1)
     free_mbdata (d);
-#if MBS_SUPPORT
-  free (d->mbrtowc_cache);
-#endif
 
   for (i = 0; i < d->sindex; ++i)
     {
-- 
1.9.0

Reply via email to