[I've reopened 16911 since the bug's not fixed on OS X.]

Here's my guess. In glibc's en_US locale, 'Lj' is considered to be both uppercase and lowercase; but in OS X's en_US locale, it's considered to be neither uppercase nor lowercase. If so, the attached gnulib patch should fix the problem (though I can't easily test this). Could you please give it a try?

By the way, I'd like to remove the need for grep's local differences from the glibc regex code. I assume they're there only to pacify GCC's warning flags, and we can do that with pragmas in gnulib. One fix at a time, though.

diff --git a/lib/regcomp.c b/lib/regcomp.c
index 56faf11..664a0fe 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -297,7 +297,10 @@ re_set_fastmap (char *fastmap, bool icase, int ch)
 {
   fastmap[ch] = 1;
   if (icase)
-    fastmap[tolower (ch)] = 1;
+    {
+      fastmap[tolower (ch)] = 1;
+      fastmap[toupper (ch)] = 1;
+    }
 }
 
 /* Helper function for re_compile_fastmap.
@@ -334,10 +337,15 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
                *p++ = dfa->nodes[node].opr.c;
              memset (&state, '\0', sizeof (state));
              if (__mbrtowc (&wc, (const char *) buf, p - buf,
-                            &state) == p - buf
-                 && (__wcrtomb ((char *) buf, towlower (wc), &state)
-                     != (size_t) -1))
-               re_set_fastmap (fastmap, false, buf[0]);
+                            &state) == p - buf)
+                {
+                  if (__wcrtomb ((char *) buf, towlower (wc), &state)
+                     != (size_t) -1)
+                    re_set_fastmap (fastmap, false, buf[0]);
+                  if (__wcrtomb ((char *) buf, towupper (wc), &state)
+                     != (size_t) -1)
+                    re_set_fastmap (fastmap, false, buf[0]);
+                }
            }
 #endif
        }
@@ -414,6 +422,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
                      if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
                          != (size_t) -1)
                        re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+                     if (__wcrtomb (buf, towupper (cset->mbchars[i]), &state)
+                         != (size_t) -1)
+                       re_set_fastmap (fastmap, false, *(unsigned char *) buf);
                    }
                }
            }
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index 0343ee6..79181a3 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -311,12 +311,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
                               + byte_idx), remain_len, &pstr->cur_state);
          if (BE (mbclen < (size_t) -2, 1))
            {
-             wchar_t wcu = wc;
-             if (iswlower (wc))
+             wchar_t wcu = towupper (wc);
+             if (wcu != wc)
                {
                  size_t mbcdlen;
 
-                 wcu = towupper (wc);
                  mbcdlen = wcrtomb (buf, wcu, &prev_st);
                  if (BE (mbclen == mbcdlen, 1))
                    memcpy (pstr->mbs + byte_idx, buf, mbclen);
@@ -381,12 +380,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
        mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
        if (BE (mbclen < (size_t) -2, 1))
          {
-           wchar_t wcu = wc;
-           if (iswlower (wc))
+           wchar_t wcu = towupper (wc);
+           if (wcu != wc)
              {
                size_t mbcdlen;
 
-               wcu = towupper (wc);
                mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
                if (BE (mbclen == mbcdlen, 1))
                  memcpy (pstr->mbs + byte_idx, buf, mbclen);
@@ -538,10 +536,7 @@ build_upper_buffer (re_string_t *pstr)
       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
       if (BE (pstr->trans != NULL, 0))
        ch = pstr->trans[ch];
-      if (islower (ch))
-       pstr->mbs[char_idx] = toupper (ch);
-      else
-       pstr->mbs[char_idx] = ch;
+      pstr->mbs[char_idx] = toupper (ch);
     }
   pstr->valid_len = char_idx;
   pstr->valid_raw_len = char_idx;

Reply via email to