[I've reopened 16911 since the bug's not fixed on OS X.]
Here's my guess. In glibc's en_US locale, 'Lj' is considered to be both
uppercase and lowercase; but in OS X's en_US locale, it's considered to
be neither uppercase nor lowercase. If so, the attached gnulib patch
should fix the problem (though I can't easily test this). Could you
please give it a try?
By the way, I'd like to remove the need for grep's local differences
from the glibc regex code. I assume they're there only to pacify GCC's
warning flags, and we can do that with pragmas in gnulib. One fix at a
time, though.
diff --git a/lib/regcomp.c b/lib/regcomp.c
index 56faf11..664a0fe 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -297,7 +297,10 @@ re_set_fastmap (char *fastmap, bool icase, int ch)
{
fastmap[ch] = 1;
if (icase)
- fastmap[tolower (ch)] = 1;
+ {
+ fastmap[tolower (ch)] = 1;
+ fastmap[toupper (ch)] = 1;
+ }
}
/* Helper function for re_compile_fastmap.
@@ -334,10 +337,15 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
*p++ = dfa->nodes[node].opr.c;
memset (&state, '\0', sizeof (state));
if (__mbrtowc (&wc, (const char *) buf, p - buf,
- &state) == p - buf
- && (__wcrtomb ((char *) buf, towlower (wc), &state)
- != (size_t) -1))
- re_set_fastmap (fastmap, false, buf[0]);
+ &state) == p - buf)
+ {
+ if (__wcrtomb ((char *) buf, towlower (wc), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, false, buf[0]);
+ if (__wcrtomb ((char *) buf, towupper (wc), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, false, buf[0]);
+ }
}
#endif
}
@@ -414,6 +422,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
!= (size_t) -1)
re_set_fastmap (fastmap, false, *(unsigned char *) buf);
+ if (__wcrtomb (buf, towupper (cset->mbchars[i]), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, false, *(unsigned char *) buf);
}
}
}
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index 0343ee6..79181a3 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -311,12 +311,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
+ byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen < (size_t) -2, 1))
{
- wchar_t wcu = wc;
- if (iswlower (wc))
+ wchar_t wcu = towupper (wc);
+ if (wcu != wc)
{
size_t mbcdlen;
- wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
@@ -381,12 +380,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen < (size_t) -2, 1))
{
- wchar_t wcu = wc;
- if (iswlower (wc))
+ wchar_t wcu = towupper (wc);
+ if (wcu != wc)
{
size_t mbcdlen;
- wcu = towupper (wc);
mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
@@ -538,10 +536,7 @@ build_upper_buffer (re_string_t *pstr)
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans[ch];
- if (islower (ch))
- pstr->mbs[char_idx] = toupper (ch);
- else
- pstr->mbs[char_idx] = ch;
+ pstr->mbs[char_idx] = toupper (ch);
}
pstr->valid_len = char_idx;
pstr->valid_raw_len = char_idx;