From 08d75a0e6692438d588306c516c80f42de9737d1 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Sun, 2 Mar 2014 09:11:07 +0900 Subject: [PATCH] fix mismatch between dfa and regex in the treatment of titlecase --- src/dfa.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index 80bb807..f7f526c 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -90,6 +90,8 @@ /* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ typedef unsigned int charclass[CHARCLASS_INTS]; +static const char * const wctname[] = { "tolower", "toupper", "totitle", NULL }; + /* Convert a possibly-signed character to an unsigned character. This is a bit safer than casting to unsigned char, since it catches some type errors that the cast doesn't. */ @@ -725,8 +727,17 @@ setbit_case_fold_c (int b, charclass c) return; if (case_fold) { - setbit_wc (towlower (wc), c); - setbit_wc (towupper (wc), c); + unsigned int i; + for (i = 0; wctname[i]; i++) + { + wctrans_t wct = wctrans (wctname[i]); + if (wct) + { + wint_t folded = towctrans (wc, wct); + if (folded != wc) + setbit_wc (folded, c); + } + } } } else @@ -1759,17 +1770,19 @@ atom (void) addtok_wc (wctok); if (case_fold) { - wint_t folded = towlower (wctok); - if (folded != wctok) + unsigned int i; + for (i = 0; wctname[i]; i++) { - addtok_wc (folded); - addtok (OR); - } - folded = towupper (wctok); - if (folded != wctok) - { - addtok_wc (folded); - addtok (OR); + wctrans_t wct = wctrans (wctname[i]); + if (wct) + { + wint_t folded = towctrans (wctok, wct); + if (folded != wctok) + { + addtok_wc (folded); + addtok (OR); + } + } } } -- 1.8.5.2