Hello all. Here is my second try at Rational Range Interpretation (RRI). The three patches are for dfa.c, grep.texi, and gnulib/reg*.c.
Paolo - I still think that my change is correct when compiling the DFA, since the regex routines were called only to check whether the range is valid. Thanks, Arnold --------------- From 5d4a1e56345e14c19224b47424a01b51120cf234 Mon Sep 17 00:00:00 2001 From: Arnold D. Robbins <[email protected]> Date: Mon, 16 Jan 2012 22:04:11 +0200 Subject: [PATCH 1/2] Rational Range Interpretation implemented. * dfa.c (hard_LC_COLLATE): Removed. (parse_bracket_exp): Compare lower and upper range bounds directly. (dfaparse): Don't set hard_LC_COLLATE. (match_mb_charset): Test wide character directly instead of using wcscoll. --- src/dfa.c | 41 ++++++----------------------------------- 1 files changed, 6 insertions(+), 35 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index 6ab0ab4..edc6bd9 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -29,6 +29,7 @@ #include <limits.h> #include <string.h> #include <locale.h> +#include <stdbool.h> #define STREQ(a, b) (strcmp (a, b) == 0) @@ -46,7 +47,7 @@ #include "gettext.h" #define _(str) gettext (str) -#include "mbsupport.h" /* defines MBS_SUPPORT if appropriate */ +#include "mbsupport.h" /* defines MBS_SUPPORT to 1 or 0, as appropriate */ #include <wchar.h> #include <wctype.h> @@ -56,7 +57,6 @@ #include "regex.h" #include "dfa.h" -#include "hard-locale.h" #include "xalloc.h" /* HPUX, define those as macros in sys/param.h */ @@ -657,7 +657,6 @@ static int laststart; /* True if we're separated from beginning or (, | only by zero-width characters. */ static int parens; /* Count of outstanding left parens. */ static int minrep, maxrep; /* Repeat counts for {m,n}. */ -static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */ static int cur_mb_len = 1; /* Length of the multibyte representation of wctok. 
*/ @@ -992,26 +991,8 @@ parse_bracket_exp (void) c1 = tolower (c1); c2 = tolower (c2); } - if (!hard_LC_COLLATE) - for (c = c1; c <= c2; c++) - setbit_case_fold_c (c, ccl); - else - { - /* Defer to the system regex library about the meaning - of range expressions. */ - regex_t re; - char pattern[6] = { '[', c1, '-', c2, ']', 0 }; - char subject[2] = { 0, 0 }; - regcomp (&re, pattern, REG_NOSUB); - for (c = 0; c < NOTCHAR; ++c) - { - subject[0] = c; - if (!(case_fold && isupper (c)) - && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH) - setbit_case_fold_c (c, ccl); - } - regfree (&re); - } + for (c = c1; c <= c2; c++) + setbit_case_fold_c (c, ccl); } colon_warning_state |= 8; @@ -1792,9 +1773,6 @@ dfaparse (char const *s, size_t len, struct dfa *d) lasttok = END; laststart = 1; parens = 0; -#ifdef LC_COLLATE - hard_LC_COLLATE = hard_locale (LC_COLLATE); -#endif if (MB_CUR_MAX > 1) { cur_mb_len = 0; @@ -2884,7 +2862,6 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx) with which this operator match. */ int op_len; /* Length of the operator. */ char buffer[128]; - wchar_t wcbuf[6]; /* Pointer to the structure to which we are currently refering. */ struct mb_char_classes *work_mbc; @@ -2961,17 +2938,11 @@ match_mb_charset (struct dfa *d, int s, position pos, int idx) } } - wcbuf[0] = wc; - wcbuf[1] = wcbuf[3] = wcbuf[5] = '\0'; - /* match with a range? */ for (i = 0; i<work_mbc->nranges; i++) { - wcbuf[2] = work_mbc->range_sts[i]; - wcbuf[4] = work_mbc->range_ends[i]; - - if (wcscoll(wcbuf, wcbuf+2) >= 0 && - wcscoll(wcbuf+4, wcbuf) >= 0) + if (work_mbc->range_sts[i] <= wc && + wc <= work_mbc->range_ends[i]) goto charset_matched; } -- 1.7.1
