Reported by Jaroslav Škarvada <[email protected]>. This is unfortunate. grep needs an automatic testcase generator.
* NEWS: Document bug. * THANKS: Mention reporter. * src/dfa.c (setbit_case_fold): Change type of first argument for self-documentation. (parse_bracket_exp): Fix call. * tests/Makefile.am: Add new testcase. * tests/char-class-multibyte: New testcase. --- NEWS | 4 ++++ THANKS | 1 + src/dfa.c | 4 ++-- tests/Makefile.am | 1 + tests/char-class-multibyte | 23 +++++++++++++++++++++++ 5 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tests/char-class-multibyte diff --git a/NEWS b/NEWS index 437a93a..9980df9 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,10 @@ GNU grep NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** Bug fixes + + Character classes could cause a segmentation fault if they included a + multibyte character. This is a regression from 2.5.4. * Noteworthy changes in release 2.6 (2010-03-23) [stable] diff --git a/THANKS b/THANKS index 6812dde..d1d1ad4 100644 --- a/THANKS +++ b/THANKS @@ -33,6 +33,7 @@ Harald Hanche-Olsen <[email protected]> Hans-Bernhard Broeker <[email protected]> Heikki Korpela <[email protected]> Isamu Hasegawa <[email protected]> +Jaroslav Škarvada <[email protected]> Jeff Bailey <[email protected]> Jim Hand <[email protected]> Jim Meyering <[email protected]> diff --git a/src/dfa.c b/src/dfa.c index cb45193..a0d9410 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -243,7 +243,7 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol) For MB_CUR_MAX > 1, one or both of the two cases may not be set, so the resulting charset may only be used as an optimization. */ static void -setbit_case_fold (unsigned int b, charclass c) +setbit_case_fold (wint_t b, charclass c) { if (case_fold) { @@ -691,7 +691,7 @@ parse_bracket_exp (void) continue; } - setbit_case_fold (c, ccl); + setbit_case_fold (wc, ccl); #ifdef MBS_SUPPORT /* Build normal characters. 
*/ if (MB_CUR_MAX > 1) diff --git a/tests/Makefile.am b/tests/Makefile.am index 67763b2..02db64c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -22,6 +22,7 @@ TESTS = \ case-fold-char-class \ case-fold-char-range \ case-fold-char-type \ + char-class-multibyte \ dfaexec-multibyte \ empty.sh \ ere.sh \ diff --git a/tests/char-class-multibyte b/tests/char-class-multibyte new file mode 100644 index 0000000..d77c6de --- /dev/null +++ b/tests/char-class-multibyte @@ -0,0 +1,23 @@ +#!/bin/sh +# This would segfault for grep-2.6 +: ${srcdir=.} +. "$srcdir/init.sh"; path_prepend_ ../src + +printf 'É\n' > exp1 || framework_failure +fail=0 + +for LOC in en_US.UTF-8 $LOCALE_FR_UTF8; do + out=out1-$LOC + printf 'á\nç\nÉ\n' | LC_ALL=$LOC grep '[é]' > $out || fail=1 + compare $out exp1 || fail=1 +done + +printf 'é\n' > exp2 || framework_failure + +for LOC in en_US.UTF-8 $LOCALE_FR_UTF8; do + out=out2-$LOC + printf 'á\nç\né\n' | LC_ALL=$LOC grep '[É]' > $out || fail=1 + compare $out exp2 || fail=1 +done + +Exit $fail -- 1.6.6.1
