Index: grep/tests/foad1.sh
diff -u grep/tests/foad1.sh:1.6 grep/tests/foad1.sh:1.7
--- grep/tests/foad1.sh:1.6 Thu Apr 28 20:27:11 2005
+++ grep/tests/foad1.sh Fri Apr 29 11:30:09 2005
@@ -19,6 +19,7 @@
OUTPUT=`echo -n "$INPUT" | tr "/" "\n" | "$GREP" "$@" | tr "\n" "/"`
if test "$OUTPUT" != "$EXPECT" || test "$VERBOSE" = "1"; then
echo "Testing: $GREP $@"
+ test "$LC_ALL" != C && test "$LC_ALL" != "" && echo " LC_ALL: \"$LC_ALL\""
echo " input: \"$INPUT\""
echo " output: \"$OUTPUT\""
fi
@@ -37,9 +38,10 @@
grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "word" -o -i
grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "WORD" -o -i
-# Should display the line number (-n) or file name (-H) of every match,
-# not just of the first match on each input line.
+# Should display the line number (-n), octet offset (-b), or file name
+# (-H) of every match, not just of the first match on each input line.
grep_test "wA wB/wC/" "1:wA/1:wB/2:wC/" "w." -o -n
+grep_test "XwA YwB/ZwC/" "1:wA/5:wB/9:wC/" "w." -o -b
grep_test "wA wB/" "(standard input):wA/(standard input):wB/" "w." -o -H
# End of a previous match should not match a "start of ..." expression.
@@ -80,12 +82,61 @@
grep_test "LIN7C 55327/" "" -wF -e 5327 -e 5532
-# Test character class erroneously matching a '[' character.
-# If the UTF-8 locale doesn't work, skip this test silently.
-if LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null |
+u=cs_CZ.UTF-8
+# If the UTF-8 locale doesn't work, skip these tests silently.
+if LC_ALL="$u" locale -k LC_CTYPE 2>/dev/null |
"${GREP}" -q "charmap.*UTF-8"
then
- LC_ALL=cs_CZ.UTF-8 grep_test "[/" "" "[[:alpha:]]" -E
+ # Test character class erroneously matching a '[' character.
+ LC_ALL="$u" grep_test "[/" "" "[[:alpha:]]" -E
+
+ for mode in F G E; do
+ # Hint: pipe the output of these tests in
+ # "| LESS= LESSCHARSET=ascii less".
+ # LETTER N WITH TILDE is U+00F1 and U+00D1.
+ # LETTER Y WITH DIAERESIS is U+00FF and U+0178.
+ LC_ALL="$u" grep_test 'añÿb/AÑŸB/' 'ñÿ/ÑŸ/' 'ñÿ' -o -i -$mode
+ LC_ALL="$u" grep_test 'añÿb/AÑŸB/' 'ñÿ/ÑŸ/' 'ÑŸ' -o -i -$mode
+ LC_ALL="$u" grep_test 'añÿb/AÑŸB/' "a${CB}ñÿ${CE}b/A${CB}ÑŸ${CE}B/" 'ñÿ' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'añÿb/AÑŸB/' "a${CB}ñÿ${CE}b/A${CB}ÑŸ${CE}B/" 'ÑŸ' --color=always -i -$mode
+
+ # POSIX (about -i): ... each character in the string is matched
+ # against the pattern, not only the character, but also its case
+ # counterpart (if any), shall be matched.
+ # The following were chosen because of their trickiness due to the
+ # differing UTF-8 octet length of their counterpart and to the
+ # non-reflexivity of their mapping.
+ # Beware of homographs! Look carefully at the actual octets.
+
+ # lc(U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE) = U+0069 LATIN SMALL LETTER I
+ LC_ALL="$u" grep_test 'aİb/' "a${CB}İ${CE}b/" 'i' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aib/' '' 'İ' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aİb/' '' 'I' --color=always -i -$mode
+ # uc(U+0131 LATIN SMALL LETTER DOTLESS I) = U+0049 LATIN CAPITAL LETTER I
+ LC_ALL="$u" grep_test 'aıb/' "a${CB}ı${CE}b/" 'I' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aIb/' '' 'ı' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aıb/' '' 'i' --color=always -i -$mode
+ # uc(U+017F LATIN SMALL LETTER LONG S) = U+0053 LATIN CAPITAL LETTER S
+ LC_ALL="$u" grep_test 'aſb/' "a${CB}ſ${CE}b/" 'S' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aSb/' '' 'ſ' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aſb/' '' 's' --color=always -i -$mode
+ # uc(U+1FBE GREEK PROSGEGRAMMENI) = U+0399 GREEK CAPITAL LETTER IOTA
+ LC_ALL="$u" grep_test 'aιb/' "a${CB}ι${CE}b/" 'Ι' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aΙb/' '' 'ι' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aιb/' '' 'ι' --color=always -i -$mode
+ # lc(U+2126 OHM SIGN) = U+03C9 GREEK SMALL LETTER OMEGA
+ LC_ALL="$u" grep_test 'aΩb/' "a${CB}Ω${CE}b/" 'ω' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aωb/' '' 'Ω' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aΩb/' '' 'Ω' --color=always -i -$mode
+ # lc(U+212A KELVIN SIGN) = U+006B LATIN SMALL LETTER K
+ LC_ALL="$u" grep_test 'aKb/' "a${CB}K${CE}b/" 'k' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'akb/' '' 'K' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aKb/' '' 'K' --color=always -i -$mode
+ # lc(U+212B ANGSTROM SIGN) = U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
+ LC_ALL="$u" grep_test 'aÅb/' "a${CB}Å${CE}b/" 'å' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aåb/' '' 'Å' --color=always -i -$mode
+ LC_ALL="$u" grep_test 'aÅb/' '' 'Å' --color=always -i -$mode
+ done
fi