foad1.sh

Charles Levert Sun, 01 May 2005 12:06:52 -0700
Index: grep/tests/foad1.sh
diff -u grep/tests/foad1.sh:1.6 grep/tests/foad1.sh:1.7
--- grep/tests/foad1.sh:1.6     Thu Apr 28 20:27:11 2005
+++ grep/tests/foad1.sh Fri Apr 29 11:30:09 2005
@@ -19,6 +19,7 @@
   OUTPUT=`echo -n "$INPUT" | tr "/" "\n" | "$GREP" "$@" | tr "\n" "/"`
   if test "$OUTPUT" != "$EXPECT" || test "$VERBOSE" = "1"; then
     echo "Testing:  $GREP $@"
+    test "$LC_ALL" != C && test "$LC_ALL" != "" && echo "  LC_ALL: \"$LC_ALL\""
     echo "  input:  \"$INPUT\""
     echo "  output: \"$OUTPUT\""
   fi
@@ -37,9 +38,10 @@
 grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "word" -o -i
 grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "WORD" -o -i
 
-# Should display the line number (-n) or file name (-H) of every match,
-# not just of the first match on each input line.
+# Should display the line number (-n), octet offset (-b), or file name
+# (-H) of every match, not just of the first match on each input line.
 grep_test "wA wB/wC/" "1:wA/1:wB/2:wC/" "w." -o -n
+grep_test "XwA YwB/ZwC/" "1:wA/5:wB/9:wC/" "w." -o -b
 grep_test "wA wB/" "(standard input):wA/(standard input):wB/" "w." -o -H
 
 # End of a previous match should not match a "start of ..." expression.
@@ -80,12 +82,61 @@
 grep_test "LIN7C 55327/" "" -wF -e 5327 -e 5532
 
 
-# Test character class erroneously matching a '[' character.
-# If the UTF-8 locale doesn't work, skip this test silently.
-if LC_ALL=cs_CZ.UTF-8 locale -k LC_CTYPE 2>/dev/null |
+u=cs_CZ.UTF-8
+# If the UTF-8 locale doesn't work, skip these tests silently.
+if LC_ALL="$u" locale -k LC_CTYPE 2>/dev/null |
   "${GREP}" -q "charmap.*UTF-8"
 then
-  LC_ALL=cs_CZ.UTF-8 grep_test "[/" "" "[[:alpha:]]" -E
+  # Test character class erroneously matching a '[' character.
+  LC_ALL="$u" grep_test "[/" "" "[[:alpha:]]" -E
+
+  for mode in F G E; do
+    # Hint:  pipe the output of these tests in
+    #        "| LESS= LESSCHARSET=ascii less".
+    # LETTER N WITH TILDE is U+00F1 and U+00D1.
+    # LETTER Y WITH DIAERESIS is U+00FF and U+0178.
+    LC_ALL="$u" grep_test 'añÿb/AÑŸB/' 'ñÿ/ÑŸ/' 'ñÿ' -o -i -$mode
+    LC_ALL="$u" grep_test 'añÿb/AÑŸB/' 'ñÿ/ÑŸ/' 'ÑŸ' -o -i -$mode
+    LC_ALL="$u" grep_test 'añÿb/AÑŸB/' "a${CB}ñÿ${CE}b/A${CB}ÑŸ${CE}B/" 'ñÿ' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'añÿb/AÑŸB/' "a${CB}ñÿ${CE}b/A${CB}ÑŸ${CE}B/" 'ÑŸ' --color=always -i -$mode
+
+    # POSIX (about -i):  ... each character in the string is matched
+    # against the pattern, not only the character, but also its case
+    # counterpart (if any), shall be matched.
+    # The following were chosen because of their trickiness due to the
+    # differing UTF-8 octet length of their counterpart and to the
+    # non-reflexivity of their mapping.
+    # Beware of homographs!  Look carefully at the actual octets.
+
+    # lc(U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE) = U+0069 LATIN SMALL LETTER I
+    LC_ALL="$u" grep_test 'aİb/' "a${CB}İ${CE}b/" 'i' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aib/' ''               'İ' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aİb/' ''               'I' --color=always -i -$mode
+    # uc(U+0131 LATIN SMALL LETTER DOTLESS I)          = U+0049 LATIN CAPITAL LETTER I
+    LC_ALL="$u" grep_test 'aıb/' "a${CB}ı${CE}b/" 'I' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aIb/' ''               'ı' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aıb/' ''               'i' --color=always -i -$mode
+    # uc(U+017F LATIN SMALL LETTER LONG S)             = U+0053 LATIN CAPITAL LETTER S
+    LC_ALL="$u" grep_test 'aſb/' "a${CB}ſ${CE}b/" 'S' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aSb/' ''               'ſ' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aſb/' ''               's' --color=always -i -$mode
+    # uc(U+1FBE GREEK PROSGEGRAMMENI)                  = U+0399 GREEK CAPITAL LETTER IOTA
+    LC_ALL="$u" grep_test 'aιb/' "a${CB}ι${CE}b/" 'Ι' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aΙb/' ''               'ι' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aιb/' ''               'ι' --color=always -i -$mode
+    # lc(U+2126 OHM SIGN)                              = U+03C9 GREEK SMALL LETTER OMEGA
+    LC_ALL="$u" grep_test 'aΩb/' "a${CB}Ω${CE}b/" 'ω' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aωb/' ''               'Ω' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aΩb/' ''               'Ω' --color=always -i -$mode
+    # lc(U+212A KELVIN SIGN)                           = U+006B LATIN SMALL LETTER K
+    LC_ALL="$u" grep_test 'aKb/' "a${CB}K${CE}b/" 'k' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'akb/' ''               'K' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aKb/' ''               'K' --color=always -i -$mode
+    # lc(U+212B ANGSTROM SIGN)                         = U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
+    LC_ALL="$u" grep_test 'aÅb/' "a${CB}Å${CE}b/" 'å' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aåb/' ''               'Å' --color=always -i -$mode
+    LC_ALL="$u" grep_test 'aÅb/' ''               'Å' --color=always -i -$mode
+  done
 fi
Changes to grep/tests/foad1.sh

Reply via email to