From 2df2a752c8083c3e6046efa8a87171fb78bdf0fe Mon Sep 17 00:00:00 2001
From: Jim Meyering <meyering@fb.com>
Date: Mon, 14 Oct 2013 11:22:22 -0700
Subject: [PATCH 4/4] tests: extend the multibyte-white-space test

* tests/multibyte-white-space (utf8_space_characters): Add more
single-byte whitespace characters.  Align RHS hex values and
make the sed substitution less rigid, to accommodate the padding.
Also, ensure that grep '\S' exits with status 1.
---
 tests/multibyte-white-space | 55 +++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/tests/multibyte-white-space b/tests/multibyte-white-space
index df2fe1b..07ed085 100755
--- a/tests/multibyte-white-space
+++ b/tests/multibyte-white-space
@@ -14,38 +14,55 @@ require_en_utf8_locale_
 LC_ALL=en_US.UTF-8
 export LC_ALL

+# It would have been nice to test every UTF-8 character that has
+# the Unicode WSpace=Y (White_Space) character property; see
+# https://en.wikipedia.org/wiki/Whitespace_character.  However, that
+# would currently cause distracting failures everywhere I've tried.
+
 # FIXME: including any the following in the list below would
 # make this test fail on Fedora 19/glibc-2.17-18.fc19.
 # Restore them to the list once it is fixed.
 these_fail_with_glibc='
-U+00A0 NO-BREAK SPACE: c2 a0
-U+2007 FIGURE SPACE: e2 80 87
-U+200B ZERO WIDTH SPACE: e2 80 8b
-U+202F NARROW NO-BREAK SPACE: e2 80 af
+U+00A0 NO-BREAK SPACE:            c2 a0
+U+2007 FIGURE SPACE:              e2 80 87
+U+200B ZERO WIDTH SPACE:          e2 80 8b
+U+202F NARROW NO-BREAK SPACE:     e2 80 af
+'
+fail_with_other='
+U+000A Line feed:                 0a
+U+0085 Next line:                 c2 85
 '

-utf8_space_characters=$(sed 's/.*://;s/ /\\x/g' <<\EOF
-U+0020 SPACE: 20
-U+1680 OGHAM SPACE MARK: e1 9a 80
-U+2000 EN QUAD: e2 80 80
-U+2001 EM QUAD: e2 80 81
-U+2002 EN SPACE: e2 80 82
-U+2003 EM SPACE: e2 80 83
-U+2004 THREE-PER-EM SPACE: e2 80 84
-U+2005 FOUR-PER-EM SPACE: e2 80 85
-U+2006 SIX-PER-EM SPACE: e2 80 86
-U+2008 PUNCTUATION SPACE: e2 80 88
-U+2009 THIN SPACE: e2 80 89
-U+200A HAIR SPACE: e2 80 8a
+utf8_space_characters=$(sed 's/.*://;s/ *\</\\x/g' <<\EOF
+U+0009 Horizontal Tab:            09
+U+000B Vertical Tab:              0b
+U+000C Form feed:                 0c
+U+000D Carriage return:           0d
+U+0020 SPACE:                     20
+U+1680 OGHAM SPACE MARK:          e1 9a 80
+U+2000 EN QUAD:                   e2 80 80
+U+2001 EM QUAD:                   e2 80 81
+U+2002 EN SPACE:                  e2 80 82
+U+2003 EM SPACE:                  e2 80 83
+U+2004 THREE-PER-EM SPACE:        e2 80 84
+U+2005 FOUR-PER-EM SPACE:         e2 80 85
+U+2006 SIX-PER-EM SPACE:          e2 80 86
+U+2008 PUNCTUATION SPACE:         e2 80 88
+U+2009 THIN SPACE:                e2 80 89
+U+200A HAIR SPACE:                e2 80 8a
 U+205F MEDIUM MATHEMATICAL SPACE: e2 81 9f
-U+3000 IDEOGRAPHIC SPACE: e3 80 80
+U+3000 IDEOGRAPHIC SPACE:         e3 80 80
 EOF
 )

 fail=0

 for i in $utf8_space_characters; do
-  printf "$i\n" | grep -q '^\s$' || { warn_ "$i FAILED"; fail=1; }
+  printf "$i\n" | grep -q '^\s$' \
+      || { warn_ "$i FAILED to match \\s"; fail=1; }
+  printf "$i\n" | grep -q '\S'
+  test $? = 1 \
+      || { warn_ "$i vs. \\S FAILED"; fail=1; }
 done

 Exit $fail
-- 
1.8.4.299.gb3e7d24

