Author: jilles
Date: Sat May  7 14:32:16 2011
New Revision: 221602
URL: http://svn.freebsd.org/changeset/base/221602

Log:
  sh: Add UTF-8 support to ${#var}.
  
  If the current locale uses UTF-8, ${#var} counts codepoints (more precisely,
  bytes b with (b & 0xc0) != 0x80).

Added:
  head/tools/regression/bin/sh/expansion/length7.0   (contents, props changed)
  head/tools/regression/bin/sh/expansion/length8.0   (contents, props changed)
Modified:
  head/bin/sh/expand.c

Modified: head/bin/sh/expand.c
==============================================================================
--- head/bin/sh/expand.c        Sat May  7 13:57:30 2011        (r221601)
+++ head/bin/sh/expand.c        Sat May  7 14:32:16 2011        (r221602)
@@ -665,6 +665,7 @@ evalvar(char *p, int flag)
        int special;
        int startloc;
        int varlen;
+       int varlenb;
        int easy;
        int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR);
 
@@ -712,8 +713,15 @@ again: /* jump here after setting a vari
                if (special) {
                        varvalue(var, varflags & VSQUOTE, subtype, flag);
                        if (subtype == VSLENGTH) {
-                               varlen = expdest - stackblock() - startloc;
-                               STADJUST(-varlen, expdest);
+                               varlenb = expdest - stackblock() - startloc;
+                               varlen = varlenb;
+                               if (localeisutf8) {
+                                       val = stackblock() + startloc;
+                                       for (;val != expdest; val++)
+                                               if ((*val & 0xC0) == 0x80)
+                                                       varlen--;
+                               }
+                               STADJUST(-varlenb, expdest);
                        }
                } else {
                        char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX
@@ -721,7 +729,9 @@ again: /* jump here after setting a vari
 
                        if (subtype == VSLENGTH) {
                                for (;*val; val++)
-                                       varlen++;
+                                       if (!localeisutf8 ||
+                                           (*val & 0xC0) != 0x80)
+                                               varlen++;
                        }
                        else {
                                if (quotes)

Added: head/tools/regression/bin/sh/expansion/length7.0
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/tools/regression/bin/sh/expansion/length7.0    Sat May  7 14:32:16 2011        (r221602)
@@ -0,0 +1,14 @@
+# $FreeBSD$
+
+unset LC_ALL
+LC_CTYPE=en_US.UTF-8
+export LC_CTYPE
+
+# a umlaut
+s=$(printf '\303\244')
+# euro sign
+s=$s$(printf '\342\202\254')
+# some sort of 't' outside BMP
+s=$s$(printf '\360\235\225\245')
+set -- "$s"
+[ ${#s} = 3 ] && [ ${#1} = 3 ]

Added: head/tools/regression/bin/sh/expansion/length8.0
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/tools/regression/bin/sh/expansion/length8.0    Sat May  7 14:32:16 2011        (r221602)
@@ -0,0 +1,14 @@
+# $FreeBSD$
+
+unset LC_ALL
+LC_CTYPE=en_US.ISO8859-1
+export LC_CTYPE
+
+# a umlaut
+s=$(printf '\303\244')
+# euro sign
+s=$s$(printf '\342\202\254')
+# some sort of 't' outside BMP
+s=$s$(printf '\360\235\225\245')
+set -- "$s"
+[ ${#s} = 9 ] && [ ${#1} = 9 ]
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to