Hi,

a new version of my patch which also makes the SHIFT-JIS character
0x81 0x7C work (yes, 0x7C is the pipe symbol).  Besides this, I
changed the mbchar() cpp macro so that the printf builtin can be used.
The problem was that the former version of mbchar() used mbtowc()
even for real ASCII characters, which shifted the backslash (0x5C)
to the latin1 Yen symbol (0xA5).  The patch also includes a test
case for Japanese SHIFT-JIS characters which include an ASCII
character as their second byte.

       Werner

-- 
 Dr. Werner Fink <[EMAIL PROTECTED]>
 SuSE LINUX Products GmbH,  Maxfeldstrasse 5,  Nuernberg,  Germany
 GF: Markus Rex,  HRB 16746 (AG Nuernberg)
 phone: +49-911-740-53-0,  fax: +49-911-3206727,  www.opensuse.org
------------------------------------------------------------------
  "Having a smoking section in a restaurant is like having
          a peeing section in a swimming pool." -- Edward Burr
--- src/cmd/ksh93/sh/macro.c
+++ src/cmd/ksh93/sh/macro.c    2007-11-20 12:09:31.000000000 +0000
@@ -1860,13 +1860,20 @@ static void comsubst(Mac_t *mp,int type)
        return;
 }
 
+#if SHOPT_MULTIBYTE
+#define ismbstate(c,s) ((state[*(unsigned 
char*)(c)]==(s))&&(!mbwide()||(mbsize(c)<2)))
+#else
+#define ismbstate(c,s) (state[*(unsigned char*)(c)]==(s))
+#endif
+
+
 /*
  * copy <str> onto the stack
  */
 static void mac_copy(register Mac_t *mp,register const char *str, register int 
size)
 {
        register char           *state;
-       register const char     *cp=str;
+       register const char     *cp=str, *sp;
        register int            c,n,nopat;
        nopat = (mp->quote||mp->assign==1||mp->arith);
        if(mp->zeros)
@@ -1885,14 +1892,25 @@ static void mac_copy(register Mac_t *mp,
                /* insert \ before file expansion characters */
                while(size-->0)
                {
+                       sp = cp;
                        c = state[n= *(unsigned char*)cp++];
+#if SHOPT_MULTIBYTE
+                       if(mbwide())
+                       {       ssize_t len = mbsize(sp);
+                               if (len-- > 1)
+                               {       cp += len;
+                                       size -= len;
+                                       continue;
+                               }
+                       }
+#endif /* SHOPT_MULTIBYTE */
                        if(nopat&&(c==S_PAT||c==S_ESC||c==S_BRACT||c==S_ENDCH) 
&& mp->pattern!=3)
                                c=1;
                        else if(mp->pattern==4 && 
(c==S_ESC||c==S_BRACT||c==S_ENDCH || isastchar(n)))
                                c=1;
                        else if(mp->pattern==2 && c==S_SLASH)
                                c=1;
-                       else if(mp->pattern==3 && c==S_ESC && (state[*(unsigned 
char*)cp]==S_DIG||(*cp==ESCAPE)))
+                       else if(mp->pattern==3 && c==S_ESC && 
(ismbstate(cp,S_DIG)||(*cp==ESCAPE)))
                        {
                                if(!(c=mp->quote))
                                        cp++;
@@ -1916,14 +1934,14 @@ static void mac_copy(register Mac_t *mp,
                state = sh.ifstable;
                if(mp->pattern)
                {
-                       char *sp = "&|()";
-                       while(c = *sp++)
+                       sp = "&|()";
+                       while((c = *sp++))
                        {
                                if(state[c]==0)
                                        state[c] = S_EPAT;
                        }
                        sp = "*?[{";
-                       while(c = *sp++)
+                       while((c = *sp++))
                        {
                                if(state[c]==0)
                                        state[c] = S_PAT;
@@ -1933,7 +1951,20 @@ static void mac_copy(register Mac_t *mp,
                }
                while(size-->0)
                {
-                       if((n=state[c= *(unsigned char*)cp++])==S_ESC || 
n==S_EPAT)
+                       sp = cp;
+                       n = state[c= *(unsigned char*)cp++];
+#if SHOPT_MULTIBYTE
+                       if(mbwide() && (n!=S_MBYTE))
+                       {       ssize_t len = mbsize(sp);
+                               if (len-- > 1)
+                               {       cp += len;
+                                       size -= len;
+                                       stakwrite(sp, len+1);
+                                       continue;
+                               }
+                       }
+#endif /* SHOPT_MULTIBYTE */
+                       if(n==S_ESC || n==S_EPAT)
                        {
                                /* don't allow extended patterns in this case */
                                mp->patfound = mp->pattern;
--- src/cmd/ksh93/tests/sjis.sh
+++ src/cmd/ksh93/tests/sjis.sh 2007-11-20 13:07:57.000000000 +0000
@@ -0,0 +1,62 @@
+########################################################################
+#                                                                      #
+#   Copyright (c) 2007 SuSE Linux Products GmbH, Nuernberg, Germany    #
+#                                                                      #
+#   This library is free software; you can redistribute it and/or      #
+#   modify it under the terms of the GNU Lesser General Public         #
+#   License as published by the Free Software Foundation;              #
+#   version 2.1 of the License.                                        #
+#                                                                      #
+#   This library is distributed in the hope that it will be useful,    #
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of     #
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the      #
+#   GNU Lesser General Public License at                               #
+#   http://www.gnu.org/licenses/lgpl-2.1.html for more details         #
+#                                                                      #
+#   Author: Werner Fink <[EMAIL PROTECTED]>                               #
+#                                                                      #
+########################################################################
+
+#
+# Byte ranges for Shift-JIS encoding (hexadecimal):
+# First byte:   81-9F, E0-EF
+# Second byte:  40-7E, 80-FC
+#
+# Now test out some multi byte characters which
+# include 7bit aka ASCII bytes with 0x81 0x{40-7E}
+#
+
+typeset -i chr=0
+typeset -i err=0
+typeset printf=$(type -p printf 2>/dev/null)
+
+unset LC_ALL
+unset LC_CTYPE
+export LANG=ja_JP.SJIS
+
+for second in $(seq 64 126); do
+    : $((chr++))
+    second=$(printf '%x' ${second})
+    mbchar="$(printf "\x81\x${second}")"
+    if test -z "${mbchar}" ; then
+       : $((err++))            # ERROR in builtin printf
+       continue
+    fi
+    if test -x "${printf}" ; then
+       if test $(${printf} "\x81\x${second}") != ${mbchar} ; then
+           : $((err++))        # ERROR in builtin printf
+           continue
+       fi
+    fi
+    uq=$(echo ${mbchar})
+    dq=$(echo "${mbchar}")
+    test "$uq" != "$dq" && let err+=1
+done
+
+if test $err -ne 0 ; then
+    : err_exit
+    : err_exit
+    print -u2 -n "\t"
+    print -u2 -r ${0##*/}[$LINENO]: "Shift-JIS encoding failed"
+fi
+exit $err
--- src/lib/libast/include/ast.h
+++ src/lib/libast/include/ast.h        2007-11-20 11:55:01.000000000 +0000
@@ -176,13 +176,16 @@ typedef struct
 #define mbcoll()       (ast.mb_xfrm!=0)
 #define mbwide()       (mbmax()>1)
 
-#define mbchar(p)      
(mbwide()?((ast.tmp_int=(*ast.mb_towc)(&ast.tmp_wchar,(char*)(p),mbmax()))>0?((p+=ast.tmp_int),ast.tmp_wchar):(p++,ast.tmp_int)):(*(unsigned
 char*)(p++)))
 #define mbinit()       
(mbwide()?(*ast.mb_towc)((wchar_t*)0,(char*)0,mbmax()):0)
 #define mbsize(p)      (mbwide()?(*ast.mb_len)((char*)(p),mbmax()):((p),1))
 #define mbconv(s,w)    (ast.mb_conv?(*ast.mb_conv)(s,w):((*(s)=(w)),1))
 #define mbwidth(w)     
(ast.mb_width&&((ast.tmp_int=(*ast.mb_width)(w))>=0||(w)>UCHAR_MAX)?ast.tmp_int:1)
 #define mbxfrm(t,f,n)  (mbcoll()?(*ast.mb_xfrm)((char*)(t),(char*)(f),n):0)
-
+#define mbchar(p)      ((mbsize(p)>1) ? \
+                               
(((ast.tmp_int=(*ast.mb_towc)(&ast.tmp_wchar,(char*)(p),mbmax()))>0) ? \
+                                       ((p+=ast.tmp_int),ast.tmp_wchar) : \
+                                       (*(unsigned char*)(p++))) : \
+                               (*(unsigned char*)(p++)))
 /*
  * common macros
  */
_______________________________________________
ast-developers mailing list
[email protected]
https://mailman.research.att.com/mailman/listinfo/ast-developers

Reply via email to