On Mon, May 23, 2011 at 04:04:47PM +0200, Werner Fink wrote:
>
> Last but not least, my Shift-JIS test script (see attachment) fails on all
> platforms with
>
> test sjis begins at 2011-05-20+15:42:26
> \x81\x5c failed with 201\intf} != 201\
> \x81\x7c failed with 201\| != 201|
> sjis.sh[65]: Shift-JIS encoding failed
> test sjis failed at 2011-05-20+15:42:26 with exit code 2 [ 2 tests 2 errors
> ]
> test sjis(C.UTF-8) begins at 2011-05-20+15:42:26
> \x81\x5c failed with 201\intf} != 201\
> \x81\x7c failed with 201\| != 201|
> sjis.sh[65]: Shift-JIS encoding failed
> test sjis(C.UTF-8) failed at 2011-05-20+15:42:26 with exit code 2 [ 2 tests
> 2 errors ]
> test sjis(shcomp) begins at 2011-05-20+15:42:26
> \x81\x5c failed with 201\intf} != 201\
> \x81\x7c failed with 201\| != 201|
> shcomp-sjis.ksh[65]: Shift-JIS encoding failed
> test sjis(shcomp) failed at 2011-05-20+15:42:26 with exit code 2 [ 2 tests
> 2 errors ]
>
> which seems to be a parser problem, as the output of the program
> /usr/bin/printf is broken if and only if it is used in the
> way I've done it in my sjis.sh scriptlet:
>
> typeset printf=$(type -p printf 2>/dev/null)
> second=$(printf '%x' 92)
> export LANG=ja_JP.SJIS
> echo $(${printf} "\x81\x${second}")
With the patch in the attachment I avoid that multibyte characters
at the end of the output of a command are split into bytes. This
enables the subsequent parser of ksh to detect the multibyte
characters even if those multibyte characters include normal ASCII
bytes like the backslash (0x5c) or the pipe (0x7c) symbol, as used
in the Shift-JIS encoding.
I've tried to use the libast macros instead of wctomb() and
mbtowc().
Werner
--
"Having a smoking section in a restaurant is like having
a peeing section in a swimming pool." -- Edward Burr
--- src/cmd/ksh93/sh/macro.c
+++ src/cmd/ksh93/sh/macro.c 2011-05-27 13:08:05.000000000 +0200
@@ -505,7 +505,7 @@ static void copyto(register Mac_t *mp,in
int i;
unsigned char mb[8];
- n = wctomb((char*)mb, c);
+ n = mbconv((char*)mb, c);
for(i=0;i<n;i++)
sfputc(stkp,mb[i]);
}
@@ -2006,6 +2006,9 @@ static void comsubst(Mac_t *mp,register
struct _mac_ savemac;
int savtop = stktell(stkp);
char lastc, *savptr = stkfreeze(stkp,0);
+#if SHOPT_MULTIBYTE
+ wchar_t lastw;
+#endif
int was_history = sh_isstate(SH_HISTORY);
int was_verbose = sh_isstate(SH_VERBOSE);
int was_interactive = sh_isstate(SH_INTERACTIVE);
@@ -2129,6 +2132,9 @@ static void comsubst(Mac_t *mp,register
stkset(stkp,savptr,savtop);
newlines = 0;
lastc = 0;
+#if SHOPT_MULTIBYTE
+ lastw = 0;
+#endif
sfsetbuf(sp,(void*)sp,0);
bufsize = sfvalue(sp);
/* read command substitution output and put on stack or here-doc */
@@ -2179,6 +2185,17 @@ static void comsubst(Mac_t *mp,register
}
else if(lastc)
{
+#if SHOPT_MULTIBYTE
+ if(lastw)
+ {
+ int n;
+ char mb[8];
+ n = mbconv(mb, lastw);
+ mac_copy(mp,mb,n);
+ lastw = 0;
+ }
+ else
+#endif
mac_copy(mp,&lastc,1);
lastc = 0;
}
@@ -2187,8 +2204,22 @@ static void comsubst(Mac_t *mp,register
str[c] = 0;
else
{
+ ssize_t len = 1;
+
/* can't write past buffer so save last character */
- lastc = str[--c];
+#if SHOPT_MULTIBYTE
+ if ((len = mbsize(str))>1)
+ {
+ len = mb2wc(lastw,str,len);
+ if (len < 0)
+ {
+ lastw = 0;
+ len = 1;
+ }
+ }
+#endif
+ c -= len;
+ lastc = str[c];
str[c] = 0;
}
mac_copy(mp,str,c);
@@ -2206,7 +2237,21 @@ static void comsubst(Mac_t *mp,register
sfnputc(stkp,'\n',newlines);
}
if(lastc)
+ {
+#if SHOPT_MULTIBYTE
+ if(lastw)
+ {
+ int n;
+ char mb[8];
+ n = mbconv(mb, lastw);
+ mac_copy(mp,mb,n);
+ lastw = 0;
+ }
+ else
+#endif
mac_copy(mp,&lastc,1);
+ lastc = 0;
+ }
sfclose(sp);
return;
}
_______________________________________________
ast-developers mailing list
[email protected]
https://mailman.research.att.com/mailman/listinfo/ast-developers