On Mon, May 23, 2011 at 04:04:47PM +0200, Werner Fink wrote:
> 
> Last but not least, my Shift-JIS test script (see attachment) fails on all
> platforms with
> 
>   test sjis begins at 2011-05-20+15:42:26
>    \x81\x5c failed with 201\intf} != 201\
>    \x81\x7c failed with 201\| != 201|
>        sjis.sh[65]: Shift-JIS encoding failed
>   test sjis failed at 2011-05-20+15:42:26 with exit code 2 [ 2 tests 2 errors 
> ]
>   test sjis(C.UTF-8) begins at 2011-05-20+15:42:26
>    \x81\x5c failed with 201\intf} != 201\
>    \x81\x7c failed with 201\| != 201|
>        sjis.sh[65]: Shift-JIS encoding failed
>   test sjis(C.UTF-8) failed at 2011-05-20+15:42:26 with exit code 2 [ 2 tests 
> 2 errors ]
>   test sjis(shcomp) begins at 2011-05-20+15:42:26
>    \x81\x5c failed with 201\intf} != 201\
>    \x81\x7c failed with 201\| != 201|
>        shcomp-sjis.ksh[65]: Shift-JIS encoding failed
>   test sjis(shcomp) failed at 2011-05-20+15:42:26 with exit code 2 [ 2 tests 
> 2 errors ]
> 
> which seems to be a parser problem, as the output of the program
> /usr/bin/printf is broken if and only if it is used in the
> way I've done it in my sjis.sh scriptlet:
> 
>    typeset printf=$(type -p printf 2>/dev/null)
>    second=$(printf '%x' 92)
>    export LANG=ja_JP.SJIS
>    echo $(${printf} "\x81\x${second}")

The attached patch prevents a multibyte character at the end of a
command's output from being split into individual bytes.  This
enables the subsequent ksh parser to detect multibyte characters
even when they include ordinary ASCII bytes such as the backslash
(0x5c) or the pipe symbol (0x7c), as is the case in the Shift-JIS
encoding.

I've tried to use the libast macros instead of wctomb() and
mbtowc().


     Werner

-- 
  "Having a smoking section in a restaurant is like having
          a peeing section in a swimming pool." -- Edward Burr
--- src/cmd/ksh93/sh/macro.c
+++ src/cmd/ksh93/sh/macro.c	2011-05-27 13:08:05.000000000 +0200
@@ -505,7 +505,7 @@ static void copyto(register Mac_t *mp,in
 					int		i;
 					unsigned char	mb[8];
 
-					n = wctomb((char*)mb, c);
+					n = mbconv((char*)mb, c);
 					for(i=0;i<n;i++)
 						sfputc(stkp,mb[i]);
 				}
@@ -2006,6 +2006,9 @@ static void comsubst(Mac_t *mp,register
 	struct _mac_		savemac;
 	int			savtop = stktell(stkp);
 	char			lastc, *savptr = stkfreeze(stkp,0);
+#if SHOPT_MULTIBYTE
+	wchar_t			lastw;
+#endif
 	int			was_history = sh_isstate(SH_HISTORY);
 	int			was_verbose = sh_isstate(SH_VERBOSE);
 	int			was_interactive = sh_isstate(SH_INTERACTIVE);
@@ -2129,6 +2132,9 @@ static void comsubst(Mac_t *mp,register
 	stkset(stkp,savptr,savtop);
 	newlines = 0;
 	lastc = 0;
+#if SHOPT_MULTIBYTE
+	lastw = 0;
+#endif
 	sfsetbuf(sp,(void*)sp,0);
 	bufsize = sfvalue(sp);
 	/* read command substitution output and put on stack or here-doc */
@@ -2179,6 +2185,17 @@ static void comsubst(Mac_t *mp,register
 		}
 		else if(lastc)
 		{
+#if SHOPT_MULTIBYTE
+			if(lastw)
+			{
+				int	n;
+				char	mb[8];
+				n = mbconv(mb, lastw);
+				mac_copy(mp,mb,n);
+				lastw = 0;
+			}
+			else
+#endif
 			mac_copy(mp,&lastc,1);
 			lastc = 0;
 		}
@@ -2187,8 +2204,22 @@ static void comsubst(Mac_t *mp,register
 			str[c] = 0;
 		else
 		{
+			ssize_t len = 1;
+
 			/* can't write past buffer so save last character */
-			lastc = str[--c];
+#if SHOPT_MULTIBYTE
+			if ((len = mbsize(str))>1)
+			{
+				len = mb2wc(lastw,str,len);
+				if (len < 0)
+				{
+					lastw = 0;
+					len = 1;
+				}
+			}
+#endif
+			c -= len;
+			lastc = str[c];
 			str[c] = 0;
 		}
 		mac_copy(mp,str,c);
@@ -2206,7 +2237,21 @@ static void comsubst(Mac_t *mp,register
 			sfnputc(stkp,'\n',newlines);
 	}
 	if(lastc)
+	{
+#if SHOPT_MULTIBYTE
+		if(lastw)
+		{
+			int	n;
+			char	mb[8];
+			n = mbconv(mb, lastw);
+			mac_copy(mp,mb,n);
+			lastw = 0;
+		}
+		else
+#endif
 		mac_copy(mp,&lastc,1);
+		lastc = 0;
+	}
 	sfclose(sp);
 	return;
 }
_______________________________________________
ast-developers mailing list
[email protected]
https://mailman.research.att.com/mailman/listinfo/ast-developers

Reply via email to