Hi! ----
Attached is the 2nd version of the ksh93 i18n fix ("ksh93-shift_ijs_patch002.diff.txt") for testing. Can anyone please help verify that this new patch for ksh93 (Korn Shell 93) fixes the problems with inputting/editing text in ja_JP.PCK or *.UTF-8 locales? Building ksh93 from source+patch: 1. Download http://svn.genunix.org/repos/on/branches/ksh93/gisburn/scripts/buildksh93.ksh - this script builds ksh93 from source (and also contains instructions on how to download the sources via "wget") 2. Fetch the sources as described in "buildksh93.ksh" 3. Edit "buildksh93.ksh" to match the platform (default is Solaris 10 on i386 with Sun Studio 10/11) 4. Unpack the sources: % mkdir build % cd build % gunzip -c ../ast-ksh.2006-02-14.tgz | tar -xf - % gunzip -c ../INIT.2006-01-24.tgz | tar -xf - 5. Apply patch: % gpatch -p0 <ksh93-shift_ijs_patch002.diff.txt 6. Build ksh93: % time nice ksh ../buildksh93.ksh 2>&1 | tee -a buildlog.log 7. Start ksh93: % ./arch/sol10.i386/bin/ksh % set -o emacs # for emacs editing mode; alternatively, you can use "vi" or "gmacs" editing mode # input and/or edit Japanese/Chinese/Korean text and report whether this works correctly Thanks for the help! :-) ---- Bye, Roland -- __ . . 
__ (o.\ \/ /.o) roland.mainz at nrubsig.org \__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer /O /==\ O\ TEL +49 641 7950090 (;O/ \/ \O;) -------------- next part -------------- --- src/cmd/ksh93/edit/edit.c +++ src/cmd/ksh93/edit/edit.c 2006-04-19 12:17:59.000000000 +0200 @@ -28,6 +28,7 @@ */ #include <ast.h> +#include <ast_wchar.h> #include <errno.h> #include <ccode.h> #include <ctype.h> @@ -53,7 +54,15 @@ #define CURSOR_UP "\E[A" #if SHOPT_MULTIBYTE -# define is_print(c) ((c&~STRIP) || isprint(c)) +# if _hdr_wctype +# include <wctype.h> +# define is_print(c) iswprint((c)) +# else +# define is_print(c) (((c)&~STRIP) || isprint((c))) +# endif +# if !_lib_iswprint && !defined(iswprint) +# define iswprint(c) (((c)&~STRIP) || isprint((c))) +# endif #else # define is_print(c) isprint(c) #endif @@ -1166,7 +1175,6 @@ { if(curp == sp) r = dp - phys; - d = (is_print(c)?1:-1); #if SHOPT_MULTIBYTE d = mbwidth((wchar_t)c); if(d==1 && !is_print(c)) @@ -1183,7 +1191,9 @@ continue; } else -#endif /* SHOPT_MULTIBYTE */ +#else /* not SHOPT_MULTIBYTE */ + d = (is_print(c)?1:-1); +#endif /* not SHOPT_MULTIBYTE */ if(d<0) { if(c=='\t') --- src/cmd/ksh93/edit/vi.c +++ src/cmd/ksh93/edit/vi.c 2006-04-19 12:17:59.000000000 +0200 @@ -28,6 +28,8 @@ * cbosgd!pds -*/ +#include <ast.h> +#include <ast_wchar.h> #if KSHELL # include "defs.h" @@ -65,10 +67,16 @@ # define gencpy(a,b) ed_gencpy(a,b) # define genncpy(a,b,n) ed_genncpy(a,b,n) # define genlen(str) ed_genlen(str) -# define digit(c) ((c&~STRIP)==0 && isdigit(c)) -# define is_print(c) ((c&~STRIP) || isprint(c)) +# if _hdr_wctype +# include <wctype.h> +# define digit(c) iswdigit((c)) +# define is_print(c) iswprint((c)) +# else +# define digit(c) (((c)&~STRIP)==0 && isdigit((c))) +# define is_print(c) (((c)&~STRIP) || isprint((c))) +# endif # if !_lib_iswprint && !defined(iswprint) -# define iswprint(c) is_print((c)) +# define iswprint(c) (((c)&~STRIP) || isprint((c))) # endif static int _isalph(int); static int _ismetach(int); 
@@ -2027,6 +2035,9 @@ { register int i; + if (vp->lastline == NULL) + return; + if( (i = cur_virt - first_virt + 1) > 0 ) { /*** save last thing user typed ***/ --- src/cmd/ksh93/include/national.h +++ src/cmd/ksh93/include/national.h 2006-04-19 12:17:59.000000000 +0200 @@ -29,7 +29,7 @@ #if SHOPT_MULTIBYTE # ifndef MARKER -# define MARKER 0x7fff /* Must be invalid character */ +# define MARKER 0xdfff /* Must be invalid character */ # endif extern int sh_strchr(const char*,const char*); --- src/cmd/ksh93/sh/lex.c +++ src/cmd/ksh93/sh/lex.c 2006-04-19 12:17:54.000000000 +0200 @@ -293,11 +293,12 @@ { switch(*len = mbsize(_Fcin.fcptr)) { - case -1: /* bogus multiByte char - parse as bytes? */ - case 0: /* NULL byte */ + case -1: /* bogus multiByte char - parse as bytes? */ + case 0: /* NULL byte */ + *len = 1; case 1: - lexState = state[curChar=fcget()]; - break; + lexState = state[curChar=fcget()]; + break; default: /* * None of the state tables contain entries @@ -1596,6 +1597,36 @@ { if(n!=S_NL) { +#if SHOPT_MULTIBYTE + if(mbwide()) + { + do + { + ssize_t len; + switch((len = mbsize(_Fcin.fcptr))) + { + case -1: /* bogus multiByte char - parse as bytes? */ + case 0: /* NULL byte */ + case 1: + n = state[fcget()]; + break; + default: + /* + * None of the state tables contain + * entries for multibyte characters, + * however, they should be treated + * the same as any other alph + * character. Therefore, we'll use + * the state of the 'a' character. + */ + mbchar(_Fcin.fcptr); + n = state['a']; + } + } + while(n == 0); + } + else +#endif /* SHOPT_MULTIBYTE */ /* skip over regular characters */ while((n=state[fcget()])==0); } --- src/cmd/ksh93/sh/macro.c +++ src/cmd/ksh93/sh/macro.c 2006-04-19 12:17:54.000000000 +0200 @@ -266,7 +266,38 @@ cp = fcseek(0); while(1) { +#if SHOPT_MULTIBYTE + if(mbwide()) + { + do + { + ssize_t len; + switch((len = mbsize(cp))) + { + case -1: /* bogus multiByte char - parse as bytes? 
*/ + case 0: /* NULL byte */ + case 1: + n = state[*(unsigned char*)cp++]; + break; + default: + /* + * None of the state tables contain + * entries for multibyte characters, + * however, they should be treated + * the same as any other alph + * character. Therefore, we'll use + * the state of the 'a' character. + */ + cp += len; + n = state['a']; + } + } + while(n == 0); + } + else +#endif /* SHOPT_MULTIBYTE */ while((n=state[*(unsigned char*)cp++])==0); + if(n==S_NL || n==S_QUOTE || n==S_RBRA) continue; if(c=(cp-1)-fcseek(0)) @@ -395,8 +426,42 @@ cp++; while(1) { - while((n=state[*(unsigned char*)cp++])==0); - c = (cp-1) - first; +#if SHOPT_MULTIBYTE + if (mbwide()) + { + ssize_t len; + do + { + switch((len = mbsize(cp))) + { + case -1: /* bogus multiByte char - parse as bytes? */ + case 0: /* NULL byte */ + len = 1; + case 1: + n = state[*(unsigned char*)cp++]; + break; + default: + /* + * None of the state tables contain entries + * for multibyte characters. However, they + * should be treated the same as any other + * alpha character, so we'll use the state + * which would normally be assigned to the + * 'a' character. 
+ */ + cp += len; + n = state['a']; + } + } + while(n == 0); + c = (cp-len) - first; + } + else +#endif /* SHOPT_MULTIBYTE */ + { + while((n=state[*(unsigned char*)cp++])==0); + c = (cp-1) - first; + } switch(n) { case S_ESC: --- src/cmd/ksh93/sh/string.c +++ src/cmd/ksh93/sh/string.c 2006-04-19 12:17:59.000000000 +0200 @@ -24,6 +24,7 @@ */ #include <ast.h> +#include <ast_wchar.h> #include "defs.h" #include <stak.h> #include <ctype.h> @@ -36,8 +37,12 @@ #define mbchar(p) (*(unsigned char*)p++) #endif +#if _hdr_wctype +# include <wctype.h> +#endif + #if !_lib_iswprint && !defined(iswprint) -# define iswprint(c) ((c&~0377) || isprint(c)) +# define iswprint(c) (((c)&~0377) || isprint((c))) #endif --- src/lib/libast/comp/setlocale.c +++ src/lib/libast/comp/setlocale.c 2006-04-19 12:17:59.000000000 +0200 @@ -30,6 +30,8 @@ #include "lclib.h" +#include <ast.h> +#include <ast_wchar.h> #include <ctype.h> #include <mc.h> #include <namval.h> --- src/lib/libast/comp/wc.c +++ src/lib/libast/comp/wc.c 2006-04-19 12:17:59.000000000 +0200 @@ -26,6 +26,7 @@ */ #include <ast.h> +#include <ast_wchar.h> #include <wchar.h> #if !_lib_mbtowc --- src/lib/libast/features/wchar +++ src/lib/libast/features/wchar 2006-04-19 12:25:08.000000000 +0200 @@ -1,5 +1,6 @@ set prototyped -lib mbstowcs,wctomb,wcrtomb,wcslen,wcstombs,wcwidth stdlib.h stdio.h wchar.h +lib mbstowcs,wctomb,wcrtomb,wcslen,wcstombs,wcscpy,wcwidth stdlib.h stdio.h wchar.h wctype.h +lib iswprint,iswalnum stdlib.h stdio.h ctype.h wctype.h lib towlower,towupper stdlib.h stdio.h wchar.h typ mbstate_t stdlib.h stdio.h wchar.h nxt wchar @@ -30,6 +31,12 @@ #undef putwc #undef putwchar #undef ungetwc + #undef fwprintf + #undef swprintf + #undef vfwprintf + #undef vswprintf + #undef vwprintf + #undef wprintf #define fgetwc _ast_fgetwc #define fgetws _ast_fgetws @@ -79,6 +86,12 @@ #if !_lib_wcstombs extern size_t wcstombs(char*, const wchar_t*, size_t); #endif + #if !_lib_wcscpy + extern wchar_t *wcscpy(wchar_t*t, const wchar_t*); + 
#endif + #if !_lib_wcwidth + extern int wcwidth(wchar_t c); + #endif extern int fwprintf(FILE*, const wchar_t*, ...); extern int fwscanf(FILE*, const wchar_t*, ...); --- src/lib/libast/regex/reglib.h +++ src/lib/libast/regex/reglib.h 2006-04-19 12:17:59.000000000 +0200 @@ -57,6 +57,7 @@ char re_rhs[1]; /* substitution rhs */ #include <ast.h> +#include <ast_wchar.h> #include <cdt.h> #include <stk.h> --- src/lib/libcmd/Mamfile +++ src/lib/libcmd/Mamfile 2006-04-19 12:17:54.000000000 +0200 @@ -444,7 +444,7 @@ prev cat.c meta cat.o %.c>%.o cat.c cat prev cat.c -exec - ${CC} ${mam_cc_FLAGS} ${CCFLAGS} -I. -I${PACKAGE_ast_INCLUDE} -DERROR_CATALOG=\""libcmd"\" -DUSAGE_LICENSE=\""[-author?Glenn Fowler <gsf at research.att.com>][-author?David Korn <dgk at research.att.com>][-copyright?Copyright (c) 1992-2006 AT&T Knowledge Ventures][-license?http://www.opensource.org/licenses/cpl1.0.txt][--catalog?libcmd]"\" -D_PACKAGE_ast -D_BLD_cmd -c cat.c +exec - ${CC} ${mam_cc_FLAGS} ${CCFLAGS} -I. -I${PACKAGE_ast_INCLUDE} -DERROR_CATALOG=\""libcmd"\" -DUSAGE_LICENSE=\""[-author?Glenn Fowler <gsf at research.att.com>][-author?David Korn <dgk at research.att.com>][-copyright?Copyright (c) 1992-2006 AT&T Knowledge Ventures][-license?http://www.opensource.org/licenses/cpl1.0.txt][--catalog?libcmd]"\" -D_PACKAGE_ast -D_BLD_cmd -DSHOPT_MULTIBYTE -c cat.c done cat.o generated make chgrp.o prev chgrp.c --- src/lib/libcmd/cat.c +++ src/lib/libcmd/cat.c 2006-04-19 12:17:54.000000000 +0200 @@ -133,8 +133,39 @@ while (endbuff) { cpold = cp; - /* skip over ASCII characters */ + /* skip over ASCII and multi byte characters */ +#if SHOPT_MULTIBYTE + if(mbwide()) + { + do + { + ssize_t len; + switch((len = mbsize(cp))) + { + case -1: /* bogus multiByte char - parse as bytes? 
*/ + case 0: /* NULL byte */ + case 1: + n = states[*cp++]; + break; + default: + /* + * None of the state tables contain + * entries for multibyte characters, + * however, they should be treated + * the same as any other alph + * character. Therefore, we'll use + * the state of the 'a' character. + */ + cp += len; + n = states['a']; + } + } + while(n == 0); + } + else +#endif /* SHOPT_MULTIBYTE */ while ((n = states[*cp++]) == 0); + if (n==T_ENDBUF) { if (cp>endbuff)