I have now completed a shell script that tells the terminal, via ISO 2022, what the current locale's encoding is. It even signals which ISO 2022 facilities the terminal may use in the return channel from the keyboard, so that EUC-* and ISO-2022-* can be distinguished (which I hope answers Juliusz's question on how to handle this in luit). Enjoy ... Markus -- Markus G. Kuhn, Computer Laboratory, University of Cambridge, UK Email: mkuhn at acm.org, WWW: <http://www.cl.cam.ac.uk/~mgk25/>
#!/bin/sh
#
# setcode -- Markus Kuhn <[EMAIL PROTECTED]> 2001-05-26 -- share and enjoy
#
# Adjust the character set of the terminal according to current locale
# by sending the appropriate ISO 2022 sequence to stdout.
#
# Usage:
#
#   First set LANG or LC_CTYPE to the desired locale, then call this
#   script to inform your terminal/tty about the current encoding.
#
# Exit status:
#
#   0  the sequences for the locale's character map were written to stdout
#   1  the character map could not be determined, is not known, or has no
#      ISO registered character set (a diagnostic is written to stderr)
#
# Implementation notes:
#
#   - Sequences are written with printf rather than "echo -ne", because
#     echo's options and backslash handling are not portable under
#     /bin/sh (some shells print "-ne" literally).  Since the sequences
#     are printf format strings, a literal '%' is written as '%%'.
#   - Helpers report failure through their return status instead of
#     calling exit; the final "main" call makes that the script's status.
#
# Health warning:
#
#   Long exposure to ISO 2022 can cause states of permanent
#   head damage. Better lock your terminal to UTF-8 forever
#   and use UTF-8 exclusively!
#
# References:
#
#   - ISO 2022 = ECMA-35, http://www.ecma.ch/ecma1/STAND/ECMA-035.HTM
#   - http://www.itscj.ipsj.or.jp/ISO-IR/
#   - Ken Lunde: CJKV Information Processing. O'Reilly, 1999
#

# Write the G0-G3 designators for character map $1 to stdout.
# Returns 1, after a diagnostic on stderr, if $1 is unsupported or unknown.
designate_g_sets() {
  case "$1" in
    ASCII | US-ASCII | ANSI_X3.4-1968 | 646 | ISO646 | ISO_646.IRV | \
    ISO2022* | ISO-2022*)
      # Nothing beyond G0 = US-ASCII needs to be designated.
      ;;
    ISO8859-1 | ISO-8859-1) printf '\033-A' ;;
    ISO8859-2 | ISO-8859-2) printf '\033-B' ;;
    ISO8859-3 | ISO-8859-3) printf '\033-C' ;;
    ISO8859-4 | ISO-8859-4) printf '\033-D' ;;
    ISO8859-5 | ISO-8859-5) printf '\033-L' ;;
    ISO8859-6 | ISO-8859-6) printf '\033-G' ;;
    ISO8859-7 | ISO-8859-7) printf '\033-F' ;;
    ISO8859-8 | ISO-8859-8) printf '\033-H' ;;
    ISO8859-9 | ISO-8859-9) printf '\033-M' ;;
    ISO8859-10 | ISO-8859-10) printf '\033-V' ;;
    ISO8859-13 | ISO-8859-13) printf '\033-Y' ;;
    ISO8859-14 | ISO-8859-14) printf '\033-_' ;;
    ISO8859-15 | ISO-8859-15) printf '\033-b' ;;
    ISO8859-16 | ISO-8859-16) printf '\033-f' ;;
    EUC-CN)
      # G1DM4: GB 2312-80 = ISO IR 58
      printf '\033$)A'
      ;;
    EUC-JP)
      # G1DM4: JIS X 0208-1990 = ISO IR 87/168
      # G2D4:  JIS X 0201-1997 = ISO IR 13
      # G3DM4: JIS X 0212-1990 = ISO IR 159
      # ACS:   28
      # The G2 designator is 'ESC * F'.  'ESC ) F' would designate G1
      # instead and replace the JIS X 0208 designation sent just before.
      printf '\033$)B\033*I\033$+D'
      ;;
    EUC-KR)
      # G1DM4: KS X 1001:1992 = KSC 5601-1987 = ISO IR 149
      printf '\033$)C'
      ;;
    EUC-TW)
      # G1DM4: CNS 11643-1992 Plane 1 = ISO IR 171
      # G2DM4: CNS 11643-1992 Planes 1-7 = ISO IR ???
      printf '\033$)G'
      printf '%s\n' \
        'Warning: no ESC sequence for designating G2 in EUC-TW known!' >&2
      ;;
    TIS-620 | TIS620.2533)
      # G1D6: TIS 620-2533 (1990) = ISO IR 166
      printf '\033-T'
      ;;
    VSCII)
      # G1D6: TCVN 5712-1993 (VSCII) = ISO IR 180
      printf '\033-Z'
      ;;
    GB18030 | GBK | BIG5 | Big5 | KOI8-R)
      printf 'Character map %s not ISO registered and not supported\n' \
        "$1" >&2
      return 1
      ;;
    *)
      printf 'Character map %s not known\n' "$1" >&2
      return 1
      ;;
  esac
}

# Announce the code structure facilities for character map $1 on stdout.
# This is needed to tell the terminal, what kind of facilities it can use
# in the characters that it sends back. This setting is only valid until
# the terminal receives another G0-G3 designation or 'ESC %' sequence.
announce_facilities() {
  case "$1" in
    ASCII | US-ASCII | ANSI_X3.4-1968 | 646 | ISO646 | ISO_646.IRV)
      # ACS 1: GL=G0, no shifts
      printf '\033 A'
      ;;
    ISO8859-* | ISO-8859-* | TIS-620 | TIS620.2533 | VSCII)
      # ACS 3: GL=G0, GR=G1, no shifts
      printf '\033 C'
      ;;
    EUC-*)
      # ACS 28: Use SS2 (0x8e) and SS3 (0x8f) followed by GR codes only
      printf '\033 \\'
      ;;
    ISO2022* | ISO-2022*)
      # ACS 10: Use 7-bit characters only (but allow redesignation and
      # shifts).  Same patterns as in designate_g_sets, so every name
      # accepted there is also announced here.
      printf '\033 J'
      ;;
  esac
}

# Write the complete sequence for character map $1 to stdout:
# coding system selection, G0-G3 designators, facility announcement.
# Returns 1 if the character map is unsupported or unknown.
emit_for_charmap() {
  # Select coding system
  case "$1" in
    UTF-8)
      printf '\033%%G'
      return 0
      ;;
  esac

  # return to ISO 2022 and set G0 = US-ASCII
  printf '\033%%@\033(B'

  designate_g_sets "$1" || return 1
  announce_facilities "$1"
}

# Entry point: look up the character map of the current locale and
# send the matching sequences.
main() {
  charmap=$(locale charmap)
  if [ -z "$charmap" ]; then
    # Do not touch the terminal state when the encoding is unknown to us.
    printf '%s\n' \
      'Cannot determine the character map of the current locale' >&2
    return 1
  fi
  emit_for_charmap "$charmap"
}

main "$@"
