more utf8 less ebcdic

Ted Unangst Fri, 06 Nov 2015 08:37:04 -0800

Having to define LESSCHARSET to print special characters doesn't seem that
useful. less doesn't do any translation, so setting LESSCHARSET to ebcdic
isn't going to magically make such files appear correctly in my xterm.


There seem to be two possibilities:
1. You have LESSCHARSET and LC_CTYPE set to the same thing. Things work.
2. They are different. Things will not work.

Removing the possibility that they are different would seem to be an
improvement.


Index: charset.c
===================================================================
RCS file: /cvs/src/usr.bin/less/charset.c,v
retrieving revision 1.14
diff -u -p -r1.14 charset.c
--- charset.c   6 Nov 2015 16:20:37 -0000       1.14
+++ charset.c   6 Nov 2015 16:30:10 -0000
@@ -23,92 +23,6 @@
 
 int utf_mode = 0;
 
-/*
- * Predefined character sets,
- * selected by the LESSCHARSET environment variable.
- */
-struct charset {
-       char *name;
-       int *p_flag;
-       char *desc;
-} charsets[] = {
-       /* BEGIN CSTYLED */
-       { "ascii",              NULL,   "8bcccbcc18b95.b" },
-       { "utf-8",              &utf_mode,       "8bcccbcc18b95.b126.bb" },
-       { "iso8859",            NULL,   "8bcccbcc18b95.33b." },
-       { "latin3",             NULL,   "8bcccbcc18b95.33b5.b8.b15.b4.b12.b18.b12.b." },
-       { "arabic",             NULL,   "8bcccbcc18b95.33b.3b.7b2.13b.3b.b26.5b19.b" },
-       { "greek",              NULL,   "8bcccbcc18b95.33b4.2b4.b3.b35.b44.b" },
-       { "greek2005",          NULL,   "8bcccbcc18b95.33b14.b35.b44.b" },
-       { "hebrew",             NULL,   "8bcccbcc18b95.33b.b29.32b28.2b2.b" },
-       { "koi8-r",             NULL,   "8bcccbcc18b95.b." },
-       { "KOI8-T",             NULL,   "8bcccbcc18b95.b8.b6.b8.b.b.5b7.3b4.b4.b3.b.b.3b." },
-       { "georgianps",         NULL,   "8bcccbcc18b95.3b11.4b12.2b." },
-       { "tcvn",               NULL,   "b..b...bcccbccbbb7.8b95.b48.5b." },
-       { "TIS-620",            NULL,   "8bcccbcc18b95.b.4b.11b7.8b." },
-       { "next",               NULL,   "8bcccbcc18b95.bb125.bb" },
-       { "dos",                NULL,   "8bcccbcc12bc5b95.b." },
-       { "windows-1251",       NULL,   "8bcccbcc12bc5b95.b24.b." },
-       { "windows-1252",       NULL,   "8bcccbcc12bc5b95.b.b11.b.2b12.b." },
-       { "windows-1255",       NULL,   "8bcccbcc12bc5b95.b.b8.b.5b9.b.4b." },
-       { "ebcdic",             NULL,   "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." },
-       { "IBM-1047",           NULL,   "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" },
-       { NULL, NULL, NULL }
-       /* END CSTYLED */
-};
-
-/*
- * Support "locale charmap"/nl_langinfo(CODESET) values, as well as others.
- */
-struct cs_alias {
-       char *name;
-       char *oname;
-} cs_aliases[] = {
-       { "UTF-8",              "utf-8" },
-       { "ANSI_X3.4-1968",     "ascii" },
-       { "US-ASCII",           "ascii" },
-       { "646",                "ascii" },
-       { "C",                  "ascii" },
-       { "latin1",             "iso8859" },
-       { "ISO-8859-1",         "iso8859" },
-       { "latin9",             "iso8859" },
-       { "ISO-8859-15",        "iso8859" },
-       { "latin2",             "iso8859" },
-       { "ISO-8859-2",         "iso8859" },
-       { "ISO-8859-3",         "latin3" },
-       { "latin4",             "iso8859" },
-       { "ISO-8859-4",         "iso8859" },
-       { "cyrillic",           "iso8859" },
-       { "ISO-8859-5",         "iso8859" },
-       { "ISO-8859-6",         "arabic" },
-       { "ISO-8859-7",         "greek" },
-       { "IBM9005",            "greek2005" },
-       { "ISO-8859-8",         "hebrew" },
-       { "latin5",             "iso8859" },
-       { "ISO-8859-9",         "iso8859" },
-       { "latin6",             "iso8859" },
-       { "ISO-8859-10",        "iso8859" },
-       { "latin7",             "iso8859" },
-       { "ISO-8859-13",        "iso8859" },
-       { "latin8",             "iso8859" },
-       { "ISO-8859-14",        "iso8859" },
-       { "latin10",            "iso8859" },
-       { "ISO-8859-16",        "iso8859" },
-       { "IBM437",             "dos" },
-       { "EBCDIC-US",          "ebcdic" },
-       { "IBM1047",            "IBM-1047" },
-       { "KOI8-R",             "koi8-r" },
-       { "KOI8-U",             "koi8-r" },
-       { "GEORGIAN-PS",        "georgianps" },
-       { "TCVN5712-1",         "tcvn" },
-       { "NEXTSTEP",           "next" },
-       { "windows",            "windows-1252" }, /* backward compatibility */
-       { "CP1251",             "windows-1251" },
-       { "CP1252",             "windows-1252" },
-       { "CP1255",             "windows-1255" },
-       { NULL, NULL }
-};
-
 #define        IS_BINARY_CHAR  01
 #define        IS_CONTROL_CHAR 02
 
@@ -119,105 +33,6 @@ int binattr = AT_STANDOUT;
 
 
 /*
- * Define a charset, given a description string.
- * The string consists of 256 letters,
- * one for each character in the charset.
- * If the string is shorter than 256 letters, missing letters
- * are taken to be identical to the last one.
- * A decimal number followed by a letter is taken to be a
- * repetition of the letter.
- *
- * Each letter is one of:
- *     . normal character
- *     b binary character
- *     c control character
- */
-static void
-ichardef(char *s)
-{
-       char *cp;
-       int n;
-       char v;
-
-       n = 0;
-       v = 0;
-       cp = chardef;
-       while (*s != '\0') {
-               switch (*s++) {
-               case '.':
-                       v = 0;
-                       break;
-               case 'c':
-                       v = IS_CONTROL_CHAR;
-                       break;
-               case 'b':
-                       v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
-                       break;
-
-               case '0': case '1': case '2': case '3': case '4':
-               case '5': case '6': case '7': case '8': case '9':
-                       n = (10 * n) + (s[-1] - '0');
-                       continue;
-
-               default:
-                       error("invalid chardef", NULL_PARG);
-                       quit(QUIT_ERROR);
-                       /*NOTREACHED*/
-               }
-
-               do {
-                       if (cp >= chardef + sizeof (chardef)) {
-                               error("chardef longer than 256", NULL_PARG);
-                               quit(QUIT_ERROR);
-                               /*NOTREACHED*/
-                       }
-                       *cp++ = v;
-               } while (--n > 0);
-               n = 0;
-       }
-
-       while (cp < chardef + sizeof (chardef))
-               *cp++ = v;
-}
-
-/*
- * Define a charset, given a charset name.
- * The valid charset names are listed in the "charsets" array.
- */
-static int
-icharset(char *name, int no_error)
-{
-       struct charset *p;
-       struct cs_alias *a;
-
-       if (name == NULL || *name == '\0')
-               return (0);
-
-       /* First see if the name is an alias. */
-       for (a = cs_aliases;  a->name != NULL;  a++) {
-               if (strcmp(name, a->name) == 0) {
-                       name = a->oname;
-                       break;
-               }
-       }
-
-       for (p = charsets;  p->name != NULL;  p++) {
-               if (strcmp(name, p->name) == 0) {
-                       ichardef(p->desc);
-                       if (p->p_flag != NULL)
-                               *(p->p_flag) = 1;
-                       return (1);
-               }
-       }
-
-       if (!no_error) {
-               error("invalid charset name", NULL_PARG);
-               quit(QUIT_ERROR);
-       }
-       return (0);
-}
-
-/*
  * Define a charset, given a locale name.
  */
 static void
@@ -340,26 +155,6 @@ attr:
 static void
 set_charset(void)
 {
-       char *s;
-
-       /*
-        * See if environment variable LESSCHARSET is defined.
-        */
-       s = lgetenv("LESSCHARSET");
-       if (icharset(s, 0))
-               return;
-
-       /*
-        * Try using the codeset name as the charset name.
-        */
-       s = nl_langinfo(CODESET);
-       if (icharset(s, 1))
-               return;
-
-       /*
-        * Get character definitions from locale functions,
-        * rather than from predefined charset entry.
-        */
        ilocale();
 }

more utf8 less ebcdic

Reply via email to