terminal UTF-8: Translate all input via UCS-4, #ifdef CONFIG_UTF_8. --- commit 49f5b0819eefd68261ea2f2aa2bbca166dd0ae4a tree d10e197348fffd550ebaab331b6d81c6a3a8094b parent 8c24d6f73005dd6650fa5feec77f7dc411a28551 author Kalle Olavi Niemitalo <[EMAIL PROTECTED]> Sat, 05 Aug 2006 14:01:49 +0300 committer Kalle Olavi Niemitalo <[EMAIL PROTECTED]> Sat, 05 Aug 2006 20:25:45 +0300
src/intl/charsets.c | 38 +++++++++++++++++++++++++++++++-------
src/intl/charsets.h | 1 +
src/terminal/event.c | 47 +++++++++++++++++++++++++++++++++++++++--------
3 files changed, 71 insertions(+), 15 deletions(-)
diff --git a/src/intl/charsets.c b/src/intl/charsets.c
index dd0ee5b..31905ba 100644
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -458,22 +458,46 @@ utf_8_to_unicode(unsigned char **string,
}
#endif /* CONFIG_UTF_8 */
+/* Slow linear scan of from->table, the common part of cp2u and cp2utf_8; returns the entry's u for byte c, or UCS_NO_CHAR if the table (ended by an entry with c == 0) has no match. */
+static unicode_val_T
+cp2u_shared(const struct codepage_desc *from, unsigned char c)
+{
+ int j;
+
+ for (j = 0; from->table[j].c; j++)
+ if (from->table[j].c == c)
+ return from->table[j].u;
+
+ return UCS_NO_CHAR;
+}
+
+#ifdef CONFIG_UTF_8
+/* Slow algorithm, used for converting input from the terminal: maps byte c of codepage `from` (SYSTEM_CHARSET_FLAG is masked off) to a unicode_val_T; bytes below 0x80 are returned unchanged. */
+unicode_val_T
+cp2u(int from, unsigned char c)
+{
+ from &= ~SYSTEM_CHARSET_FLAG;
+
+ /* UTF-8 is a multibyte codepage and cannot be handled with
+ * this function; if the assertion fails, UCS_NO_CHAR is returned. */
+ assert(codepages[from].table != table_utf_8);
+ if_assert_failed return UCS_NO_CHAR;
+
+ if (c < 0x80) return c;
+ else return cp2u_shared(&codepages[from], c);
+}
+#endif /* CONFIG_UTF_8 */
+
/* This slow and ugly code is used by the terminal utf_8_io */
unsigned char *
cp2utf_8(int from, int c)
{
- int j;
-
from &= ~SYSTEM_CHARSET_FLAG;
if (codepages[from].table == table_utf_8 || c < 128)
return strings[c];
- for (j = 0; codepages[from].table[j].c; j++)
- if (codepages[from].table[j].c == c)
- return encode_utf_8(codepages[from].table[j].u);
-
- return encode_utf_8(UCS_NO_CHAR);
+ return encode_utf_8(cp2u_shared(&codepages[from], c));
}
static void
diff --git a/src/intl/charsets.h b/src/intl/charsets.h
index 246606b..8d11707 100644
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -64,6 +64,7 @@ int utf8_cells2bytes(unsigned char *, in
inline int unicode_to_cell(unicode_val_T);
inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
+unicode_val_T cp2u(int, unsigned char);
#endif /* CONFIG_UTF_8 */
unsigned char *cp2utf_8(int, int);
diff --git a/src/terminal/event.c b/src/terminal/event.c
index c45d0b9..7737cec 100644
--- a/src/terminal/event.c
+++ b/src/terminal/event.c
@@ -264,14 +264,14 @@ #endif
{
int utf8_io = -1;
int key = ilev->info.keyboard.key;
+ int modifier = ilev->info.keyboard.modifier;
if (key >= 0x100)
key = -key;
- set_kbd_term_event(&tev, key, ilev->info.keyboard.modifier);
reset_timer();
- if (check_kbd_modifier(&tev, KBD_MOD_CTRL) && (key == 'l' || key == 'L')) {
+ if (modifier == KBD_MOD_CTRL && (key == 'l' || key == 'L')) {
redraw_terminal_cls(term);
break;
@@ -280,9 +280,32 @@ #endif
return 0;
}
+ /* Character Conversions. */
#ifdef CONFIG_UTF_8
- utf8_io = !!term->utf8;
+ /* struct term_event_keyboard carries bytes in the
+ * charset of the terminal.
+ * - If the "utf_8_io" option (i.e. term->utf8) is
+ * true or the "charset" option refers to UTF-8,
+ * then handle_interlink_event() converts from UTF-8
+ * to UCS-4, and term_send_ucs() converts from UCS-4
+ * to the codepage specified with the "charset" option.
+ * - Otherwise, handle_interlink_event() converts from
+ * the codepage specified with the "charset" option
+ * to UCS-4, and term_send_ucs() converts right back.
+ * TO DO: Change struct term_event_keyboard to carry
+ * UCS-4 instead, reducing these conversions. */
+ utf8_io = term->utf8
+ || is_cp_utf8(get_opt_codepage_tree(term->spec, "charset"));
#else
+ /* struct term_event_keyboard carries bytes in the
+ * charset of the terminal.
+ * - If the "utf_8_io" option is true, then
+ * handle_interlink_event() converts from UTF-8 to
+ * UCS-4, and term_send_ucs() converts from UCS-4 to
+ * the codepage specified with the "charset" option;
+ * this codepage cannot be UTF-8.
+ * - Otherwise, handle_interlink_event() passes the
+ * bytes straight through. */
utf8_io = get_opt_bool_tree(term->spec, "utf_8_io");
#endif /* CONFIG_UTF_8 */
@@ -295,19 +318,27 @@ #endif /* CONFIG_UTF_8 */
if (u < interlink->utf_8.min)
u = UCS_NO_CHAR;
- term_send_ucs(term, u,
- get_kbd_modifier(&tev));
+ term_send_ucs(term, u, modifier);
}
break;
} else {
interlink->utf_8.len = 0;
- term_send_ucs(term, UCS_NO_CHAR,
- get_kbd_modifier(&tev));
+ term_send_ucs(term, UCS_NO_CHAR, modifier);
}
}
if (key < 0x80 || key > 0xFF || !utf8_io) {
+#ifdef CONFIG_UTF_8
+ if (key >= 0 && key <= 0xFF && !utf8_io) {
+ key = cp2u(get_opt_codepage_tree(term->spec,
+ "charset"),
+ key);
+ term_send_ucs(term, key, modifier);
+ break;
+ }
+#endif /* CONFIG_UTF_8 */
+ set_kbd_term_event(&tev, key, modifier);
term_send_event(term, &tev);
break;
@@ -326,7 +357,7 @@ #endif /* CONFIG_UTF_8 */
break;
}
- term_send_ucs(term, UCS_NO_CHAR, get_kbd_modifier(&tev));
+ term_send_ucs(term, UCS_NO_CHAR, modifier);
break;
}
pgpu0AHcRYBX4.pgp
Description: PGP signature
_______________________________________________ elinks-dev mailing list [email protected] http://linuxfromscratch.org/mailman/listinfo/elinks-dev
