#ifdef CONFIG_UTF_8, translate all terminal input via UCS-4.

---
commit f630e1e0081ffc6d0974f43868e5ae3d1c076847
tree 7428ac527293a5cd0efe2ee188fc2d7c2e0622b8
parent d25b037cf7e09aff4b0051804355e48d4fcc7460
author Kalle Olavi Niemitalo <[EMAIL PROTECTED]> Thu, 03 Aug 2006 23:55:05 +0300
committer Kalle Olavi Niemitalo <[EMAIL PROTECTED]> Thu, 03 Aug 2006 23:55:05 +0300
src/intl/charsets.c | 38 ++++++++++++++++++++++++++++------
src/intl/charsets.h | 1 +
src/terminal/event.c | 56 +++++++++++++++++++++++++++++++++++++++-----------
3 files changed, 76 insertions(+), 19 deletions(-)
diff --git a/src/intl/charsets.c b/src/intl/charsets.c
index 33bc507..912ea09 100644
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -458,22 +458,46 @@ utf_8_to_unicode(unsigned char **string,
}
#endif /* CONFIG_UTF_8 */
+/* Slow algorithm, the common part of cp2u and cp2utf_8. */
+static unicode_val_T
+cp2u_shared(const struct codepage_desc *from, unsigned char c)
+{
+ int j;
+
+ for (j = 0; from->table[j].c; j++)
+ if (from->table[j].c == c)
+ return from->table[j].u;
+
+ return UCS_NO_CHAR;
+}
+
+#ifdef CONFIG_UTF_8
+/* Slow algorithm, used for converting input from the terminal. */
+unicode_val_T
+cp2u(int from, unsigned char c)
+{
+ from &= ~SYSTEM_CHARSET_FLAG;
+
+ /* UTF-8 is a multibyte codepage and cannot be handled with
+ * this function. */
+ assert(codepages[from].table != table_utf_8);
+ if_assert_failed return UCS_NO_CHAR;
+
+ if (c < 0x80) return c;
+ else return cp2u_shared(&codepages[from], c);
+}
+#endif /* CONFIG_UTF_8 */
+
/* This slow and ugly code is used by the terminal utf_8_io */
unsigned char *
cp2utf_8(int from, int c)
{
- int j;
-
from &= ~SYSTEM_CHARSET_FLAG;
if (codepages[from].table == table_utf_8 || c < 128)
return strings[c];
- for (j = 0; codepages[from].table[j].c; j++)
- if (codepages[from].table[j].c == c)
- return encode_utf_8(codepages[from].table[j].u);
-
- return encode_utf_8(UCS_NO_CHAR);
+ return encode_utf_8(cp2u_shared(&codepages[from], c));
}
static void
diff --git a/src/intl/charsets.h b/src/intl/charsets.h
index 246606b..8d11707 100644
--- a/src/intl/charsets.h
+++ b/src/intl/charsets.h
@@ -64,6 +64,7 @@ int utf8_cells2bytes(unsigned char *, in
inline int unicode_to_cell(unicode_val_T);
inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
+unicode_val_T cp2u(int, unsigned char);
#endif /* CONFIG_UTF_8 */
unsigned char *cp2utf_8(int, int);
diff --git a/src/terminal/event.c b/src/terminal/event.c
index 5943aea..2937998 100644
--- a/src/terminal/event.c
+++ b/src/terminal/event.c
@@ -133,16 +133,17 @@ term_send_event(struct terminal *term, s
}
static void
-term_send_ucs(struct terminal *term, struct term_event *ev, unicode_val_T u)
+term_send_ucs(struct terminal *term, unicode_val_T u, int modifier)
{
unsigned char *recoded;
+ struct term_event ev;
+ set_kbd_term_event(&ev, KBD_UNDEF, modifier);
recoded = u2cp_no_nbsp(u, get_opt_codepage_tree(term->spec, "charset"));
if (!recoded) recoded = "*";
while (*recoded) {
- ev->info.keyboard.modifier = term->interlink->utf_8.modifier;
- ev->info.keyboard.key = *recoded;
- term_send_event(term, ev);
+ ev.info.keyboard.key = *recoded;
+ term_send_event(term, &ev);
recoded++;
}
}
@@ -267,14 +268,14 @@ #endif
{
int utf8_io = -1;
int key = ilev->info.keyboard.key;
+ int modifier = ilev->info.keyboard.modifier;
if (key >= 0x100)
key = -key;
- set_kbd_term_event(&tev, key, ilev->info.keyboard.modifier);
reset_timer();
- if (check_kbd_modifier(&tev, KBD_MOD_CTRL) && toupper(key) == 'L') {
+ if (modifier == KBD_MOD_CTRL && (key == 'l' || key == 'L')) {
redraw_terminal_cls(term);
break;
@@ -283,14 +284,33 @@ #endif
return 0;
}
+ /* Character Conversions. */
#ifdef CONFIG_UTF_8
- utf8_io = !!term->utf8;
+ /* struct term_event_keyboard carries UCS-4.
+ * - If the "utf_8_io" option (i.e. term->utf8) is
+ * true or the "charset" option refers to UTF-8,
+ * then handle_interlink_event() converts from UTF-8
+ * to UCS-4.
+ * - Otherwise, handle_interlink_event() converts from
+ * the codepage specified with the "charset" option
+ * to UCS-4. */
+ utf8_io = term->utf8
+ || is_cp_utf8(get_opt_codepage_tree(term->spec, "charset"));
#else
+ /* struct term_event_keyboard carries bytes in the
+ * charset of the terminal.
+ * - If the "utf_8_io" option is true, then
+ * handle_interlink_event() converts from UTF-8 to
+ * UCS-4, and term_send_ucs() converts from UCS-4 to
+ * the codepage specified with the "charset" option;
+ * this codepage cannot be UTF-8.
+ * - Otherwise, handle_interlink_event() passes the
+ * bytes straight through. */
utf8_io = get_opt_bool_tree(term->spec, "utf_8_io");
#endif /* CONFIG_UTF_8 */
if (interlink->utf_8.len) {
- if ((key & 0xC0) == 0x80 && utf8_io) {
+ if (key >= 0x80 && key <= 0xBF && utf8_io) {
interlink->utf_8.ucs <<= 6;
interlink->utf_8.ucs |= key & 0x3F;
if (! --interlink->utf_8.len) {
@@ -298,17 +318,29 @@ #endif /* CONFIG_UTF_8 */
if (u < interlink->utf_8.min)
u = UCS_NO_CHAR;
- term_send_ucs(term, &tev, u);
+ term_send_ucs(term, u,
+ term->interlink->utf_8.modifier);
}
break;
} else {
interlink->utf_8.len = 0;
- term_send_ucs(term, &tev, UCS_NO_CHAR);
+ term_send_ucs(term, UCS_NO_CHAR,
+ term->interlink->utf_8.modifier);
}
}
if (key < 0x80 || key > 0xFF || !utf8_io) {
+#ifdef CONFIG_UTF_8
+ if (key >= 0 && key <= 0xFF && !utf8_io) {
+ key = cp2u(get_opt_codepage_tree(term->spec,
+ "charset"),
+ key);
+ term_send_ucs(term, key, modifier);
+ break;
+ }
+#endif /* CONFIG_UTF_8 */
+ set_kbd_term_event(&tev, key, modifier);
term_send_event(term, &tev);
break;
@@ -324,11 +356,11 @@ #endif /* CONFIG_UTF_8 */
interlink->utf_8.len = len - 1;
interlink->utf_8.ucs = key & (mask - 1);
- interlink->utf_8.modifier = get_kbd_modifier(&tev);
+ interlink->utf_8.modifier = modifier;
break;
}
- term_send_ucs(term, &tev, UCS_NO_CHAR);
+ term_send_ucs(term, UCS_NO_CHAR, KBD_MOD_NONE);
break;
}
pgpIgW6ImDkTU.pgp
Description: PGP signature
_______________________________________________
elinks-dev mailing list
[email protected]
http://linuxfromscratch.org/mailman/listinfo/elinks-dev
