[PATCH] Can not input multibyte characters on Win32 console

Ken Takata Sat, 29 Sep 2012 21:54:13 -0700

Hi,

I can't input multibyte characters on the Win32 console if 'encoding' is
different from the current codepage (e.g. :set enc=utf-8 tenc=cp932).
The encoding of typed characters is not properly converted from
'termencoding' to 'encoding'.


There are two problems in the current implementation.

1. mch_inchar() in os_win32.c uses convert_input() to convert typed characters,
   but convert_input() discards the input if it is incomplete.
   convert_input_safe() should be used instead of convert_input().
2. string_convert_ext() in mbyte.c does not return an error even if the input
   is incomplete (e.g. it contains only the leading byte of a multibyte
   character) when CONV_CODEPAGE is used.

The attached patch fixes these problems.
For the first problem, mch_inchar() now uses convert_input_safe(), in the
same way as fill_input_buf() in ui.c does.
For the second problem, the MB_ERR_INVALID_CHARS flag is passed to
MultiByteToWideChar() to detect incomplete input when string_convert_ext()
is called from convert_input_safe().

Best regards,
Ken Takata

-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

# HG changeset patch
# Parent 04592728474a35794ae6397a0b90f2989864fb66

diff --git a/src/mbyte.c b/src/mbyte.c
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -6205,8 +6205,23 @@
 	    if (vcp->vc_cpfrom == 0)
 		tmp_len = utf8_to_utf16(ptr, len, NULL, NULL);
 	    else
-		tmp_len = MultiByteToWideChar(vcp->vc_cpfrom, 0,
-							      ptr, len, 0, 0);
+	    {
+		tmp_len = MultiByteToWideChar(vcp->vc_cpfrom,
+					unconvlenp ? MB_ERR_INVALID_CHARS : 0,
+					ptr, len, 0, 0);
+		if (tmp_len == 0 &&
+			GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
+		{
+		    if (lenp != NULL)
+			*lenp = 0;
+		    if (unconvlenp != NULL)
+			*unconvlenp = len;
+		    retval = alloc(1);
+		    if (retval)
+			retval[0] = NUL;
+		    return retval;
+		}
+	    }
 	    tmp = (short_u *)alloc(sizeof(short_u) * tmp_len);
 	    if (tmp == NULL)
 		break;
diff --git a/src/os_win32.c b/src/os_win32.c
--- a/src/os_win32.c
+++ b/src/os_win32.c
@@ -1431,6 +1431,11 @@
 #define TYPEAHEADLEN 20
     static char_u   typeahead[TYPEAHEADLEN];	/* previously typed bytes. */
     static int	    typeaheadlen = 0;
+#ifdef FEAT_MBYTE
+    static char_u *rest = NULL;	    /* unconverted rest of previous read */
+    static int	restlen = 0;
+    int		unconverted;
+#endif
 
     /* First use any typeahead that was kept because "buf" was too small. */
     if (typeaheadlen > 0)
@@ -1534,6 +1539,33 @@
 
 	    c = tgetch(&modifiers, &ch2);
 
+# ifdef FEAT_MBYTE
+	    /* stolen from fill_input_buf() in ui.c */
+	    if (rest != NULL)
+	    {
+		/* Use remainder of previous call, starts with an invalid
+		 * character that may become valid when reading more. */
+		if (restlen > TYPEAHEADLEN - typeaheadlen)
+		    unconverted = TYPEAHEADLEN - typeaheadlen;
+		else
+		    unconverted = restlen;
+		mch_memmove(typeahead + typeaheadlen, rest, unconverted);
+		if (unconverted == restlen)
+		{
+		    vim_free(rest);
+		    rest = NULL;
+		}
+		else
+		{
+		    restlen -= unconverted;
+		    mch_memmove(rest, rest + unconverted, restlen);
+		}
+		typeaheadlen += unconverted;
+	    }
+	    else
+		unconverted = 0;
+#endif
+
 	    if (typebuf_changed(tb_change_cnt))
 	    {
 		/* "buf" may be invalid now if a client put something in the
@@ -1569,8 +1601,12 @@
 		 * when 'tenc' is set. */
 		if (input_conv.vc_type != CONV_NONE
 						&& (ch2 == NUL || c != K_NUL))
-		    n = convert_input(typeahead + typeaheadlen, n,
-						 TYPEAHEADLEN - typeaheadlen);
+		{
+		    typeaheadlen -= unconverted;
+		    n = convert_input_safe(typeahead + typeaheadlen,
+				n + unconverted, TYPEAHEADLEN - typeaheadlen,
+				rest == NULL ? &rest : NULL, &restlen);
+		}
 #endif
 
 		/* Use the ALT key to set the 8th bit of the character

[PATCH] Can not input multibyte characters on Win32 console

Raspunde prin e-mail lui