On Tue, Dec 01, 2020 at 04:18:48AM -0500, Thomas Dickey wrote:
> On Mon, Nov 30, 2020 at 02:06:40PM +0000, Thomas Dupond wrote:
> > Thank you very much for your fast reply and this small patch.
> > I downloaded version 9.8u and applied the patch but now
> > whenever I try to insert an é interactively I only get : \?E9
> 
> ouch - I'll continue investigating a fix for this (thanks)

Here's a followup (apply after the previous patch) which seems to work.

I've attached the test-scripts that I used:
        I is mapped to an insert (using UTF-8)
        J is mapped to an insert using ^Vu
        K sets the buffer to 8bit
        M uses the insert-string command.
  
> > And this goes for every character like èéêîïôöûù: they do not
> > appear correctly. When I switch to 8bit encoding with
> > `setl fk=8bit`, \?E9 appears correctly as é.
> > 
> > On the bright side, when I use insert-string with UTF-8
> > encoding, the characters appear normally in the command prompt
> > and insert-string works as intended.
> 
> halfway there :-)
>  
> > I realised that you use `setl fk=8bit`, and with this encoding
> > everything works as intended, both interactively and with
> > insert-string. But I would rather use UTF-8 than ISO-8859.
> > 
> > I'm sorry I cannot be of much help, I have very little
> > knowledge of C programming.
> > 
> > Regards,
> > Thomas Dupond
> > 
> > ‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
> > Le lundi, novembre 30, 2020 1:59 AM, Thomas Dickey <[email protected]> a écrit :
> > 
> > > On Sun, Nov 29, 2020 at 04:21:24PM +0000, Thomas Dupond wrote:
> > >
> > > > Hello,
> > > > I'm just starting to explore vile and I ran into something I
> > > > cannot solve. I was trying to write a macro, and while vile
> > > > seems to handle UTF-8 really well, it doesn't seem to work well
> > > > with the insert-string function.
> > > > I can insert "é" but when I use "insert-string é" it prints this
> > > > mess : ᅢᄅ
> > >
> > > yes... scripting hasn't been as well-tested as interactive stuff :-(
> > >
> > > > Any idea on how to solve this ?
> > >
> > > Here's a fix for the most common case (it won't handle a special
> > > case where the buffer is non-UTF-8), which should work for you.
> > >
> > > also attaching a script I used for testing, e.g.,
> > > ./configure --enable-trace --with-builtin-filters && make
> > > ./vile @foo.rc makefile
> > >
> > > > My locale is :
> > > > LANG=en_GB.UTF-8
> > > > LANGUAGE=en_GB:en
> > >
> > > ...
> > >
> > > > I'm on debian 4.19 and I compiled vile-9.8 from source with
> > >
> > > 9.8's getting a little stale - 9.8u is current.
> > >
> > > I put snapshots in github (but have too many concurrent things to
> > > polish off 9.8v)
> > >
> > > > ./configure --with-builtin-filters
> > > > And $cfgopts = hypertext,locale,iconv,multibyte,terminfo
> > > > Kind regards,
> > > > Thomas
> > >
> > > --
> > >
> > > Thomas E. Dickey [email protected]
> > > https://invisible-island.net
> > > ftp://ftp.invisible-island.net
> > 
> > 
> 
> -- 
> Thomas E. Dickey <[email protected]>
> https://invisible-island.net
> ftp://ftp.invisible-island.net



-- 
Thomas E. Dickey <[email protected]>
https://invisible-island.net
ftp://ftp.invisible-island.net
--- CHANGES	2020/11/30 00:56:48	1.1972
+++ CHANGES	2020/12/01 22:53:53
@@ -1,9 +1,10 @@
 Changes for vile 9.9 (released ??? ??? ?? ????)
 
- 20201129 (v)
+ 20201201 (v)
  	> Stephan Schulz:
 	+ add popup-choices mode (Savannah #58999)
 	> Tom Dickey:
+	+ add check for 'U' in read_quoted() needed to make ^VUxxxx work.
 	+ modify lins_chars() to handle a case where a script inserts a UTF-8
 	  character (report by Thomas Dupond).
 	+ modify configure/makefiles to support ".PHONY" feature.
--- input.c	2020/08/30 23:46:08	1.371
+++ input.c	2020/12/01 22:47:44
@@ -44,7 +44,7 @@
  *	tgetc_avail()     true if a key is avail from tgetc() or below.
  *	keystroke_avail() true if a key is avail from keystroke() or below.
  *
- * $Id: input.c,v 1.371 2020/08/30 23:46:08 tom Exp $
+ * $Id: input.c,v 1.372 2020/12/01 22:47:44 tom Exp $
  */
 
 #include	"estruct.h"
@@ -1852,10 +1852,11 @@
     if (count <= 0)
 	returnCode(c);
 
-    /* accumulate up to 3 digits for a single byte */
+    /* accumulate digits for a character */
     if (isDigit(c)
 #if OPT_MULTIBYTE
 	|| (c == 'u')
+	|| (c == 'U')
 #endif
 	|| (c == 'x')) {
 	if (!inscreen) {
--- insert.c	2020/11/29 22:44:16	1.185
+++ insert.c	2020/12/02 00:52:02
@@ -4,7 +4,7 @@
  * Most code probably by Dan Lawrence or Dave Conroy for MicroEMACS
  * Extensions for vile by Paul Fox
  *
- * $Header: /usr/build/vile/vile/RCS/insert.c,v 1.185 2020/11/29 22:44:16 tom Exp $
+ * $Id: insert.c,v 1.187 2020/12/02 00:52:02 tom Exp $
  */
 
 #include	"estruct.h"
@@ -446,7 +446,7 @@
 		if (isbackspace(c)) {	/* vi beeps here */
 		    s = TRUE;	/* replaced with nothing */
 		} else {
-		    t = s = lins_chars(n, c);
+		    t = s = lins_chars(n, c, FALSE);
 		}
 	    }
 	}
@@ -981,11 +981,11 @@
 		rc = inspound();
 	    } else {
 		autoindented = -1;
-		rc = lins_chars(1, c);
+		rc = lins_chars(1, c, FALSE);
 	    }
 	} else {
 	    autoindented = -1;
-	    rc = lins_chars(1, c);
+	    rc = lins_chars(1, c, FALSE);
 	}
     }
     return rc;
@@ -1487,10 +1487,13 @@
 }
 
 /*
- * Quote the next character, and insert it into the buffer. All the characters
- * are taken literally, with the exception of a) the newline, which always has
- * its line splitting meaning, and b) decimal digits, which are accumulated
- * (up to three of them) and the resulting value put in the buffer.
+ * Quote the next character, and insert it into the buffer.  That character may
+ * be literal, or composed of decimal or hexadecimal digits:
+ *
+ * a) the newline, which always has its line splitting meaning, and
+ * b) the digits are accumulated (up to a radix-based limit).
+ *
+ * The resulting value is inserted into the buffer.
  *
  * A character is always read, even if it is inserted 0 times, for regularity.
  */
@@ -1512,7 +1515,7 @@
 		s = lnewline();
 	    } while ((s == TRUE) && (--n != 0));
 	} else {
-	    s = lins_chars(n, c);
+	    s = lins_chars(n, c, TRUE);
 	}
     }
     return s;
--- line.c	2020/11/30 00:46:44	1.236
+++ line.c	2020/12/01 23:27:32
@@ -10,7 +10,7 @@
  * editing must be being displayed, which means that "b_nwnd" is non zero,
  * which means that the dot and mark values in the buffer headers are nonsense.
  *
- * $Id: line.c,v 1.236 2020/11/30 00:46:44 tom Exp $
+ * $Id: line.c,v 1.238 2020/12/01 23:27:32 tom Exp $
  */
 
 /* #define POISON */
@@ -534,7 +534,7 @@
  * or in insert-mode.
  */
 int
-lins_chars(int n, int c)
+lins_chars(int n, int c, int wide)
 {
     int rc = FALSE;
     UCHAR target[10];
@@ -542,7 +542,7 @@
     int nn;
     int mapped;
 
-    if (!global_is_utfXX() && (c > 127) && b_is_utfXX(curbp)) {
+    if (wide && (c > 127) && b_is_utfXX(curbp)) {
 	nbytes = vl_conv_to_utf8(target, (UINT) c, sizeof(target));
     } else if (okCTYPE2(vl_wide_enc) && !vl_mb_is_8bit(c)) {
 	nbytes = 1;
--- proto.h	2020/08/30 23:57:00	1.751
+++ proto.h	2020/12/01 23:24:17
@@ -4,7 +4,7 @@
  *
  *   Created: Thu May 14 15:44:40 1992
  *
- * $Id: proto.h,v 1.751 2020/08/30 23:57:00 tom Exp $
+ * $Id: proto.h,v 1.752 2020/12/01 23:24:17 tom Exp $
  */
 
 #ifndef VILE_PROTO_H
@@ -873,10 +873,10 @@
 
 #if OPT_MULTIBYTE
 extern int ldel_chars (B_COUNT n, int kflag);
-extern int lins_chars (int n, int c);
+extern int lins_chars (int n, int c, int wide);
 #else
 #define ldel_chars(n, kflag) ldel_bytes(n, kflag)
-#define lins_chars(n, c)     lins_bytes(n, c)
+#define lins_chars(n, c, wide)  lins_bytes(n, c)
 #endif
 
 #if OPT_REGS_CMPL
store-procedure oops 
insert-string "é"
~endm
map I ié
map J iue9
map K :setl fk=8bit
map M :oops
store-procedure oops 
insert-string "ǩ"
~endm
map I iǩ
map J iu1e9
map K :setl fk=8bit
map M :oops
store-procedure oops 
insert-string "⇩"
~endm
map I i⇩
map J iu21e9
map K :setl fk=8bit
map M :oops

Attachment: signature.asc
Description: PGP signature

Reply via email to