On Tue, Dec 01, 2020 at 04:18:48AM -0500, Thomas Dickey wrote:
> On Mon, Nov 30, 2020 at 02:06:40PM +0000, Thomas Dupond wrote:
> > Thank you very much for your fast reply and this small patch.
> > I downloaded version 9.8u and applied the patch but now
> > whenever I try to insert an é interactively I only get : \?E9
>
> ouch - I'll continue investigating a fix for this (thanks)
Here's a followup (apply after the previous patch) which seems to work.
I've attached the test-scripts that I used:
I is mapped to an insert (using UTF-8)
J is mapped to an insert using ^Vu
K sets the buffer to 8bit
M uses the insert-string command.
> > And this goes for every character like èéêîïôöûù: they do not
> > appear correctly. When I switch to 8bit encoding with
> > `setl fk=8bit`, \?E9 appears correctly as é.
> >
> > On the bright side, when I use insert-string with UTF-8
> > encoding, the characters appear normally in the command prompt
> > and insert-string works as intended.
>
> halfway there :-)
>
> > I realised that you use `setl fk=8bit` and with this encoding
> > everything works as intended, interactive and insert-string. But
> > I would rather use UTF-8 than ISO-8859.
> >
> > I'm sorry I cannot be of much help, I have very little
> > knowledge of C programming.
> >
> > Regards,
> > Thomas Dupond
> >
> > ‐‐‐‐‐‐‐ Original Message ‐‐‐‐‐‐‐
> > Le lundi, novembre 30, 2020 1:59 AM, Thomas Dickey <[email protected]> a écrit
> > :
> >
> > > On Sun, Nov 29, 2020 at 04:21:24PM +0000, Thomas Dupond wrote:
> > >
> > > > Hello,
> > > > I'm just starting to explore vile and I ran into something I
> > > > cannot solve. I was trying to write a macro and while vile
> > > > seems to handle UTF8 really well it doesn't seem to work well
> > > > with the function insert-string.
> > > > I can insert "é" but when I use "insert-string é" it prints this
> > > > mess : ᅢᄅ
> > >
> > > yes... scripting hasn't been as well-tested as interactive stuff :-(
> > >
> > > > Any idea on how to solve this ?
> > >
> > > Here's a fix for the most common case (it won't handle a special
> > > case where the buffer is non-UTF-8), which should work for you.
> > >
> > > also attaching a script I used for testing, e.g.,
> > > ./configure --enable-trace --with-builtin-filters && make
> > > ./vile @foo.rc makefile
> > >
> > > > My locale is :
> > > > LANG=en_GB.UTF-8
> > > > LANGUAGE=en_GB:en
> > >
> > > ...
> > >
> > > > I'm on debian 4.19 and I compiled vile-9.8 from source with
> > >
> > > 9.8's getting a little stale - 9.8u is current.
> > >
> > > I put snapshots in github (but have too many concurrent things to
> > > polish off 9.8v)
> > >
> > > > ./configure --with-builtin-filters
> > > > And $cfgopts = hypertext,locale,iconv,multibyte,terminfo
> > > > Kind regards,
> > > > Thomas
> > >
> > > --
> > >
> > > Thomas E. Dickey [email protected]
> > > https://invisible-island.net
> > > ftp://ftp.invisible-island.net
> >
> >
>
> --
> Thomas E. Dickey <[email protected]>
> https://invisible-island.net
> ftp://ftp.invisible-island.net
--
Thomas E. Dickey <[email protected]>
https://invisible-island.net
ftp://ftp.invisible-island.net
--- CHANGES 2020/11/30 00:56:48 1.1972
+++ CHANGES 2020/12/01 22:53:53
@@ -1,9 +1,10 @@
Changes for vile 9.9 (released ??? ??? ?? ????)
- 20201129 (v)
+ 20201201 (v)
> Stephan Schulz:
+ add popup-choices mode (Savannah #58999)
> Tom Dickey:
+ + add check for 'U' in read_quoted() needed to make ^VUxxxx work.
+ modify lins_chars() to handle a case where a script inserts a UTF-8
character (report by Thomas Dupond).
+ modify configure/makefiles to support ".PHONY" feature.
--- input.c 2020/08/30 23:46:08 1.371
+++ input.c 2020/12/01 22:47:44
@@ -44,7 +44,7 @@
* tgetc_avail() true if a key is avail from tgetc() or below.
* keystroke_avail() true if a key is avail from keystroke() or below.
*
- * $Id: input.c,v 1.371 2020/08/30 23:46:08 tom Exp $
+ * $Id: input.c,v 1.372 2020/12/01 22:47:44 tom Exp $
*/
#include "estruct.h"
@@ -1852,10 +1852,11 @@
if (count <= 0)
returnCode(c);
- /* accumulate up to 3 digits for a single byte */
+ /* accumulate digits for a character */
if (isDigit(c)
#if OPT_MULTIBYTE
|| (c == 'u')
+ || (c == 'U')
#endif
|| (c == 'x')) {
if (!inscreen) {
--- insert.c 2020/11/29 22:44:16 1.185
+++ insert.c 2020/12/02 00:52:02
@@ -4,7 +4,7 @@
* Most code probably by Dan Lawrence or Dave Conroy for MicroEMACS
* Extensions for vile by Paul Fox
*
- * $Header: /usr/build/vile/vile/RCS/insert.c,v 1.185 2020/11/29 22:44:16 tom Exp $
+ * $Id: insert.c,v 1.187 2020/12/02 00:52:02 tom Exp $
*/
#include "estruct.h"
@@ -446,7 +446,7 @@
if (isbackspace(c)) { /* vi beeps here */
s = TRUE; /* replaced with nothing */
} else {
- t = s = lins_chars(n, c);
+ t = s = lins_chars(n, c, FALSE);
}
}
}
@@ -981,11 +981,11 @@
rc = inspound();
} else {
autoindented = -1;
- rc = lins_chars(1, c);
+ rc = lins_chars(1, c, FALSE);
}
} else {
autoindented = -1;
- rc = lins_chars(1, c);
+ rc = lins_chars(1, c, FALSE);
}
}
return rc;
@@ -1487,10 +1487,13 @@
}
/*
- * Quote the next character, and insert it into the buffer. All the characters
- * are taken literally, with the exception of a) the newline, which always has
- * its line splitting meaning, and b) decimal digits, which are accumulated
- * (up to three of them) and the resulting value put in the buffer.
+ * Quote the next character, and insert it into the buffer. That character may
+ * be literal, or composed of decimal or hexadecimal digits:
+ *
+ * a) the newline, which always has its line splitting meaning, and
+ * b) the digits are accumulated (up to a radix-based limit).
+ *
+ * The resulting value is inserted into the buffer.
*
* A character is always read, even if it is inserted 0 times, for regularity.
*/
@@ -1512,7 +1515,7 @@
s = lnewline();
} while ((s == TRUE) && (--n != 0));
} else {
- s = lins_chars(n, c);
+ s = lins_chars(n, c, TRUE);
}
}
return s;
--- line.c 2020/11/30 00:46:44 1.236
+++ line.c 2020/12/01 23:27:32
@@ -10,7 +10,7 @@
* editing must be being displayed, which means that "b_nwnd" is non zero,
* which means that the dot and mark values in the buffer headers are nonsense.
*
- * $Id: line.c,v 1.236 2020/11/30 00:46:44 tom Exp $
+ * $Id: line.c,v 1.238 2020/12/01 23:27:32 tom Exp $
*/
/* #define POISON */
@@ -534,7 +534,7 @@
* or in insert-mode.
*/
int
-lins_chars(int n, int c)
+lins_chars(int n, int c, int wide)
{
int rc = FALSE;
UCHAR target[10];
@@ -542,7 +542,7 @@
int nn;
int mapped;
- if (!global_is_utfXX() && (c > 127) && b_is_utfXX(curbp)) {
+ if (wide && (c > 127) && b_is_utfXX(curbp)) {
nbytes = vl_conv_to_utf8(target, (UINT) c, sizeof(target));
} else if (okCTYPE2(vl_wide_enc) && !vl_mb_is_8bit(c)) {
nbytes = 1;
--- proto.h 2020/08/30 23:57:00 1.751
+++ proto.h 2020/12/01 23:24:17
@@ -4,7 +4,7 @@
*
* Created: Thu May 14 15:44:40 1992
*
- * $Id: proto.h,v 1.751 2020/08/30 23:57:00 tom Exp $
+ * $Id: proto.h,v 1.752 2020/12/01 23:24:17 tom Exp $
*/
#ifndef VILE_PROTO_H
@@ -873,10 +873,10 @@
#if OPT_MULTIBYTE
extern int ldel_chars (B_COUNT n, int kflag);
-extern int lins_chars (int n, int c);
+extern int lins_chars (int n, int c, int wide);
#else
#define ldel_chars(n, kflag) ldel_bytes(n, kflag)
-#define lins_chars(n, c) lins_bytes(n, c)
+#define lins_chars(n, c, wide) lins_bytes(n, c)
#endif
#if OPT_REGS_CMPL
store-procedure oops
insert-string "é"
~endm
map I ié
map J iue9
map K :setl fk=8bit
map M :oops

store-procedure oops
insert-string "ǩ"
~endm
map I iǩ
map J iu1e9
map K :setl fk=8bit
map M :oops

store-procedure oops
insert-string "⇩"
~endm
map I i⇩
map J iu21e9
map K :setl fk=8bit
map M :oops
signature.asc
Description: PGP signature
