Hello tech@,

I've been working on this patch with schwarze@. It introduces UTF-8 support in write(1), or, more precisely, the lack thereof.

The specifications say that write(1) should write characters to the console in conformance with the sender's locale. Since the receiving tty might not be UTF-8 compatible, this could cause undefined behaviour.

We settled on transforming every non-ASCII character to a '?' (this removes vis(3) support, since it doesn't make the text more readable) and detecting UTF-8 continuation bytes so that a single character doesn't cause a flood of '?' characters.

I've also added support for bell, as specified by POSIX.

This is already OK schwarze@. Any other OKs/objections?

Index: write.1
===================================================================
RCS file: /cvs/src/usr.bin/write/write.1,v
retrieving revision 1.17
diff -u -p -r1.17 write.1
--- write.1     4 Jun 2014 06:07:32 -0000       1.17
+++ write.1     1 Feb 2016 18:47:44 -0000
@@ -117,9 +117,11 @@ The specified user is either not logged
 .Sh STANDARDS
 The
 .Nm
-utility is compliant with the
+utility is described in the
 .St -p1003.1-2008
-specification.
+specification. We break compliance in that we ignore the sender's
+locale. Non-ASCII characters will be transformed to a
+.Sq \? .
 .Sh HISTORY
 A
 .Nm
Index: write.c
===================================================================
RCS file: /cvs/src/usr.bin/write/write.c,v
retrieving revision 1.32
diff -u -p -r1.32 write.c
--- write.c     20 Oct 2015 20:21:18 -0000      1.32
+++ write.c     1 Feb 2016 18:47:44 -0000
@@ -34,20 +34,20 @@
  */

 #include <sys/stat.h>
+
 #include <ctype.h>
+#include <err.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <paths.h>
+#include <pwd.h>
+#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <signal.h>
 #include <time.h>
-#include <fcntl.h>
-#include <paths.h>
-#include <pwd.h>
 #include <unistd.h>
-#include <limits.h>
 #include <utmp.h>
-#include <err.h>
-#include <vis.h>

 void done(int sig);
 void do_write(char *, char *, uid_t);
@@ -55,6 +55,7 @@ void wr_fputs(char *);
 void search_utmp(char *, char *, int, char *, uid_t);
 int term_chk(char *, int *, time_t *, int);
 int utmp_chk(char *, char *);
+static int isu8cont(unsigned char c);

 int
 main(int argc, char *argv[])
@@ -296,24 +297,37 @@ done(int sig)
 void
 wr_fputs(char *s)
 {
-       u_char c;
-       char visout[5], *s2;

 #define        PUTC(c) if (putchar(c) == EOF) goto err;

        for (; *s != '\0'; ++s) {
-               c = toascii(*s);
-               if (c == '\n') {
+               if (*s == '\n') {
                        PUTC('\r');
                        PUTC('\n');
                        continue;
                }
-               vis(visout, c, VIS_SAFE|VIS_NOSLASH, s[1]);
-               for (s2 = visout; *s2; s2++)
-                       PUTC(*s2);
+               /* Skip UTF-8 continuation bytes to avoid a run of '?'. */
+               if (isu8cont(*s))
+                       continue;
+               /* Cast: ctype functions are undefined for negative values. */
+               if (isprint((unsigned char)*s) ||
+                   isspace((unsigned char)*s) || *s == '\a') {
+                       PUTC(*s);
+               } else {
+                       PUTC('?');
+               }
        }
        return;

 err:   err(1, NULL);
 #undef PUTC
+}
+
+/*
+ * Return non-zero if c is a UTF-8 continuation byte (bit pattern 10xxxxxx).
+ */
+static int
+isu8cont(unsigned char c)
+{
+       return (c & (0x80 | 0x40)) == 0x80;
 }

Reply via email to