This patch implements the %ls and %lc format directives (wchar_t * and
wint_t arguments, respectively). Based on NetBSD and FreeBSD code.
The vfprintf(3) man page has wrongly been claiming that we already
support them.

Because vfprintf(3) is used on ramdisks, the new code is wrapped in
#ifdef PRINTF_WIDE_CHAR, which is defined when the system libc is built
but not when the vfprintf() stub for the ramdisk is built. This way,
ramdisks should not be affected.

The alternative to having the #ifdef would be adding libstubs for
wcsrtombs() and wcrtomb() (to avoid pulling citrus stuff onto the
ramdisk) and tolerating some useless growth of the vfprintf() stub.
While conditional compilation sucks in general, I think we're better
off with the #ifdef in this case.
I'll happily implement the alternative if it's preferred, though.

Index: stdio/Makefile.inc
===================================================================
RCS file: /cvs/src/lib/libc/stdio/Makefile.inc,v
retrieving revision 1.13
diff -u -p -r1.13 Makefile.inc
--- stdio/Makefile.inc  17 Jun 2005 20:40:32 -0000      1.13
+++ stdio/Makefile.inc  23 Apr 2011 20:01:40 -0000
@@ -3,7 +3,7 @@
 # stdio sources
 .PATH: ${LIBCSRCDIR}/stdio
 
-CFLAGS+=-DFLOATING_POINT
+CFLAGS+=-DFLOATING_POINT -DPRINTF_WIDE_CHAR
 
 SRCS+= asprintf.c clrerr.c fclose.c fdopen.c feof.c ferror.c fflush.c fgetc.c \
        fgetln.c fgetpos.c fgets.c fileno.c findfp.c flags.c fopen.c \
Index: stdio/vfprintf.c
===================================================================
RCS file: /cvs/src/lib/libc/stdio/vfprintf.c,v
retrieving revision 1.60
diff -u -p -r1.60 vfprintf.c
--- stdio/vfprintf.c    22 Dec 2010 14:54:44 -0000      1.60
+++ stdio/vfprintf.c    23 Apr 2011 20:36:24 -0000
@@ -49,6 +49,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>
 
 #include "local.h"
 #include "fvwrite.h"
@@ -79,6 +80,8 @@ union arg {
        double                  doublearg;
        long double             longdoublearg;
 #endif
+       wint_t                  wintarg;
+       wchar_t                 *pwchararg;
 };
 
 static int __find_arguments(const char *fmt0, va_list ap, union arg **argtable,
@@ -138,6 +141,72 @@ __sbprintf(FILE *fp, const char *fmt, va
        return (ret);
 }
 
+#ifdef PRINTF_WIDE_CHAR
+/*
+ * Convert the wide character string wcsarg (a %ls argument) to a newly
+ * malloc'd, NUL-terminated multibyte string that the caller must free.
+ * If prec is negative the whole string is converted; otherwise at most prec
+ * bytes are produced and wcsarg need not be null-terminated as long as it
+ * supplies that many bytes. Returns NULL if a character cannot be converted
+ * (errno is set to EILSEQ) or if malloc() fails.
+ */
+static char *
+__wcsconv(wchar_t *wcsarg, int prec)
+{
+       mbstate_t mbs;
+       char buf[MB_LEN_MAX];
+       wchar_t *p;
+       char *convbuf;
+       size_t clen, nbytes;
+
+       /* Allocate space for the maximum number of bytes we could output. */
+       if (prec < 0) {
+               memset(&mbs, 0, sizeof(mbs));
+               p = wcsarg;
+               nbytes = wcsrtombs(NULL, (const wchar_t **)&p, 0, &mbs);
+               if (nbytes == (size_t)-1) {
+                       errno = EILSEQ;
+                       return (NULL);
+               }
+       } else if (prec < 128) {
+               /* Small precision: allocate the maximum instead of scanning. */
+               nbytes = prec;
+       } else {
+               /*
+                * Measure the output. wcrtomb() returns 1, not 0, for a
+                * null wide character, so test for the terminator
+                * explicitly; also stop once prec bytes are accounted for
+                * so that nothing beyond what is needed gets read.
+                */
+               nbytes = clen = 0;
+               memset(&mbs, 0, sizeof(mbs));
+               for (p = wcsarg; nbytes < (size_t)prec && *p != L'\0'; p++) {
+                       clen = wcrtomb(buf, *p, &mbs);
+                       if (clen == (size_t)-1 ||
+                           nbytes + clen > (size_t)prec)
+                               break;
+                       nbytes += clen;
+               }
+               if (clen == (size_t)-1) {
+                       errno = EILSEQ;
+                       return (NULL);
+               }
+       }
+       if ((convbuf = malloc(nbytes + 1)) == NULL)
+               return (NULL);
+
+       /* Fill the output buffer. */
+       p = wcsarg;
+       memset(&mbs, 0, sizeof(mbs));
+       if ((nbytes = wcsrtombs(convbuf, (const wchar_t **)&p,
+           nbytes, &mbs)) == (size_t)-1) {
+               free(convbuf);
+               errno = EILSEQ;
+               return (NULL);
+       }
+       convbuf[nbytes] = '\0';
+       return (convbuf);
+}
+#endif
 
 #ifdef FLOATING_POINT
 #include <float.h>
@@ -260,7 +329,9 @@ __vfprintf(FILE *fp, const char *fmt0, _
        size_t argtablesiz;
        int nextarg;            /* 1-based argument index */
        va_list orgap;          /* original argument pointer */
-
+#ifdef PRINTF_WIDE_CHAR
+       char *convbuf;          /* buffer for wide to multi-byte conversion */
+#endif
        /*
         * Choose PADSIZE to trade efficiency vs. size.  If larger printf
         * fields occur frequently, increase PADSIZE and make the initialisers
@@ -402,7 +473,9 @@ __vfprintf(FILE *fp, const char *fmt0, _
        uio.uio_resid = 0;
        uio.uio_iovcnt = 0;
        ret = 0;
-
+#ifdef PRINTF_WIDE_CHAR
+       convbuf = NULL;
+#endif
        memset(&ps, 0, sizeof(ps));
        /*
         * Scan the format for conversions (`%' character).
@@ -553,8 +626,28 @@ reswitch:  switch (ch) {
                        flags |= SIZEINT;
                        goto rflag;
                case 'c':
-                       *(cp = buf) = GETARG(int);
-                       size = 1;
+#ifdef PRINTF_WIDE_CHAR
+                       if (flags & LONGINT) {
+                               mbstate_t mbs;
+                               size_t mbseqlen;
+
+                               memset(&mbs, 0, sizeof(mbs));
+                               mbseqlen = wcrtomb(buf,
+                                   (wchar_t)GETARG(wint_t), &mbs);
+                               if (mbseqlen == (size_t)-1) {
+                                       fp->_flags |= __SERR;
+                                       errno = EILSEQ;
+                                       goto error;
+                               }
+                               cp = buf;
+                               size = (int)mbseqlen;
+                       } else {
+#endif
+                               *(cp = buf) = GETARG(int);
+                               size = 1;
+#ifdef PRINTF_WIDE_CHAR
+                       }
+#endif
                        sign = '\0';
                        break;
                case 'D':
@@ -744,6 +837,26 @@ fp_common:
                        ox[1] = 'x';
                        goto nosign;
                case 's':
+#ifdef PRINTF_WIDE_CHAR
+                       if (flags & LONGINT) {
+                               wchar_t *wcp;
+
+                               /* Drop the buffer of any earlier %ls. */
+                               free(convbuf);
+                               convbuf = NULL;
+                               if ((wcp = GETARG(wchar_t *)) == NULL) {
+                                       cp = "(null)";
+                               } else {
+                                       convbuf = __wcsconv(wcp, prec);
+                                       if (convbuf == NULL) {
+                                               /* Keep the other flags. */
+                                               fp->_flags |= __SERR;
+                                               goto error;
+                                       }
+                                       cp = convbuf;
+                               }
+                       } else
+#endif /* PRINTF_WIDE_CHAR */
                        if ((cp = GETARG(char *)) == NULL)
                                cp = "(null)";
                        if (prec >= 0) {
@@ -995,6 +1108,8 @@ finish:
 #define TP_MAXINT      24
 #define T_CHAR         25
 #define T_U_CHAR       26
+#define T_WINT         27
+#define TP_WCHAR       28
 
 /*
  * Find all arguments when a positional parameter is encountered.  Returns a
@@ -1160,7 +1275,12 @@ reswitch:        switch (ch) {
                        flags |= SIZEINT;
                        goto rflag;
                case 'c':
-                       ADDTYPE(T_INT);
+#ifdef PRINTF_WIDE_CHAR
+                       if (flags & LONGINT)
+                               ADDTYPE(T_WINT);
+                       else
+#endif
+                               ADDTYPE(T_INT);
                        break;
                case 'D':
                        flags |= LONGINT;
@@ -1210,7 +1330,12 @@ reswitch:        switch (ch) {
                        ADDTYPE(TP_VOID);
                        break;
                case 's':
-                       ADDTYPE(TP_CHAR);
+#ifdef PRINTF_WIDE_CHAR
+                       if (flags & LONGINT)
+                               ADDTYPE(TP_WCHAR);
+                       else
+#endif
+                               ADDTYPE(TP_CHAR);
                        break;
                case 'U':
                        flags |= LONGINT;
@@ -1311,6 +1436,14 @@ done:
                case TP_MAXINT:
                        (*argtable)[n].intmaxarg = va_arg(ap, intmax_t);
                        break;
+#ifdef PRINTF_WIDE_CHAR
+               case T_WINT:
+                       (*argtable)[n].wintarg = va_arg(ap, wint_t);
+                       break;
+               case TP_WCHAR:
+                       (*argtable)[n].pwchararg = va_arg(ap, wchar_t *);
+                       break;
+#endif
                }
        }
        goto finish;

Reply via email to