The -w flag to jot() allows the user to specify a format string that
will be passed to printf after it was verified that it contains only one
conversion specifier.  There are some subtle differences what jot's
getformat() accepts and what printf will do. Thus, I took vfprintf.c and
carved out whatever is unneeded for jot itself. The result is a slightly
less ugly function in a separate file.

Index: Makefile
===================================================================
RCS file: /var/cvs/src/usr.bin/jot/Makefile,v
retrieving revision 1.5
diff -u -p -r1.5 Makefile
--- Makefile    10 Jan 2016 01:15:52 -0000      1.5
+++ Makefile    3 Sep 2016 15:48:06 -0000
@@ -1,6 +1,7 @@
 #      $OpenBSD: Makefile,v 1.5 2016/01/10 01:15:52 tb Exp $
 
 PROG=  jot
+SRCS=  getformat.c jot.c
 CFLAGS+= -Wall
 LDADD+=        -lm
 DPADD+=        ${LIBM}
Index: getformat.c
===================================================================
RCS file: getformat.c
diff -N getformat.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ getformat.c 3 Sep 2016 15:55:21 -0000
@@ -0,0 +1,188 @@
+/*     $OpenBSD$       */
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Based on src/lib/libc/stdio/vfprintf.c r1.77 */
+
+#include <err.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+extern int     prec;
+extern bool    boring;
+extern bool    chardata;
+extern bool    infinity;
+extern bool    intdata;
+extern bool    longdata;
+extern bool    nosign;
+extern bool    randomize;
+
+int getformat(char *);
+
+/*
+ * Macros for converting digits to letters and vice versa
+ */
+#define        to_digit(c)     ((c) - '0')
+#define        is_digit(c)     ((unsigned)to_digit(c) <= 9)
+
+int
+getformat(char *fmt)
+{
+       int ch;                 /* character from fmt */
+       wchar_t wc;
+       mbstate_t ps;
+       size_t sz;
+       bool firsttime = true;
+
+       sz = sizeof(fmt) - strlen(fmt) - 1;
+
+       memset(&ps, 0, sizeof(ps));
+       /*
+        * Scan the format for conversions (`%' character).
+        */
+       for (;;) {
+               size_t len;
+
+               while ((len = mbrtowc(&wc, fmt, MB_CUR_MAX, &ps)) != 0) {
+                       if (len == (size_t)-1 || len == (size_t)-2) {
+                               err(1, "illegal or incomplete byte sequence "
+                                   "in format string");
+                       }
+                       if (wc == '%') {
+                               if (*(fmt + 1) == '\0') {
+                                       /* no trailing '%' allowed */
+                                       if (sz <= 0)
+                                               errx(1, "-w word too long");
+                                       strlcat(fmt, "%", sizeof fmt);
+                                       return 0;
+                               } else if (*(fmt + 1) != '%')
+                                       break;
+                               fmt += 2;       /* skip over "%%" */
+                       } else
+                               fmt += len;
+               }
+
+               if (!firsttime) {
+                       if (len == 0)
+                               return 0;
+                       return 1;
+               }
+
+               if (len == 0) {
+                       if (chardata) {
+                               if (strlcpy(fmt, "%c", sz) >= sz)
+                                       errx(1, "-w word too long");
+                               intdata = true;
+                       } else {
+                               int n;
+                               n = snprintf(fmt, sz, "%%.%df", prec);
+                               if (n == -1 || n >= (int)sz)
+                                       errx(1, "-w word too long");
+                       }
+                       return 0;
+               }
+
+               fmt++; /* skip over % */
+rflag:         ch = *fmt++;
+reswitch:      switch (ch) {
+               case ' ':
+               case '#':
+               case '-':
+               case '+':
+                       goto rflag;
+               case '.':
+                       if ((ch = *fmt++) == '*')
+                               return 1;
+                       while (is_digit(ch))
+                               ch = *fmt++;
+                       if (ch == '$')
+                               return 1;
+                       goto reswitch;
+               case '0':
+                       /*
+                        * ``Note that 0 is taken as a flag, not as the
+                        * beginning of a field width.''
+                        *      -- ANSI X3J11
+                        */
+                       goto rflag;
+               case '1': case '2': case '3': case '4':
+               case '5': case '6': case '7': case '8': case '9':
+                       do {
+                               ch = *fmt++;
+                       } while (is_digit(ch));
+                       if (ch == '$')
+                               return 1;
+                       goto reswitch;
+               case 'l':
+                       if (*fmt == 'l')
+                               return 1;
+                       longdata = true;
+                       goto rflag;
+               case 'a':
+               case 'A':
+               case 'e':
+               case 'E':
+               case 'f':
+               case 'F':
+               case 'g':
+               case 'G':
+                       break;
+               case 'c':
+                       chardata = true;
+                       break;
+               case 'D':
+                       longdata = true;
+                       /* FALLTHROUGH */
+               case 'd':
+               case 'i':
+                       intdata = true;
+                       break;
+               case 'O':
+               case 'U':
+                       longdata = true;
+                       /* FALLTHROUGH */
+               case 'o':
+               case 'u':
+               case 'X':
+               case 'x':
+                       intdata = nosign = true;
+                       break;
+               default:
+                       return 1;
+               }
+
+               firsttime = false;
+       }
+}
Index: jot.c
===================================================================
RCS file: /var/cvs/src/usr.bin/jot/jot.c,v
retrieving revision 1.36
diff -u -p -r1.36 jot.c
--- jot.c       2 Sep 2016 14:23:09 -0000       1.36
+++ jot.c       3 Sep 2016 15:48:06 -0000
@@ -61,17 +61,18 @@ static double       step    = 1;
 
 static char    format[BUFSIZ];
 static char    sepstring[BUFSIZ] = "\n";
-static int     prec = -1;
-static bool    boring;
-static bool    chardata;
-static bool    finalnl = true;
-static bool    infinity;
-static bool    intdata;
-static bool    longdata;
-static bool    nosign;
-static bool    randomize;
 
-static void    getformat(void);
+int    prec;
+bool   boring;
+bool   chardata;
+bool   finalnl = true;
+bool   infinity;
+bool   intdata;
+bool   longdata;
+bool   nosign;
+bool   randomize;
+
+extern int     getformat(char *);
 static int     getprec(char *);
 static int     putdata(double, bool);
 static void __dead     usage(void);
@@ -90,6 +91,10 @@ main(int argc, char *argv[])
        if (pledge("stdio", NULL) == -1)
                err(1, "pledge");
 
+       boring = chardata = infinity = intdata = longdata = nosign = randomize
+           = false;
+       prec = -1;
+
        while ((ch = getopt(argc, argv, "b:cnp:rs:w:")) != -1) {
                switch (ch) {
                case 'b':
@@ -176,7 +181,8 @@ main(int argc, char *argv[])
                    argv[4]);
        }
 
-       getformat();
+       if (!boring && getformat(format))
+               errx(1, "illegal or unsupported format '%s'", format);
 
        if (!randomize) {
                /*
@@ -350,110 +356,4 @@ getprec(char *s)
        if ((s = strchr(s, '.')) == NULL)
                return 0;
        return strspn(s + 1, "0123456789");
-}
-
-static void
-getformat(void)
-{
-       char    *p, *p2;
-       int dot, hash, space, sign, numbers = 0;
-       size_t sz;
-
-       if (boring)                             /* no need to bother */
-               return;
-       for (p = format; *p != '\0'; p++)       /* look for '%' */
-               if (*p == '%') {
-                       if (*(p+1) != '%')
-                               break;
-                       p++;                    /* leave %% alone */
-               }
-       sz = sizeof(format) - strlen(format) - 1;
-       if (*p == '\0' && !chardata) {
-               int n;
-
-               n = snprintf(p, sz, "%%.%df", prec);
-               if (n == -1 || n >= (int)sz)
-                       errx(1, "-w word too long");
-       } else if (*p == '\0' && chardata) {
-               if (strlcpy(p, "%c", sz) >= sz)
-                       errx(1, "-w word too long");
-               intdata = true;
-       } else if (*(p+1) == '\0') {
-               if (sz <= 0)
-                       errx(1, "-w word too long");
-               /* cannot end in single '%' */
-               strlcat(format, "%", sizeof format);
-       } else {
-               /*
-                * Allow conversion format specifiers of the form
-                * %[#][ ][{+,-}][0-9]*[.[0-9]*]? where ? must be one of
-                * [l]{d,i,o,u,x} or {f,e,g,E,G,d,o,x,D,O,U,X,c,u}
-                */
-               p2 = p++;
-               dot = hash = space = sign = numbers = 0;
-               while (!isalpha((unsigned char)*p)) {
-                       if (isdigit((unsigned char)*p)) {
-                               numbers++;
-                               p++;
-                       } else if ((*p == '#' && !(numbers|dot|sign|space|
-                           hash++)) ||
-                           (*p == ' ' && !(numbers|dot|space++)) ||
-                           ((*p == '+' || *p == '-') && !(numbers|dot|sign++))
-                           || (*p == '.' && !(dot++)))
-                               p++;
-                       else
-                               goto fmt_broken;
-               }
-               if (*p == 'l') {
-                       longdata = true;
-                       if (*++p == 'l') {
-                               if (p[1] != '\0')
-                                       p++;
-                               goto fmt_broken;
-                       }
-               }
-               switch (*p) {
-               case 'o': case 'u': case 'x': case 'X':
-                       intdata = nosign = true;
-                       break;
-               case 'd': case 'i':
-                       intdata = true;
-                       break;
-               case 'D':
-                       if (!longdata) {
-                               intdata = true;
-                               break;
-                       }
-               case 'O': case 'U':
-                       if (!longdata) {
-                               intdata = nosign = true;
-                               break;
-                       }
-               case 'c':
-                       if (!(intdata | longdata)) {
-                               chardata = true;
-                               break;
-                       }
-               case 'h': case 'n': case 'p': case 'q': case 's': case 'L':
-               case '$': case '*':
-                       goto fmt_broken;
-               case 'f': case 'e': case 'g': case 'E': case 'G':
-                       if (!longdata)
-                               break;
-                       /* FALLTHROUGH */
-               default:
-fmt_broken:
-                       *++p = '\0';
-                       errx(1, "illegal or unsupported format '%s'", p2);
-               }
-               while (*++p != '\0')
-                       if (*p == '%' && *(p+1) != '\0' && *(p+1) != '%')
-                               errx(1, "too many conversions");
-                       else if (*p == '%' && *(p+1) == '%')
-                               p++;
-                       else if (*p == '%' && *(p+1) == '\0') {
-                               strlcat(format, "%", sizeof format);
-                               break;
-                       }
-       }
 }

Reply via email to