On Sat, Sep 03, 2016 at 04:55:59PM +0100, Theo Buehler wrote: > The -w flag to jot() allows the user to specify a format string that > will be passed to printf after it was verified that it contains only one > conversion specifier. There are some subtle differences what jot's > getformat() accepts and what printf will do. Thus, I took vfprintf.c and > carved out whatever is unneeded for jot itself. The result is a slightly > less ugly function in a separate file.
Please see some comments below. I really tried to understand all the corner cases in the getformat() function, but couldn't wrap my head around it. I believe it would be best to just axe the -w flag. Yes, there are probably scripts out there using it, but I think carrying that burden around is not worth it. Every possible implementation either is an adapted reimplementation of printf, or whitewashing the string before passing it to printf(). I would like to remind of the patch(1) ed script issue that resulted in shell injection, just because the whitewash code and the actual parser where not in sync. It's a losing game. natano > > Index: Makefile > =================================================================== > RCS file: /var/cvs/src/usr.bin/jot/Makefile,v > retrieving revision 1.5 > diff -u -p -r1.5 Makefile > --- Makefile 10 Jan 2016 01:15:52 -0000 1.5 > +++ Makefile 3 Sep 2016 15:48:06 -0000 > @@ -1,6 +1,7 @@ > # $OpenBSD: Makefile,v 1.5 2016/01/10 01:15:52 tb Exp $ > > PROG= jot > +SRCS= getformat.c jot.c > CFLAGS+= -Wall > LDADD+= -lm > DPADD+= ${LIBM} > Index: getformat.c > =================================================================== > RCS file: getformat.c > diff -N getformat.c > --- /dev/null 1 Jan 1970 00:00:00 -0000 > +++ getformat.c 3 Sep 2016 15:55:21 -0000 > @@ -0,0 +1,188 @@ > +/* $OpenBSD$ */ > +/*- > + * Copyright (c) 1990 The Regents of the University of California. > + * All rights reserved. > + * > + * This code is derived from software contributed to Berkeley by > + * Chris Torek. > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions > + * are met: > + * 1. Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * 2. Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * 3. Neither the name of the University nor the names of its contributors > + * may be used to endorse or promote products derived from this software > + * without specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND > + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE > + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE > + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE > + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL > + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS > + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) > + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT > + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY > + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > + * SUCH DAMAGE. > + */ > + > +/* Based on src/lib/libc/stdio/vfprintf.c r1.77 */ > + > +#include <err.h> > +#include <stdbool.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <wchar.h> > + > +extern int prec; > +extern bool boring; > +extern bool chardata; > +extern bool infinity; > +extern bool intdata; > +extern bool longdata; > +extern bool nosign; > +extern bool randomize; 'boring', 'infinity' and 'randomize' are unused in getformat.c. > + > +int getformat(char *); > + > +/* > + * Macros for converting digits to letters and vice versa > + */ > +#define to_digit(c) ((c) - '0') > +#define is_digit(c) ((unsigned)to_digit(c) <= 9) I would prefer this to be less magic. How about something like this instead? #define is_digit(c) ((c) >= '0' && (c) <= '9') This would also allow to remove the to_digit() macro. > + > +int > +getformat(char *fmt) > +{ > + int ch; /* character from fmt */ > + wchar_t wc; > + mbstate_t ps; > + size_t sz; > + bool firsttime = true; > + > + sz = sizeof(fmt) - strlen(fmt) - 1; The sizeof() doesn't do what you expect it to do here. 'fmt' is just a pointer here, so the value returned is far to low. What we want is the size of the original array instead. $ jot -w 'xxx' 10 jot: -w word too long > + > + memset(&ps, 0, sizeof(ps)); > + /* > + * Scan the format for conversions (`%' character). > + */ > + for (;;) { > + size_t len; > + > + while ((len = mbrtowc(&wc, fmt, MB_CUR_MAX, &ps)) != 0) { > + if (len == (size_t)-1 || len == (size_t)-2) { > + err(1, "illegal or incomplete byte sequence " > + "in format string"); > + } > + if (wc == '%') { > + if (*(fmt + 1) == '\0') { > + /* no trailing '%' allowed */ > + if (sz <= 0) > + errx(1, "-w word too long"); > + strlcat(fmt, "%", sizeof fmt); > + return 0; > + } else if (*(fmt + 1) != '%') > + break; > + fmt += 2; /* skip over "%%" */ > + } else > + fmt += len; > + } > + > + if (!firsttime) { > + if (len == 0) > + return 0; > + return 1; Previously the error message for 'jot -w %d%d 10' was "jot: too many conversions", now it is "jot: illegal or unsupported format '%d%d'". I think the previous error was more helpful. > + } > + > + if (len == 0) { > + if (chardata) { > + if (strlcpy(fmt, "%c", sz) >= sz) > + errx(1, "-w word too long"); > + intdata = true; > + } else { > + int n; > + n = snprintf(fmt, sz, "%%.%df", prec); > + if (n == -1 || n >= (int)sz) > + errx(1, "-w word too long"); > + } > + return 0; > + } > + > + fmt++; /* skip over % */ > +rflag: ch = *fmt++; > +reswitch: switch (ch) { > + case ' ': > + case '#': > + case '-': > + case '+': > + goto rflag; > + case '.': > + if ((ch = *fmt++) == '*') > + return 1; > + while (is_digit(ch)) > + ch = *fmt++; > + if (ch == '$') What is this check supposed to do? There is no '$' case, so the default will be invoked after 'got reswitch;'. > + return 1; > + goto reswitch; > + case '0': > + /* > + * ``Note that 0 is taken as a flag, not as the > + * beginning of a field width.'' > + * -- ANSI X3J11 > + */ > + goto rflag; > + case '1': case '2': case '3': case '4': > + case '5': case '6': case '7': case '8': case '9': > + do { > + ch = *fmt++; > + } while (is_digit(ch)); > + if (ch == '$') ditto. > + return 1; > + goto reswitch; > + case 'l': > + if (*fmt == 'l') > + return 1; > + longdata = true; > + goto rflag; > + case 'a': > + case 'A': > + case 'e': > + case 'E': > + case 'f': > + case 'F': > + case 'g': > + case 'G': > + break; > + case 'c': > + chardata = true; > + break; > + case 'D': > + longdata = true; > + /* FALLTHROUGH */ > + case 'd': > + case 'i': > + intdata = true; > + break; > + case 'O': > + case 'U': > + longdata = true; > + /* FALLTHROUGH */ > + case 'o': > + case 'u': > + case 'X': > + case 'x': > + intdata = nosign = true; > + break; > + default: > + return 1; > + } > + > + firsttime = false; > + } > +} > Index: jot.c > =================================================================== > RCS file: /var/cvs/src/usr.bin/jot/jot.c,v > retrieving revision 1.36 > diff -u -p -r1.36 jot.c > --- jot.c 2 Sep 2016 14:23:09 -0000 1.36 > +++ jot.c 3 Sep 2016 15:48:06 -0000 > @@ -61,17 +61,18 @@ static double step = 1; > > static char format[BUFSIZ]; > static char sepstring[BUFSIZ] = "\n"; > -static int prec = -1; > -static bool boring; > -static bool chardata; > -static bool finalnl = true; > -static bool infinity; > -static bool intdata; > -static bool longdata; > -static bool nosign; > -static bool randomize; 'boring', 'infinity' and 'randomize' are not used in getformat.c, so they can stay static. > > -static void getformat(void); > +int prec; > +bool boring; > +bool chardata; > +bool finalnl = true; > +bool infinity; > +bool intdata; > +bool longdata; > +bool nosign; > +bool randomize; > + > +extern int getformat(char *); > static int getprec(char *); > static int putdata(double, bool); > static void __dead usage(void); > @@ -90,6 +91,10 @@ main(int argc, char *argv[]) > if (pledge("stdio", NULL) == -1) > err(1, "pledge"); > > + boring = chardata = infinity = intdata = longdata = nosign = randomize > + = false; > + prec = -1; > + > while ((ch = getopt(argc, argv, "b:cnp:rs:w:")) != -1) { > switch (ch) { > case 'b': > @@ -176,7 +181,8 @@ main(int argc, char *argv[]) > argv[4]); > } > > - getformat(); > + if (!boring && getformat(format)) > + errx(1, "illegal or unsupported format '%s'", format); > > if (!randomize) { > /* > @@ -350,110 +356,4 @@ getprec(char *s) > if ((s = strchr(s, '.')) == NULL) > return 0; > return strspn(s + 1, "0123456789"); > -} > - > -static void > -getformat(void) > -{ > - char *p, *p2; > - int dot, hash, space, sign, numbers = 0; > - size_t sz; > - > - if (boring) /* no need to bother */ > - return; > - for (p = format; *p != '\0'; p++) /* look for '%' */ > - if (*p == '%') { > - if (*(p+1) != '%') > - break; > - p++; /* leave %% alone */ > - } > - sz = sizeof(format) - strlen(format) - 1; > - if (*p == '\0' && !chardata) { > - int n; > - > - n = snprintf(p, sz, "%%.%df", prec); > - if (n == -1 || n >= (int)sz) > - errx(1, "-w word too long"); > - } else if (*p == '\0' && chardata) { > - if (strlcpy(p, "%c", sz) >= sz) > - errx(1, "-w word too long"); > - intdata = true; > - } else if (*(p+1) == '\0') { > - if (sz <= 0) > - errx(1, "-w word too long"); > - /* cannot end in single '%' */ > - strlcat(format, "%", sizeof format); > - } else { > - /* > - * Allow conversion format specifiers of the form > - * %[#][ ][{+,-}][0-9]*[.[0-9]*]? where ? must be one of > - * [l]{d,i,o,u,x} or {f,e,g,E,G,d,o,x,D,O,U,X,c,u} > - */ > - p2 = p++; > - dot = hash = space = sign = numbers = 0; > - while (!isalpha((unsigned char)*p)) { > - if (isdigit((unsigned char)*p)) { > - numbers++; > - p++; > - } else if ((*p == '#' && !(numbers|dot|sign|space| > - hash++)) || > - (*p == ' ' && !(numbers|dot|space++)) || > - ((*p == '+' || *p == '-') && !(numbers|dot|sign++)) > - || (*p == '.' && !(dot++))) > - p++; > - else > - goto fmt_broken; > - } > - if (*p == 'l') { > - longdata = true; > - if (*++p == 'l') { > - if (p[1] != '\0') > - p++; > - goto fmt_broken; > - } > - } > - switch (*p) { > - case 'o': case 'u': case 'x': case 'X': > - intdata = nosign = true; > - break; > - case 'd': case 'i': > - intdata = true; > - break; > - case 'D': > - if (!longdata) { > - intdata = true; > - break; > - } > - case 'O': case 'U': > - if (!longdata) { > - intdata = nosign = true; > - break; > - } > - case 'c': > - if (!(intdata | longdata)) { > - chardata = true; > - break; > - } > - case 'h': case 'n': case 'p': case 'q': case 's': case 'L': > - case '$': case '*': > - goto fmt_broken; > - case 'f': case 'e': case 'g': case 'E': case 'G': > - if (!longdata) > - break; > - /* FALLTHROUGH */ > - default: > -fmt_broken: > - *++p = '\0'; > - errx(1, "illegal or unsupported format '%s'", p2); > - } > - while (*++p != '\0') > - if (*p == '%' && *(p+1) != '\0' && *(p+1) != '%') > - errx(1, "too many conversions"); > - else if (*p == '%' && *(p+1) == '%') > - p++; > - else if (*p == '%' && *(p+1) == '\0') { > - strlcat(format, "%", sizeof format); > - break; > - } > - } > } >