On Sun, Aug 22, 2004 at 08:02:54PM +0200, Jan Minar wrote: > +/* vasprintf() requires _GNU_SOURCE. Which is OK with Debian. */ > +#ifndef _GNU_SOURCE > +#define _GNU_SOURCE
This must be done before stdio.h is included. > +#endif > +#include <ctype.h> > + > #ifndef errno > extern int errno; > #endif > @@ -345,7 +351,49 @@ > int expected_size; > int allocated; > }; > + > +/* XXX Where does the declaration belong?? */ > +void escape_buffer (char **src); > > +/* > + * escape_untrusted -- escape using '\NNN'. To be used wherever we want to > + * print untrusted data. > + * > + * Syntax: escape_buffer (&buf-to-escape); > + */ > +void escape_buffer (char **src) > +{ > + char *dest; > + int i, j; > + > + /* We encode each byte using at most 4 bytes, + trailing '\0'. */ > + dest = xmalloc (4 * strlen (*src) + 1); > + > + for (i = j = 0; (*src)[i] != '\0'; ++i) { > + /* > + * We allow any non-control character, because LINE TABULATION > + * & friends can't do more harm than SPACE. And someone > + * somewhere might be using these, so unless we actually can't > + * protect against spoofing attacks, we don't pretend we can. > + * > + * Note that '\n' is included both in the isspace() *and* > + * iscntrl() range. > + */ > + if (isprint((*src)[i]) || isspace((*src)[i])) { This lets '\r' thru, not good. BTW, (*src)[i] is quite a cypher. > + dest[j++] = (*src)[i]; > + } else { > + dest[j++] = '\\'; > + dest[j++] = '0' + (((*src)[i] & 0xff) >> 6); > + dest[j++] = '0' + (((*src)[i] & 0x3f) >> 3); > + dest[j++] = '0' + ((*src)[i] & 7); > + } > + } > + dest[j] = '\0'; > + > + xfree (*src); > + *src = dest; > +} Attached is version 2, which solves these problems. Please keep me CC'd. Jan. -- "To me, clowns aren't funny. In fact, they're kind of scary. I've wondered where this started and I think it goes back to the time I went to the circus, and a clown killed my dad."
--- wget-1.9.1.ORIG/src/log.c 2004-08-22 13:42:33.000000000 +0200 +++ wget-1.9.1-jan/src/log.c 2004-08-24 02:38:38.000000000 +0200 @@ -42,6 +42,12 @@ # endif #endif /* not WGET_USE_STDARG */ +/* vasprintf() requires _GNU_SOURCE. Which is OK with Debian. */ +/* This *must* be defined before stdio.h is included. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + #include <stdio.h> #ifdef HAVE_STRING_H # include <string.h> @@ -63,6 +69,8 @@ #include "wget.h" #include "utils.h" +#include <ctype.h> + #ifndef errno extern int errno; #endif @@ -345,7 +353,69 @@ int expected_size; int allocated; }; + +/* XXX Where does the declaration belong?? */ +void escape_buffer (char **src); +/* + * escape_buffer -- escape using '\NNN'. To be used wherever we want to print + * untrusted data. + * + * Syntax: escape_buffer (&buf-to-escape); + */ +void escape_buffer (char **src) +{ + char *dest, c; + int i, j; + + /* We encode each byte using at most 4 bytes, + trailing '\0'. */ + dest = xmalloc (4 * strlen (*src) + 1); + + for (i = j = 0; (c = (*src)[i]) != '\0'; ++i) { + /* + * We allow any non-control character, because '\t' & friends + * can't do more harm than SPACE. And someone somewhere might + * be using these, so unless we actually can protect against + * spoofing attacks, we don't pretend it. + * + * Note that '\n' is included both in the isspace() *and* + * iscntrl() range. + * + * We try not to allow '\r' & friends by using isblank() + * instead of isspace(). Let's hope noone will complain about + * '\v' & similar being filtered (the characters we may still + * let thru can vary among locales, so there is not much we can + * do about this *from within logvprintf()*. + */ + if (c == '\r' && *(&c + 1) == '\n') { + /* + * I've spotted wget printing CRLF line terminators + * while communicating with ftp://ftp.debian.org. This + * is a bug: wget should print whatever the platform + * line terminator is (CR on Mac, CRLF on CP/M, LF on + * Un*x, etc.) + * + * We work around this bug here by taking CRLF for a + * line terminator. A lone CR is still treated as a + * control character. + */ + i++; + dest[j++] = '\n'; + } else if (isprint(c) || isblank(c) || c == '\n') { + dest[j++] = c; + } else { + dest[j++] = '\\'; + dest[j++] = '0' + ((c & 0xff) >> 6); + dest[j++] = '0' + ((c & 0x3f) >> 3); + dest[j++] = '0' + (c & 7); + } + } + dest[j] = '\0'; + + xfree (*src); + *src = dest; +} + /* Print a message to the log. A copy of message will be saved to saved_log, for later reusal by log_dump_context(). @@ -364,15 +434,28 @@ int available_size = sizeof (smallmsg); int numwritten; FILE *fp = get_log_fp (); + char *buf; + + /* int vasprintf(char **strp, const char *fmt, va_list ap); */ + if (vasprintf (&buf , fmt, args) == -1) { + perror (_("Error")); + exit (1); + } + + escape_buffer (&buf); if (!save_context_p) { /* In the simple case just call vfprintf(), to avoid needless allocation and games with vsnprintf(). */ - vfprintf (fp, fmt, args); - goto flush; - } + /* vfprintf() didn't check return value, neither will we */ + (void) fprintf(fp, "%s", buf); + } + else /* goto flush; */ /* There's no need to use goto here */ +/* This else-clause purposefully shifted 4 columns to the left, so that the + * diff is easy to read --Jan */ +{ if (state->allocated != 0) { write_ptr = state->bigmsg; @@ -384,8 +467,12 @@ missing from legacy systems. Therefore I consider it safe to assume that its return value is meaningful. On the systems where vsnprintf() is not available, we use the implementation from - snprintf.c which does return the correct value. */ - numwritten = vsnprintf (write_ptr, available_size, fmt, args); + snprintf.c which does return the correct value. + + With snprintf(), this probably doesn't hold anymore. But this is Debian, + so who cares. */ + + numwritten = snprintf (write_ptr, available_size, "%s", buf); /* vsnprintf() will not step over the limit given by available_size. If it fails, it will return either -1 (POSIX?) or the number of @@ -420,7 +507,7 @@ if (state->bigmsg) xfree (state->bigmsg); - flush: +} /* flush: */ if (flush_log_p) logflush (); else
pgpEOeXj5QjBu.pgp
Description: PGP signature