On Sun, Aug 22, 2004 at 08:02:54PM +0200, Jan Minar wrote: > +/* vasprintf() requires _GNU_SOURCE. Which is OK with Debian. */ > +#ifndef _GNU_SOURCE > +#define _GNU_SOURCE
This must be done before stdio.h is included.
> +#endif
> +#include <ctype.h>
> +
> #ifndef errno
> extern int errno;
> #endif
> @@ -345,7 +351,49 @@
> int expected_size;
> int allocated;
> };
> +
> +/* XXX Where does the declaration belong?? */
> +void escape_buffer (char **src);
>
> +/*
> + * escape_untrusted -- escape using '\NNN'. To be used wherever we want to
> + * print untrusted data.
> + *
> + * Syntax: escape_buffer (&buf-to-escape);
> + */
> +void escape_buffer (char **src)
> +{
> + char *dest;
> + int i, j;
> +
> + /* We encode each byte using at most 4 bytes, + trailing '\0'. */
> + dest = xmalloc (4 * strlen (*src) + 1);
> +
> + for (i = j = 0; (*src)[i] != '\0'; ++i) {
> + /*
> + * We allow any non-control character, because LINE TABULATION
> + * & friends can't do more harm than SPACE. And someone
> + * somewhere might be using these, so unless we actually can't
> + * protect against spoofing attacks, we don't pretend we can.
> + *
> + * Note that '\n' is included both in the isspace() *and*
> + * iscntrl() range.
> + */
> + if (isprint((*src)[i]) || isspace((*src)[i])) {
This lets '\r' thru, not good. BTW, (*src)[i] is quite a cypher.
> + dest[j++] = (*src)[i];
> + } else {
> + dest[j++] = '\\';
> + dest[j++] = '0' + (((*src)[i] & 0xff) >> 6);
> + dest[j++] = '0' + (((*src)[i] & 0x3f) >> 3);
> + dest[j++] = '0' + ((*src)[i] & 7);
> + }
> + }
> + dest[j] = '\0';
> +
> + xfree (*src);
> + *src = dest;
> +}
Attached is version 2, which solves these problems.
Please keep me CC'd.
Jan.
--
"To me, clowns aren't funny. In fact, they're kind of scary. I've wondered
where this started and I think it goes back to the time I went to the circus,
and a clown killed my dad."
--- wget-1.9.1.ORIG/src/log.c 2004-08-22 13:42:33.000000000 +0200
+++ wget-1.9.1-jan/src/log.c 2004-08-24 02:38:38.000000000 +0200
@@ -42,6 +42,12 @@
# endif
#endif /* not WGET_USE_STDARG */
+/* vasprintf() requires _GNU_SOURCE. Which is OK with Debian. */
+/* This *must* be defined before stdio.h is included. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
#include <stdio.h>
#ifdef HAVE_STRING_H
# include <string.h>
@@ -63,6 +69,8 @@
#include "wget.h"
#include "utils.h"
+#include <ctype.h>
+
#ifndef errno
extern int errno;
#endif
@@ -345,7 +353,69 @@
int expected_size;
int allocated;
};
+
+/* XXX Where does the declaration belong?? */
+void escape_buffer (char **src);
+/*
+ * escape_buffer -- escape using '\NNN'. To be used wherever we want to print
+ * untrusted data.
+ *
+ * Syntax: escape_buffer (&buf-to-escape);
+ */
+void escape_buffer (char **src)
+{
+ char *dest, c;
+ int i, j;
+
+ /* We encode each byte using at most 4 bytes, + trailing '\0'. */
+ dest = xmalloc (4 * strlen (*src) + 1);
+
+ for (i = j = 0; (c = (*src)[i]) != '\0'; ++i) {
+ /*
+ * We allow any non-control character, because '\t' & friends
+ * can't do more harm than SPACE. And someone somewhere might
+ * be using these, so unless we actually can protect against
+ * spoofing attacks, we don't pretend it.
+ *
+ * Note that '\n' is included both in the isspace() *and*
+ * iscntrl() range.
+ *
+ * We try not to allow '\r' & friends by using isblank()
+ * instead of isspace(). Let's hope noone will complain about
+ * '\v' & similar being filtered (the characters we may still
+ * let thru can vary among locales, so there is not much we can
+ * do about this *from within logvprintf()*.
+ */
+ if (c == '\r' && *(&c + 1) == '\n') {
+ /*
+ * I've spotted wget printing CRLF line terminators
+ * while communicating with ftp://ftp.debian.org. This
+ * is a bug: wget should print whatever the platform
+ * line terminator is (CR on Mac, CRLF on CP/M, LF on
+ * Un*x, etc.)
+ *
+ * We work around this bug here by taking CRLF for a
+ * line terminator. A lone CR is still treated as a
+ * control character.
+ */
+ i++;
+ dest[j++] = '\n';
+ } else if (isprint(c) || isblank(c) || c == '\n') {
+ dest[j++] = c;
+ } else {
+ dest[j++] = '\\';
+ dest[j++] = '0' + ((c & 0xff) >> 6);
+ dest[j++] = '0' + ((c & 0x3f) >> 3);
+ dest[j++] = '0' + (c & 7);
+ }
+ }
+ dest[j] = '\0';
+
+ xfree (*src);
+ *src = dest;
+}
+
/* Print a message to the log. A copy of message will be saved to
saved_log, for later reusal by log_dump_context().
@@ -364,15 +434,28 @@
int available_size = sizeof (smallmsg);
int numwritten;
FILE *fp = get_log_fp ();
+ char *buf;
+
+ /* int vasprintf(char **strp, const char *fmt, va_list ap); */
+ if (vasprintf (&buf , fmt, args) == -1) {
+ perror (_("Error"));
+ exit (1);
+ }
+
+ escape_buffer (&buf);
if (!save_context_p)
{
/* In the simple case just call vfprintf(), to avoid needless
allocation and games with vsnprintf(). */
- vfprintf (fp, fmt, args);
- goto flush;
- }
+ /* vfprintf() didn't check return value, neither will we */
+ (void) fprintf(fp, "%s", buf);
+ }
+ else /* goto flush; */ /* There's no need to use goto here */
+/* This else-clause purposefully shifted 4 columns to the left, so that the
+ * diff is easy to read --Jan */
+{
if (state->allocated != 0)
{
write_ptr = state->bigmsg;
@@ -384,8 +467,12 @@
missing from legacy systems. Therefore I consider it safe to
assume that its return value is meaningful. On the systems where
vsnprintf() is not available, we use the implementation from
- snprintf.c which does return the correct value. */
- numwritten = vsnprintf (write_ptr, available_size, fmt, args);
+ snprintf.c which does return the correct value.
+
+ With snprintf(), this probably doesn't hold anymore. But this is Debian,
+ so who cares. */
+
+ numwritten = snprintf (write_ptr, available_size, "%s", buf);
/* vsnprintf() will not step over the limit given by available_size.
If it fails, it will return either -1 (POSIX?) or the number of
@@ -420,7 +507,7 @@
if (state->bigmsg)
xfree (state->bigmsg);
- flush:
+} /* flush: */
if (flush_log_p)
logflush ();
else
pgpEOeXj5QjBu.pgp
Description: PGP signature
