Okay, once again _with_ patch:

We have been discussing this topic in the past: a stricter check
should be applied to the request line, in order to prevent arbitrary
user input to end up in the access_log and error_log. It could be
misused to spoof accesses to nonexistent (or inaccessible) resources,
of course without the client actually getting access to them.

The solution I present here exists of two separate issues:

1) Detecting and disallowing garbage in the request line.
   Stuff like the 'planted' string:
    % echo 'GET / HTTP/1.0" 304 -\r207.46.197.102 - - [01/May/2002:00:02:25 +0200] 
"GET / HTTP/1.0\n' \
      | netcat www.apache.org 80
   until now looks like there was an access from 207.46.197.102,
   at least to the admin doing a "tail access_log", and maybe to
   some log analyzers too. So, any garbage after the correct
   syntax for a request
     <method> <uri> <protocol>
   (like in "GET / HTTP/1.1") should be disallowed and logged.

2) Escaping dangerous characters in the log files.
   Today, you can cause almost any character to appear in the logs.
   Simply do a "echo '<ESC>[2J' | netcat localhost 80" and if the
   administrator has a "tail -f access_log" running in some window,
   it will be cleared at this point.
   Now we don't want to break existing log analyzers, but escaping
   such b******t is definitely useful. What I do in this patch is
   to add another escaping class T_ESCAPE_LOGITEM and use it on
   items that *might* have come from the user (i.e., the 'tainted'
   items request line, request MIME header lines, possibly DNS
   reverse lookup names, ...). I escape invalid chars in \xXX
   syntax, except for '"', '\\' itself and '\r', '\n', '\b', '\t'
   which are simply escaped by preceding them with a '\\'. Thus it is
   possible to distinguish a (client-provided) '\\' from a '\\' inserted
   by the escaper. The extra escaping of '"' was added to prevent
   spoofing attemps (in the CLF, the request line is enclosed in '"',
   and it was easy to add a '"' to the request and confuse the reader
   of the log file, see example above).

The patch does not (yet) try to filter *every* log item through the
log filter, because apache users may want to add special characters
to the log at their discretion (using LogFormat). But the typical
abuse is caught (and logged) in a sensible manner.

Comments?

    Martin
-- 
<[EMAIL PROTECTED]>         |     Fujitsu Siemens
Fon: +49-89-636-46021, FAX: +49-89-636-47655 | 81730  Munich,  Germany
Index: include/httpd.h
===================================================================
RCS file: /home/cvs/apachen/src/include/httpd.h,v
retrieving revision 1.185
diff -u -r1.185 httpd.h
--- include/httpd.h     9 Apr 2002 09:31:13 -0000       1.185
+++ include/httpd.h     13 May 2002 11:22:16 -0000
@@ -1032,6 +1032,7 @@
 API_EXPORT(char *) ap_escape_html(pool *p, const char *s);
 API_EXPORT(char *) ap_construct_server(pool *p, const char *hostname,
                                    unsigned port, const request_rec *r);
+API_EXPORT(char *) ap_escape_logitem(pool *p, const char *str);
 API_EXPORT(char *) ap_escape_shell_cmd(pool *p, const char *s);
 
 API_EXPORT(int) ap_count_dirs(const char *path);
Index: main/gen_test_char.c
===================================================================
RCS file: /home/cvs/apachen/src/main/gen_test_char.c,v
retrieving revision 1.7
diff -u -r1.7 gen_test_char.c
--- main/gen_test_char.c        22 Mar 2002 20:53:04 -0000      1.7
+++ main/gen_test_char.c        13 May 2002 11:22:16 -0000
@@ -8,6 +8,7 @@
 #define T_ESCAPE_PATH_SEGMENT  (0x02)
 #define T_OS_ESCAPE_PATH       (0x04)
 #define T_HTTP_TOKEN_STOP      (0x08)
+#define T_ESCAPE_LOGITEM       (0x10)
 
 int main(int argc, char *argv[])
 {
@@ -16,25 +17,27 @@
 
     printf(
 "/* this file is automatically generated by gen_test_char, do not edit */\n"
-"#define T_ESCAPE_SHELL_CMD    (%u)\n"
-"#define T_ESCAPE_PATH_SEGMENT (%u)\n"
-"#define T_OS_ESCAPE_PATH      (%u)\n"
-"#define T_HTTP_TOKEN_STOP     (%u)\n"
-"\n"
-"static const unsigned char test_char_table[256] = {\n"
-"    0,",
+"#define T_ESCAPE_SHELL_CMD    0x%02x /* chars with special meaning in the shell */\n"
+"#define T_ESCAPE_PATH_SEGMENT 0x%02x /* find path segment, as defined in RFC1808 
+*/\n"
+"#define T_OS_ESCAPE_PATH      0x%02x /* escape characters in a path or uri */\n"
+"#define T_HTTP_TOKEN_STOP     0x%02x /* find http tokens, as defined in RFC2616 */\n"
+"#define T_ESCAPE_LOGITEM      0x%02x /* filter what should go in the log file */\n"
+"\n",
        T_ESCAPE_SHELL_CMD,
        T_ESCAPE_PATH_SEGMENT,
        T_OS_ESCAPE_PATH,
-       T_HTTP_TOKEN_STOP);
+       T_HTTP_TOKEN_STOP,
+       T_ESCAPE_LOGITEM
+       );
 
     /* we explicitly dealt with NUL above
      * in case some strchr() do bogosity with it */
 
+    printf("static const unsigned char test_char_table[256] = {\n"
+          "    0x00, ");    /* print initial item */
+
     for (c = 1; c < 256; ++c) {
        flags = 0;
-       if (c % 20 == 0)
-           printf("\n    ");
 
        /* escape_shell_cmd */
 #if defined(WIN32) || defined(OS2)
@@ -67,8 +70,19 @@
        if (ap_iscntrl(c) || strchr(" \t()<>@,;:\\/[]?={}", c)) {
            flags |= T_HTTP_TOKEN_STOP;
        }
-       printf("%u%c", flags, (c < 255) ? ',' : ' ');
 
+       /* For logging, escape all control characters,
+        * double quotes (because they delimit the request in the log file)
+        * backslashes (because we use backslash for escaping)
+        * and 8-bit chars with the high bit set
+        */
+       if (!ap_isprint(c) || c == '"' || c == '\\' || ap_iscntrl(c)) {
+           flags |= T_ESCAPE_LOGITEM;
+       }
+       printf("0x%02x%s", flags, (c < 255) ? ", " : "  ");
+
+       if ((c % 8) == 7)
+           printf(" /*0x%02x...0x%02x*/\n    ", c-7, c);
     }
     printf("\n};\n");
 
Index: main/http_protocol.c
===================================================================
RCS file: /home/cvs/apachen/src/main/http_protocol.c,v
retrieving revision 1.166
diff -u -r1.166 http_protocol.c
--- main/http_protocol.c        9 Apr 2002 11:50:59 -0000       1.166
+++ main/http_protocol.c        13 May 2002 11:22:16 -0000
@@ -983,7 +983,7 @@
     const char *uri;
     conn_rec *conn = r->connection;
     unsigned int major = 1, minor = 0;   /* Assume HTTP/1.0 if non-"HTTP" protocol */
-    int len;
+    int len, n;
 
     /* Read past empty lines until we get a real request line,
      * a read error, the connection closes (EOF), or we timeout.
@@ -1045,12 +1045,26 @@
     r->assbackwards = (ll[0] == '\0');
     r->protocol = ap_pstrdup(r->pool, ll[0] ? ll : "HTTP/0.9");
 
-    if (2 == sscanf(r->protocol, "HTTP/%u.%u", &major, &minor)
+    if (3 == sscanf(r->protocol, "HTTP/%u.%u%n", &major, &minor, &n)
       && minor < HTTP_VERSION(1,0))    /* don't allow HTTP/0.1000 */
        r->proto_num = HTTP_VERSION(major, minor);
     else
        r->proto_num = HTTP_VERSION(1,0);
 
+    /* Check for a valid protocol, and disallow everything but whitespace
+     * after the protocol string */
+    while (ap_isspace(r->protocol[n]))
+        ++n;
+    if (r->protocol[n] != '\0') {
+        r->status    = HTTP_BAD_REQUEST;
+        r->proto_num = HTTP_VERSION(1,0);
+        r->protocol  = ap_pstrdup(r->pool, "HTTP/1.0");
+        ap_table_setn(r->notes, "error-notes",
+                      "The request line contained invalid characters "
+                      "following the protocol string.<P>\n");
+        return 0;
+    }
+
     return 1;
 }
 
@@ -1165,6 +1179,14 @@
 
             ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, r,
                          "request failed: URI too long");
+            ap_send_error_response(r, 0);
+            ap_log_transaction(r);
+            return r;
+        }
+        else if (r->status == HTTP_BAD_REQUEST) {
+            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, r,
+                         "request failed: erroneous characters after protocol string: 
+%s",
+                        ap_escape_logitem(r->pool, r->the_request));
             ap_send_error_response(r, 0);
             ap_log_transaction(r);
             return r;
Index: main/util.c
===================================================================
RCS file: /home/cvs/apachen/src/main/util.c,v
retrieving revision 1.140
diff -u -r1.140 util.c
--- main/util.c 22 Mar 2002 20:53:01 -0000      1.140
+++ main/util.c 13 May 2002 11:22:17 -0000
@@ -1446,6 +1446,59 @@
     return (strncasecmp(&line[lidx], tok, tlen) == 0);
 }
 
+
+/* escape a string for logging */
+API_EXPORT(char *) ap_escape_logitem(pool *p, const char *str)
+{
+    char *ret;
+    unsigned char *d;
+    const unsigned char *s;
+
+    if (str == NULL)
+        return NULL;
+
+    ret = ap_palloc(p, 4 * strlen(str) + 1);   /* Be safe */
+    d = (unsigned char *)ret;
+    s = (const unsigned char *)str;
+    for (; *s; ++s) {
+
+       if (TEST_CHAR(*s, T_ESCAPE_LOGITEM)) {
+           *d++ = '\\';
+            switch(*s) {
+            case '\b':
+                *d++ = 'b';
+               break;
+            case '\n':
+                *d++ = 'n';
+               break;
+            case '\r':
+                *d++ = 'r';
+               break;
+            case '\t':
+                *d++ = 't';
+               break;
+            case '\v':
+                *d++ = 'v';
+               break;
+            case '\\':
+            case '"':
+                *d++ = *s;
+               break;
+           default:
+                c2x(*s, d);
+               *d = 'x';
+               d += 3;
+           }
+       }
+       else
+            *d++ = *s;
+    }
+    *d = '\0';
+
+    return ret;
+}
+
+
 API_EXPORT(char *) ap_escape_shell_cmd(pool *p, const char *str)
 {
     char *cmd;
Index: modules/standard/mod_log_config.c
===================================================================
RCS file: /home/cvs/apachen/src/modules/standard/mod_log_config.c,v
retrieving revision 1.55
diff -u -r1.55 mod_log_config.c
--- modules/standard/mod_log_config.c   14 Mar 2002 08:53:59 -0000      1.55
+++ modules/standard/mod_log_config.c   13 May 2002 11:22:17 -0000
@@ -291,8 +291,8 @@
 
 static const char *log_remote_host(request_rec *r, char *a)
 {
-    return ap_get_remote_host(r->connection, r->per_dir_config,
-                                    REMOTE_NAME);
+    return ap_escape_logitem(r->pool, ap_get_remote_host(r->connection, 
+r->per_dir_config,
+                                    REMOTE_NAME));
 }
 
 static const char *log_remote_address(request_rec *r, char *a)
@@ -307,7 +307,7 @@
 
 static const char *log_remote_logname(request_rec *r, char *a)
 {
-    return ap_get_remote_logname(r);
+    return ap_escape_logitem(r->pool, ap_get_remote_logname(r));
 }
 
 static const char *log_remote_user(request_rec *r, char *a)
@@ -320,6 +320,8 @@
     else if (strlen(rvalue) == 0) {
         rvalue = "\"\"";
     }
+    else
+        rvalue = ap_escape_logitem(r->pool, rvalue);
     return rvalue;
 }
 
@@ -330,10 +332,12 @@
             * (note the truncation before the protocol string for HTTP/0.9 requests)
             * (note also that r->the_request contains the unmodified request)
             */
-    return (r->parsed_uri.password) ? ap_pstrcat(r->pool, r->method, " ",
+    return ap_escape_logitem(r->pool,
+                            (r->parsed_uri.password) ? ap_pstrcat(r->pool, r->method, 
+" ",
                                         ap_unparse_uri_components(r->pool, 
&r->parsed_uri, 0),
                                         r->assbackwards ? NULL : " ", r->protocol, 
NULL)
-                                       : r->the_request;
+                                       : r->the_request
+                            );
 }
 
 static const char *log_request_file(request_rec *r, char *a)
@@ -342,19 +346,20 @@
 }
 static const char *log_request_uri(request_rec *r, char *a)
 {
-    return r->uri;
+    return ap_escape_logitem(r->pool, r->uri);
 }
 static const char *log_request_method(request_rec *r, char *a)
 {
-    return r->method;
+    return ap_escape_logitem(r->pool, r->method);
 }
 static const char *log_request_protocol(request_rec *r, char *a)
 {
-    return r->protocol;
+    return ap_escape_logitem(r->pool, r->protocol);
 }
 static const char *log_request_query(request_rec *r, char *a)
 {
-    return (r->args != NULL) ? ap_pstrcat(r->pool, "?", r->args, NULL)
+    return (r->args != NULL) ? ap_pstrcat(r->pool, "?",
+                                         ap_escape_logitem(r->pool, r->args), NULL)
                              : "";
 }
 static const char *log_status(request_rec *r, char *a)
@@ -389,7 +394,7 @@
 
 static const char *log_header_in(request_rec *r, char *a)
 {
-    return ap_table_get(r->headers_in, a);
+    return ap_escape_logitem(r->pool, ap_table_get(r->headers_in, a));
 }
 
 static const char *log_header_out(request_rec *r, char *a)
@@ -470,6 +475,7 @@
 {
     return ap_psprintf(r->pool, "%ld", (long) getpid());
 }
+
 static const char *log_connection_status(request_rec *r, char *a)
 {
     if (r->connection->aborted)
@@ -482,6 +488,7 @@
 
     return "-";
 }
+
 /*****************************************************************
  *
  * Parsing the log format string
Index: support/httpd.exp
===================================================================
RCS file: /home/cvs/apachen/src/support/httpd.exp,v
retrieving revision 1.38
diff -u -r1.38 httpd.exp
--- support/httpd.exp   9 Apr 2002 11:51:01 -0000       1.38
+++ support/httpd.exp   13 May 2002 11:22:18 -0000
@@ -106,6 +106,7 @@
 ap_each_byterange
 ap_error_log2stderr
 ap_escape_html
+ap_escape_logitem
 ap_escape_path_segment
 ap_escape_quotes
 ap_escape_shell_cmd

Reply via email to