Okay, once again _with_ patch: We have been discussing this topic in the past: a stricter check should be applied to the request line, in order to prevent arbitrary user input to end up in the access_log and error_log. It could be misused to spoof accesses to nonexistent (or inaccessible) resources, of course without the client actually getting access to them.
The solution I present here exists of two separate issues: 1) Detecting and disallowing garbage in the request line. Stuff like the 'planted' string: % echo 'GET / HTTP/1.0" 304 -\r207.46.197.102 - - [01/May/2002:00:02:25 +0200] "GET / HTTP/1.0\n' \ | netcat www.apache.org 80 until now looks like there was an access from 207.46.197.102, at least to the admin doing a "tail access_log", and maybe to some log analyzers too. So, any garbage after the correct syntax for a request <method> <uri> <protocol> (like in "GET / HTTP/1.1") should be disallowed and logged. 2) Escaping dangerous characters in the log files. Today, you can cause almost any character to appear in the logs. Simply do a "echo '<ESC>[2J' | netcat localhost 80" and if the administrator has a "tail -f access_log" running in some window, it will be cleared at this point. Now we don't want to break existing log analyzers, but escaping such b******t is definitely useful. What I do in this patch is to add another escaping class T_ESCAPE_LOGITEM and use it on items that *might* have come from the user (i.e., the 'tainted' items request line, request MIME header lines, possibly DNS reverse lookup names, ...). I escape invalid chars in \xXX syntax, except for '"', '\\' itself and '\r', '\n', '\b', '\t' which are simply escaped by preceding them with a '\\'. Thus it is possible to distinguish a (client-provided) '\\' from a '\\' inserted by the escaper. The extra escaping of '"' was added to prevent spoofing attemps (in the CLF, the request line is enclosed in '"', and it was easy to add a '"' to the request and confuse the reader of the log file, see example above). The patch does not (yet) try to filter *every* log item through the log filter, because apache users may want to add special characters to the log at their discretion (using LogFormat). But the typical abuse is caught (and logged) in a sensible manner. Comments? Martin -- <[EMAIL PROTECTED]> | Fujitsu Siemens Fon: +49-89-636-46021, FAX: +49-89-636-47655 | 81730 Munich, Germany
Index: include/httpd.h =================================================================== RCS file: /home/cvs/apachen/src/include/httpd.h,v retrieving revision 1.185 diff -u -r1.185 httpd.h --- include/httpd.h 9 Apr 2002 09:31:13 -0000 1.185 +++ include/httpd.h 13 May 2002 11:22:16 -0000 @@ -1032,6 +1032,7 @@ API_EXPORT(char *) ap_escape_html(pool *p, const char *s); API_EXPORT(char *) ap_construct_server(pool *p, const char *hostname, unsigned port, const request_rec *r); +API_EXPORT(char *) ap_escape_logitem(pool *p, const char *str); API_EXPORT(char *) ap_escape_shell_cmd(pool *p, const char *s); API_EXPORT(int) ap_count_dirs(const char *path); Index: main/gen_test_char.c =================================================================== RCS file: /home/cvs/apachen/src/main/gen_test_char.c,v retrieving revision 1.7 diff -u -r1.7 gen_test_char.c --- main/gen_test_char.c 22 Mar 2002 20:53:04 -0000 1.7 +++ main/gen_test_char.c 13 May 2002 11:22:16 -0000 @@ -8,6 +8,7 @@ #define T_ESCAPE_PATH_SEGMENT (0x02) #define T_OS_ESCAPE_PATH (0x04) #define T_HTTP_TOKEN_STOP (0x08) +#define T_ESCAPE_LOGITEM (0x10) int main(int argc, char *argv[]) { @@ -16,25 +17,27 @@ printf( "/* this file is automatically generated by gen_test_char, do not edit */\n" -"#define T_ESCAPE_SHELL_CMD (%u)\n" -"#define T_ESCAPE_PATH_SEGMENT (%u)\n" -"#define T_OS_ESCAPE_PATH (%u)\n" -"#define T_HTTP_TOKEN_STOP (%u)\n" -"\n" -"static const unsigned char test_char_table[256] = {\n" -" 0,", +"#define T_ESCAPE_SHELL_CMD 0x%02x /* chars with special meaning in the shell */\n" +"#define T_ESCAPE_PATH_SEGMENT 0x%02x /* find path segment, as defined in RFC1808 +*/\n" +"#define T_OS_ESCAPE_PATH 0x%02x /* escape characters in a path or uri */\n" +"#define T_HTTP_TOKEN_STOP 0x%02x /* find http tokens, as defined in RFC2616 */\n" +"#define T_ESCAPE_LOGITEM 0x%02x /* filter what should go in the log file */\n" +"\n", T_ESCAPE_SHELL_CMD, T_ESCAPE_PATH_SEGMENT, T_OS_ESCAPE_PATH, - T_HTTP_TOKEN_STOP); + T_HTTP_TOKEN_STOP, + T_ESCAPE_LOGITEM + ); /* we explicitly dealt with NUL above * in case some strchr() do bogosity with it */ + printf("static const unsigned char test_char_table[256] = {\n" + " 0x00, "); /* print initial item */ + for (c = 1; c < 256; ++c) { flags = 0; - if (c % 20 == 0) - printf("\n "); /* escape_shell_cmd */ #if defined(WIN32) || defined(OS2) @@ -67,8 +70,19 @@ if (ap_iscntrl(c) || strchr(" \t()<>@,;:\\/[]?={}", c)) { flags |= T_HTTP_TOKEN_STOP; } - printf("%u%c", flags, (c < 255) ? ',' : ' '); + /* For logging, escape all control characters, + * double quotes (because they delimit the request in the log file) + * backslashes (because we use backslash for escaping) + * and 8-bit chars with the high bit set + */ + if (!ap_isprint(c) || c == '"' || c == '\\' || ap_iscntrl(c)) { + flags |= T_ESCAPE_LOGITEM; + } + printf("0x%02x%s", flags, (c < 255) ? ", " : " "); + + if ((c % 8) == 7) + printf(" /*0x%02x...0x%02x*/\n ", c-7, c); } printf("\n};\n"); Index: main/http_protocol.c =================================================================== RCS file: /home/cvs/apachen/src/main/http_protocol.c,v retrieving revision 1.166 diff -u -r1.166 http_protocol.c --- main/http_protocol.c 9 Apr 2002 11:50:59 -0000 1.166 +++ main/http_protocol.c 13 May 2002 11:22:16 -0000 @@ -983,7 +983,7 @@ const char *uri; conn_rec *conn = r->connection; unsigned int major = 1, minor = 0; /* Assume HTTP/1.0 if non-"HTTP" protocol */ - int len; + int len, n; /* Read past empty lines until we get a real request line, * a read error, the connection closes (EOF), or we timeout. @@ -1045,12 +1045,26 @@ r->assbackwards = (ll[0] == '\0'); r->protocol = ap_pstrdup(r->pool, ll[0] ? ll : "HTTP/0.9"); - if (2 == sscanf(r->protocol, "HTTP/%u.%u", &major, &minor) + if (3 == sscanf(r->protocol, "HTTP/%u.%u%n", &major, &minor, &n) && minor < HTTP_VERSION(1,0)) /* don't allow HTTP/0.1000 */ r->proto_num = HTTP_VERSION(major, minor); else r->proto_num = HTTP_VERSION(1,0); + /* Check for a valid protocol, and disallow everything but whitespace + * after the protocol string */ + while (ap_isspace(r->protocol[n])) + ++n; + if (r->protocol[n] != '\0') { + r->status = HTTP_BAD_REQUEST; + r->proto_num = HTTP_VERSION(1,0); + r->protocol = ap_pstrdup(r->pool, "HTTP/1.0"); + ap_table_setn(r->notes, "error-notes", + "The request line contained invalid characters " + "following the protocol string.<P>\n"); + return 0; + } + return 1; } @@ -1165,6 +1179,14 @@ ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, r, "request failed: URI too long"); + ap_send_error_response(r, 0); + ap_log_transaction(r); + return r; + } + else if (r->status == HTTP_BAD_REQUEST) { + ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, r, + "request failed: erroneous characters after protocol string: +%s", + ap_escape_logitem(r->pool, r->the_request)); ap_send_error_response(r, 0); ap_log_transaction(r); return r; Index: main/util.c =================================================================== RCS file: /home/cvs/apachen/src/main/util.c,v retrieving revision 1.140 diff -u -r1.140 util.c --- main/util.c 22 Mar 2002 20:53:01 -0000 1.140 +++ main/util.c 13 May 2002 11:22:17 -0000 @@ -1446,6 +1446,59 @@ return (strncasecmp(&line[lidx], tok, tlen) == 0); } + +/* escape a string for logging */ +API_EXPORT(char *) ap_escape_logitem(pool *p, const char *str) +{ + char *ret; + unsigned char *d; + const unsigned char *s; + + if (str == NULL) + return NULL; + + ret = ap_palloc(p, 4 * strlen(str) + 1); /* Be safe */ + d = (unsigned char *)ret; + s = (const unsigned char *)str; + for (; *s; ++s) { + + if (TEST_CHAR(*s, T_ESCAPE_LOGITEM)) { + *d++ = '\\'; + switch(*s) { + case '\b': + *d++ = 'b'; + break; + case '\n': + *d++ = 'n'; + break; + case '\r': + *d++ = 'r'; + break; + case '\t': + *d++ = 't'; + break; + case '\v': + *d++ = 'v'; + break; + case '\\': + case '"': + *d++ = *s; + break; + default: + c2x(*s, d); + *d = 'x'; + d += 3; + } + } + else + *d++ = *s; + } + *d = '\0'; + + return ret; +} + + API_EXPORT(char *) ap_escape_shell_cmd(pool *p, const char *str) { char *cmd; Index: modules/standard/mod_log_config.c =================================================================== RCS file: /home/cvs/apachen/src/modules/standard/mod_log_config.c,v retrieving revision 1.55 diff -u -r1.55 mod_log_config.c --- modules/standard/mod_log_config.c 14 Mar 2002 08:53:59 -0000 1.55 +++ modules/standard/mod_log_config.c 13 May 2002 11:22:17 -0000 @@ -291,8 +291,8 @@ static const char *log_remote_host(request_rec *r, char *a) { - return ap_get_remote_host(r->connection, r->per_dir_config, - REMOTE_NAME); + return ap_escape_logitem(r->pool, ap_get_remote_host(r->connection, +r->per_dir_config, + REMOTE_NAME)); } static const char *log_remote_address(request_rec *r, char *a) @@ -307,7 +307,7 @@ static const char *log_remote_logname(request_rec *r, char *a) { - return ap_get_remote_logname(r); + return ap_escape_logitem(r->pool, ap_get_remote_logname(r)); } static const char *log_remote_user(request_rec *r, char *a) @@ -320,6 +320,8 @@ else if (strlen(rvalue) == 0) { rvalue = "\"\""; } + else + rvalue = ap_escape_logitem(r->pool, rvalue); return rvalue; } @@ -330,10 +332,12 @@ * (note the truncation before the protocol string for HTTP/0.9 requests) * (note also that r->the_request contains the unmodified request) */ - return (r->parsed_uri.password) ? ap_pstrcat(r->pool, r->method, " ", + return ap_escape_logitem(r->pool, + (r->parsed_uri.password) ? ap_pstrcat(r->pool, r->method, +" ", ap_unparse_uri_components(r->pool, &r->parsed_uri, 0), r->assbackwards ? NULL : " ", r->protocol, NULL) - : r->the_request; + : r->the_request + ); } static const char *log_request_file(request_rec *r, char *a) @@ -342,19 +346,20 @@ } static const char *log_request_uri(request_rec *r, char *a) { - return r->uri; + return ap_escape_logitem(r->pool, r->uri); } static const char *log_request_method(request_rec *r, char *a) { - return r->method; + return ap_escape_logitem(r->pool, r->method); } static const char *log_request_protocol(request_rec *r, char *a) { - return r->protocol; + return ap_escape_logitem(r->pool, r->protocol); } static const char *log_request_query(request_rec *r, char *a) { - return (r->args != NULL) ? ap_pstrcat(r->pool, "?", r->args, NULL) + return (r->args != NULL) ? ap_pstrcat(r->pool, "?", + ap_escape_logitem(r->pool, r->args), NULL) : ""; } static const char *log_status(request_rec *r, char *a) @@ -389,7 +394,7 @@ static const char *log_header_in(request_rec *r, char *a) { - return ap_table_get(r->headers_in, a); + return ap_escape_logitem(r->pool, ap_table_get(r->headers_in, a)); } static const char *log_header_out(request_rec *r, char *a) @@ -470,6 +475,7 @@ { return ap_psprintf(r->pool, "%ld", (long) getpid()); } + static const char *log_connection_status(request_rec *r, char *a) { if (r->connection->aborted) @@ -482,6 +488,7 @@ return "-"; } + /***************************************************************** * * Parsing the log format string Index: support/httpd.exp =================================================================== RCS file: /home/cvs/apachen/src/support/httpd.exp,v retrieving revision 1.38 diff -u -r1.38 httpd.exp --- support/httpd.exp 9 Apr 2002 11:51:01 -0000 1.38 +++ support/httpd.exp 13 May 2002 11:22:18 -0000 @@ -106,6 +106,7 @@ ap_each_byterange ap_error_log2stderr ap_escape_html +ap_escape_logitem ap_escape_path_segment ap_escape_quotes ap_escape_shell_cmd