This is the second version of the patch.
 - The documentation in cf.data.pre is fixed
 - The new  rfc1738_do_escape is used

Regards,
   Christos
=== modified file 'src/cf.data.pre'
--- src/cf.data.pre	2011-02-07 10:27:53 +0000
+++ src/cf.data.pre	2011-03-07 18:05:48 +0000
@@ -7267,54 +7267,57 @@
 DOC_START
 	WHOIS server to query for AS numbers.  NOTE: AS numbers are
 	queried only when Squid starts up, not for every request.
 DOC_END
 
 NAME: offline_mode
 TYPE: onoff
 LOC: Config.onoff.offline
 DEFAULT: off
 DOC_START
 	Enable this option and Squid will never try to validate cached
 	objects.
 DOC_END
 
 NAME: uri_whitespace
 TYPE: uri_whitespace
 LOC: Config.uri_whitespace
 DEFAULT: strip
 DOC_START
 	What to do with requests that have whitespace characters in the
-	URI.  Options:
+	URI. This option also affects the logged URI with the %ru
+	formatting code.  Options:
 
 	strip:  The whitespace characters are stripped out of the URL.
 		This is the behavior recommended by RFC2396.
 	deny:   The request is denied.  The user receives an "Invalid
-		Request" message.
+		Request" message. The whitespace is removed before it
+		is logged.
 	allow:  The request is allowed and the URI is not changed.  The
-		whitespace characters remain in the URI.  Note the
-		whitespace is passed to redirector processes if they
-		are in use.
+		whitespace characters remain in the URI.
+		WARNING: The whitespace is passed to redirector processes
+		if they are in use.
+		WARNING: The URI is logged with the whitespace intact.
 	encode:	The request is allowed and the whitespace characters are
 		encoded according to RFC1738.  This could be considered
-		a violation of the HTTP/1.1
-		RFC because proxies are not allowed to rewrite URI's.
+		a violation of the HTTP/1.1 RFC because proxies are not
+		allowed to rewrite URI's.
 	chop:	The request is allowed and the URI is chopped at the
 		first whitespace.  This might also be considered a
 		violation.
 DOC_END
 
 NAME: chroot
 TYPE: string
 LOC: Config.chroot_dir
 DEFAULT: none
 DOC_START
 	Specifies a directory where Squid should do a chroot() while
 	initializing.  This also causes Squid to fully drop root
 	privileges after initializing.  This means, for example, if you
 	use a HTTP port less than 1024 and try to reconfigure, you may
 	get an error saying that Squid can not open the port.
 DOC_END
 
 NAME: balance_on_multiple_ip
 TYPE: onoff
 LOC: Config.onoff.balance_on_multiple_ip

=== modified file 'src/client_side.cc'
--- src/client_side.cc	2011-03-05 02:00:32 +0000
+++ src/client_side.cc	2011-03-07 18:14:34 +0000
@@ -1898,48 +1898,81 @@
         end = uriAndHTTPVersion + strcspn(uriAndHTTPVersion, "\r\n");
         assert(end);
     }
 
     for (; end > uriAndHTTPVersion; end--) {
         if (*end == '\n' || *end == '\r')
             continue;
 
         if (xisspace(*end)) {
             if (strncasecmp(end + 1, "HTTP/", 5) == 0)
                 return end + 1;
             else
                 break;
         }
     }
 
     return NULL;
 }
 
 void
-setLogUri(ClientHttpRequest * http, char const *uri)
+setLogUri(ClientHttpRequest * http, char const *uri, bool cleanUrl)
 {
     safe_free(http->log_uri);
 
-    if (!stringHasCntl(uri))
+    if (!cleanUrl)
+        // The URI is already clean; just duplicate it as-is.
         http->log_uri = xstrndup(uri, MAX_URL);
-    else
-        http->log_uri = xstrndup(rfc1738_escape_unescaped(uri), MAX_URL);
+    else {
+            int flags = 0;
+            switch (Config.uri_whitespace) {
+            case URI_WHITESPACE_ALLOW:
+                flags |= RFC1738_ESCAPE_NOSPACE; /* fall through */
+            case URI_WHITESPACE_ENCODE:
+                flags |= RFC1738_ESCAPE_UNESCAPED;
+                http->log_uri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
+                break;
+                
+            case URI_WHITESPACE_CHOP: {
+                int pos = strcspn(uri, w_space);
+                char *q = xstrndup(uri, pos < MAX_URL? pos : MAX_URL);
+                http->log_uri = xstrndup(rfc1738_escape_unescaped(q), MAX_URL);
+            }
+                break;
+                
+            case URI_WHITESPACE_DENY:
+            case URI_WHITESPACE_STRIP:
+            default: {
+                const char *t;
+                char *q = static_cast<char*>(xmalloc(strlen(uri) + 1));
+                t = uri;
+                while (*t) {
+                    if (!xisspace(*t))
+                        *q++ = *t;
+                    t++;
+                }
+                *q = '\0';
+                http->log_uri = xstrndup(rfc1738_escape_unescaped(q), MAX_URL);
+            }
+                break;
+            }
+    }
 }
 
 static void
 prepareAcceleratedURL(ConnStateData * conn, ClientHttpRequest *http, char *url, const char *req_hdr)
 {
     int vhost = conn->port->vhost;
     int vport = conn->port->vport;
     char *host;
     char ipbuf[MAX_IPSTRLEN];
 
     http->flags.accel = 1;
 
     /* BUG: Squid cannot deal with '*' URLs (RFC2616 5.1.2) */
 
     if (strncasecmp(url, "cache_object://", 15) == 0)
         return; /* already in good shape */
 
     if (*url != '/') {
         if (conn->port->vhost)
             return; /* already in good shape */
@@ -2208,41 +2241,40 @@
 
     } else if (conn->port->accel || conn->switchedToHttps()) {
         /* accelerator mode */
         prepareAcceleratedURL(conn, http, url, req_hdr);
 
     } else if (internalCheck(url)) {
         /* internal URL mode */
         /* prepend our name & port */
         http->uri = xstrdup(internalLocalUri(NULL, url));
         http->flags.accel = 1;
     }
 
     if (!http->uri) {
         /* No special rewrites have been applied above, use the
          * requested url. may be rewritten later, so make extra room */
         int url_sz = strlen(url) + Config.appendDomainLen + 5;
         http->uri = (char *)xcalloc(url_sz, 1);
         strcpy(http->uri, url);
     }
 
-    setLogUri(http, http->uri);
     debugs(33, 5, "parseHttpRequest: Complete request received");
     result->flags.parsed_ok = 1;
     xfree(url);
     return result;
 }
 
 int
 ConnStateData::getAvailableBufferLength() const
 {
     int result = in.allocatedSize - in.notYetUsed - 1;
     assert (result >= 0);
     return result;
 }
 
 bool
 ConnStateData::maybeMakeSpaceAvailable()
 {
     if (getAvailableBufferLength() < 2) {
         size_t newSize;
         if (in.allocatedSize >= Config.maxRequestBufferSize) {
@@ -2379,93 +2411,101 @@
 }
 
 static void
 clientProcessRequest(ConnStateData *conn, HttpParser *hp, ClientSocketContext *context, const HttpRequestMethod& method, HttpVersion http_ver)
 {
     ClientHttpRequest *http = context->http;
     HttpRequest *request = NULL;
     bool notedUseOfBuffer = false;
     bool chunked = false;
     bool mustReplyToOptions = false;
     bool unsupportedTe = false;
     bool expectBody = false;
 
     /* We have an initial client stream in place should it be needed */
     /* setup our private context */
     context->registerWithConn();
 
     if (context->flags.parsed_ok == 0) {
         clientStreamNode *node = context->getClientReplyContext();
         debugs(33, 1, "clientProcessRequest: Invalid Request");
+        // setLogUri() should be called before repContext->setReplyToError().
+        setLogUri(http, http->uri,  true);
         clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
         assert (repContext);
         switch (hp->request_parse_status) {
         case HTTP_HEADER_TOO_LARGE:
             repContext->setReplyToError(ERR_TOO_BIG, HTTP_BAD_REQUEST, method, http->uri, conn->peer, NULL, conn->in.buf, NULL);
             break;
         case HTTP_METHOD_NOT_ALLOWED:
             repContext->setReplyToError(ERR_UNSUP_REQ, HTTP_METHOD_NOT_ALLOWED, method, http->uri, conn->peer, NULL, conn->in.buf, NULL);
             break;
         default:
             repContext->setReplyToError(ERR_INVALID_REQ, HTTP_BAD_REQUEST, method, http->uri, conn->peer, NULL, conn->in.buf, NULL);
         }
         assert(context->http->out.offset == 0);
         context->pullData();
         conn->flags.readMoreRequests = false;
         goto finish;
     }
 
     if ((request = HttpRequest::CreateFromUrlAndMethod(http->uri, method)) == NULL) {
         clientStreamNode *node = context->getClientReplyContext();
         debugs(33, 5, "Invalid URL: " << http->uri);
+        // setLogUri() should be called before repContext->setReplyToError().
+        setLogUri(http, http->uri,  true);
         clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
         assert (repContext);
         repContext->setReplyToError(ERR_INVALID_URL, HTTP_BAD_REQUEST, method, http->uri, conn->peer, NULL, NULL, NULL);
         assert(context->http->out.offset == 0);
         context->pullData();
         conn->flags.readMoreRequests = false;
         goto finish;
     }
 
     /* RFC 2616 section 10.5.6 : handle unsupported HTTP versions cleanly. */
     /* We currently only accept 0.9, 1.0, 1.1 */
     if ( (http_ver.major == 0 && http_ver.minor != 9) ||
             (http_ver.major == 1 && http_ver.minor > 1 ) ||
             (http_ver.major > 1) ) {
 
         clientStreamNode *node = context->getClientReplyContext();
         debugs(33, 5, "Unsupported HTTP version discovered. :\n" << HttpParserHdrBuf(hp));
+        // setLogUri() should be called before repContext->setReplyToError().
+        setLogUri(http, http->uri,  true);
         clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
         assert (repContext);
         repContext->setReplyToError(ERR_UNSUP_HTTPVERSION, HTTP_HTTP_VERSION_NOT_SUPPORTED, method, http->uri, conn->peer, NULL, HttpParserHdrBuf(hp), NULL);
         assert(context->http->out.offset == 0);
         context->pullData();
         conn->flags.readMoreRequests = false;
         goto finish;
     }
 
     /* compile headers */
     /* we should skip request line! */
     /* XXX should actually know the damned buffer size here */
     if (http_ver.major >= 1 && !request->parseHeader(HttpParserHdrBuf(hp), HttpParserHdrSz(hp))) {
         clientStreamNode *node = context->getClientReplyContext();
         debugs(33, 5, "Failed to parse request headers:\n" << HttpParserHdrBuf(hp));
+        // setLogUri() should be called before repContext->setReplyToError().
+        setLogUri(http, http->uri,  true);
         clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
         assert (repContext);
         repContext->setReplyToError(ERR_INVALID_REQ, HTTP_BAD_REQUEST, method, http->uri, conn->peer, NULL, NULL, NULL);
         assert(context->http->out.offset == 0);
         context->pullData();
         conn->flags.readMoreRequests = false;
         goto finish;
     }
 
     request->flags.accelerated = http->flags.accel;
     request->flags.ignore_cc = conn->port->ignore_cc;
     request->flags.no_direct = request->flags.accelerated ? !conn->port->allow_direct : 0;
 
     /** \par
      * If transparent or interception mode is working clone the transparent and interception flags
      * from the port settings to the request.
      */
     if (Ip::Interceptor.InterceptActive()) {
         request->flags.intercepted = http->flags.intercepted;
     }

=== modified file 'src/client_side.h'
--- src/client_side.h	2011-02-07 10:27:53 +0000
+++ src/client_side.h	2011-03-06 18:06:29 +0000
@@ -304,25 +304,25 @@
 private:
     int connReadWasError(comm_err_t flag, int size, int xerrno);
     int connFinishedWithConn(int size);
     void clientMaybeReadData(int do_next_read);
     void clientAfterReadingRequests(int do_next_read);
 
 private:
     CBDATA_CLASS2(ConnStateData);
     bool transparent_;
     bool closing_;
 
     bool switchedToHttps_;
     String sslHostName; ///< Host name for SSL certificate generation
     AsyncCall::Pointer reader; ///< set when we are reading
     BodyPipe::Pointer bodyPipe; // set when we are reading request body
 };
 
 /* convenience class while splitting up body handling */
 /* temporary existence only - on stack use expected */
 
-void setLogUri(ClientHttpRequest * http, char const *uri);
+void setLogUri(ClientHttpRequest * http, char const *uri, bool cleanUrl = false);
 
 const char *findTrailingHTTPVersion(const char *uriAndHTTPVersion, const char *end = NULL);
 
 #endif /* SQUID_CLIENTSIDE_H */

Reply via email to