Hi Willy -

Sorry about the delay. I had to work on some other projects, and just
got back to this.
As usual, thank you for the feedback on the commit.

On Wed, Apr 15, 2015 at 4:12 AM, Willy Tarreau <[email protected]> wrote:
>
> - it did not apply to mainline, I had to apply it by hand, so I suspect
>   that you did it against 1.5 instead of 1.6. Any contrib must be done
>   on 1.6 (dev branch), including fixes, and if needed they're backported
>   later. This ensures we never lose a fix or feature when upgrading.

Apologies for that, I was indeed working against 1.5.

> There, if I send "GET \r\n\r\n", what will happen is that both spc and end
> will point to the same space, resulting in <nchar> being -1, so you can
> already see the segfault in memmove() and later. Also you need to keep in
> mind that multiple spaces are tolerated and that tabs are tolerated as
> well, but they're encoded as "#09" after encode_string().

I wasn't aware that either "GET \r\n\r\n" or tabs were valid in the HTTP
request line, but if the HAProxy parser tolerates it then the logging should
definitely not blow up if such a request comes through!

I've attached a patch that I believe addresses all of your feedback.
Let me know what you thinks!


-- 
- Andrew Hayworth


>From 01db55d61f9efcfe6133126ab17ca8bd22dbb1bf Mon Sep 17 00:00:00 2001
From: Andrew Hayworth <[email protected]>
Date: Mon, 27 Apr 2015 21:37:03 +0000
Subject: [PATCH] Add HTTP request-line log format directives

This commit adds 4 new log format variables that parse the
HTTP Request-Line for more specific logging than "%r" provides.

For example, we can parse the following HTTP Request-Line with
these new variables:

  "GET /foo?bar=baz HTTP/1.1"

- %HM: HTTP Method ("GET")
- %HV: HTTP Version ("HTTP/1.1")
- %HR: HTTP Request-URI ("/foo?bar=baz")
- %HP: HTTP Request-URI without query string ("/foo")
---
 doc/configuration.txt |   4 ++
 include/types/log.h   |   4 ++
 src/log.c             | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index a37f54c..8e090b6 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -13203,6 +13203,10 @@ Please refer to the table below for currently
defined variables :
   |   | %t   | date_time      (with millisecond resolution)  | date        |
   |   | %ts  | termination_state                             | string      |
   | H | %tsc | termination_state with cookie status          | string      |
+  | H | %HM  | HTTP method (ex: POST)                        | string      |
+  | H | %HV  | HTTP version (ex: HTTP/1.0)                   | string      |
+  | H | %HR  | HTTP request URI (ex: /foo?bar=baz)           | string      |
+  | H | %HP  | HTTP request URI without query string         | string      |
   +---+------+-----------------------------------------------+-------------+

     R = Restrictions : H = mode http only ; S = SSL only
diff --git a/include/types/log.h b/include/types/log.h
index 345a09f..f02cc11 100644
--- a/include/types/log.h
+++ b/include/types/log.h
@@ -93,6 +93,10 @@ enum {
  LOG_FMT_HDRREQUESTLIST,
  LOG_FMT_HDRRESPONSLIST,
  LOG_FMT_REQ,
+ LOG_FMT_HTTP_METHOD,
+ LOG_FMT_HTTP_REQUEST,
+ LOG_FMT_HTTP_PATH,
+ LOG_FMT_HTTP_VERSION,
  LOG_FMT_HOSTNAME,
  LOG_FMT_UNIQUEID,
  LOG_FMT_SSL_CIPHER,
diff --git a/src/log.c b/src/log.c
index 866b110..d0858fa 100644
--- a/src/log.c
+++ b/src/log.c
@@ -32,6 +32,7 @@
 #include <types/log.h>

 #include <proto/frontend.h>
+#include <proto/proto_http.h>
 #include <proto/log.h>
 #include <proto/sample.h>
 #include <proto/stream.h>
@@ -108,6 +109,10 @@ static const struct logformat_type logformat_keywords[] = {
  { "hrl", LOG_FMT_HDRREQUESTLIST, PR_MODE_TCP, LW_REQHDR, NULL }, /*
header request list */
  { "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL },  /*
header response */
  { "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL },  /*
header response list */
+ { "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP method */
+ { "HR", LOG_FMT_HTTP_REQUEST, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP
full request */
+ { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP path */
+ { "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP
version */
  { "lc", LOG_FMT_LOGCNT, PR_MODE_TCP, LW_INIT, NULL }, /* log counter */
  { "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL },       /* accept
date millisecond */
  { "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
@@ -923,7 +928,10 @@ int build_logline(struct stream *s, char *dst,
size_t maxsize, struct list *list
  struct proxy *fe = sess->fe;
  struct proxy *be = s->be;
  struct http_txn *txn = s->txn;
+ struct chunk chunk;
  char *uri;
+ char *spc;
+ char *end;
  struct tm tm;
  int t_request;
  int hdr;
@@ -1523,6 +1531,137 @@ int build_logline(struct stream *s, char *dst,
size_t maxsize, struct list *list
        last_isspace = 0;
        break;

+     case LOG_FMT_HTTP_PATH: // %HP
+       uri = txn->uri ? txn->uri : "<BADREQ>";
+
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       end = uri + strlen(uri);
+       // look for the first whitespace character
+       while (uri < end && !HTTP_IS_SPHT(*uri))
+         uri++;
+
+       // keep advancing past multiple spaces
+       while (uri < end && HTTP_IS_SPHT(*uri))
+         uri++;
+
+       // look for first space or question mark after url
+       spc = uri;
+       while (spc < end && *spc != '?' && !HTTP_IS_SPHT(*spc))
+         spc++;
+
+       chunk.str = uri;
+       chunk.len = spc - uri;
+
+       ret = encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk);
+       if (ret == NULL || *ret != '\0')
+         goto out;
+
+       tmplog = ret;
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       last_isspace = 0;
+       break;
+
+     case LOG_FMT_HTTP_REQUEST: // %HR
+       uri = txn->uri ? txn->uri : "<BADREQ>";
+
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       end = uri + strlen(uri);
+       // look for the first whitespace character
+       while (uri < end && !HTTP_IS_SPHT(*uri))
+         uri++;
+
+       // keep advancing past multiple spaces
+       while (uri < end && HTTP_IS_SPHT(*uri))
+         uri++;
+
+       // look for first space after url
+       spc = uri;
+       while (spc < end && !HTTP_IS_SPHT(*spc))
+         spc++;
+
+       chunk.str = uri;
+       chunk.len = spc - uri;
+
+       ret = encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk);
+       if (ret == NULL || *ret != '\0')
+         goto out;
+
+       tmplog = ret;
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       last_isspace = 0;
+       break;
+
+     case LOG_FMT_HTTP_METHOD: // %HM
+       uri = txn->uri ? txn->uri : "<BADREQ>";
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       end = uri + strlen(uri);
+       // look for the first whitespace character
+       spc = uri;
+       while (spc < end && !HTTP_IS_SPHT(*spc))
+         spc++;
+
+       chunk.str = uri;
+       chunk.len = spc - uri;
+
+       ret = encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk);
+       if (ret == NULL || *ret != '\0')
+         goto out;
+
+       tmplog = ret;
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       last_isspace = 0;
+       break;
+
+     case LOG_FMT_HTTP_VERSION: // %HV
+       uri = txn->uri ? txn->uri : "<BADREQ>";
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       end = uri + strlen(uri);
+       spc = uri;
+       // look for the last whitespace character
+       while (uri < end) {
+         if (HTTP_IS_SPHT(*uri)) {
+           spc = ++uri;
+         } else {
+           uri++;
+         }
+       }
+
+       // "Simple" HTTP request per RFC1945
+       // Could technically be HTTP0.9 but we return HTTP/1.0 in the response
+       // so mirror that here.
+       if (spc == (end-1)) {
+         chunk.str = "HTTP/1.0";
+         chunk.len = strlen("HTTP/1.0");
+       } else {
+         chunk.str = spc;
+         chunk.len = uri - spc;
+       }
+
+       ret = encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk);
+       if (ret == NULL || *ret != '\0')
+         goto out;
+
+       tmplog = ret;
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       last_isspace = 0;
+       break;
+
      case LOG_FMT_COUNTER: // %rt
        if (tmp->options & LOG_OPT_HEXA) {
          iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", s->uniq_id);
--
2.1.3

Attachment: 0001-Add-HTTP-request-line-log-format-directives.patch
Description: Binary data

Reply via email to