Hello -

On Tue, Apr 28, 2015 at 3:01 AM, Willy Tarreau <[email protected]> wrote:
> Hi again Andrew,
>
> I've spent some time on it. Good work. I'm seeing a bug in %HV, it doesn't
> always work. I like your idea to convert missing versions to "HTTP/1.0",
> but I think that in order to be even more accurate, we should report
> "HTTP/0.9" to translate what was actually seen in the logs, what do you
> think ?
>

This latest patch logs as HTTP/0.9 - I re-read the RFC and that
totally makes sense.

> One comment prior to the bug itself, I found %HR confusing because it
> designates a request (and is even mapped to LOG_HTTP_REQUEST), but what
> we name a request all over the code and doc is the association between
> the method, the uri and the version. Since it takes only the URI, I'd
> rather call it LOG_HTTP_URI and %HU. Maybe later we'll even decide to
> alias %HR to %r, I don't know. I tend to be careful to always plan for
> the future because we have accumulated a lot of misdesigns initially
> due to the narrower scope of the project, that we're still carrying
> today. Concerning "%HP" you can mention in the doc "(path)" since it's
> the name we use in other places for the same entity. That way it will
> be consistent.
>

I changed %HR -> %HU, and mentioned '(path)' in the docs.

> Now concerning the bug, it's not very important but it does exist :

Good catch - Suffice it to say I've tested this patch a bit more thoroughly. :-)

After a little reflection and during testing, I also found it
confusing that the new
directives sometimes logged "<BADREQ>" and sometimes parsed as much as
they could out of the request line before giving up. This latest patch
more consistently
logs "<BADREQ>" if the line is bad. I ended up counting spaces, based on the
premise that any valid request in any version of the protocol must
contain at least
one whitespace character.

Thanks!
-- 
- Andrew Hayworth


>From 74b1abcfe2202f7da5de7c6e2f33c303e5dd4f62 Mon Sep 17 00:00:00 2001
From: Andrew Hayworth <[email protected]>
Date: Mon, 27 Apr 2015 21:37:03 +0000
Subject: [PATCH] Add HTTP request-line log format directives

This commit adds 4 new log format variables that parse the
HTTP Request-Line for more specific logging than "%r" provides.

For example, we can parse the following HTTP Request-Line with
these new variables:

  "GET /foo?bar=baz HTTP/1.1"

- %HM: HTTP Method ("GET")
- %HV: HTTP Version ("HTTP/1.1")
- %HU: HTTP Request-URI ("/foo?bar=baz")
- %HP: HTTP Request-URI without query string ("/foo")
---
 doc/configuration.txt |   4 ++
 include/types/log.h   |   4 ++
 src/log.c             | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index a37f54c..bd7e2a8 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -13203,6 +13203,10 @@ Please refer to the table below for currently
defined variables :
   |   | %t   | date_time      (with millisecond resolution)  | date        |
   |   | %ts  | termination_state                             | string      |
   | H | %tsc | termination_state with cookie status          | string      |
+  | H | %HM  | HTTP method (ex: POST)                        | string      |
+  | H | %HV  | HTTP version (ex: HTTP/1.0)                   | string      |
+  | H | %HR  | HTTP request URI (ex: /foo?bar=baz)           | string      |
+  | H | %HP  | HTTP request URI without query string (path)  | string      |
   +---+------+-----------------------------------------------+-------------+

     R = Restrictions : H = mode http only ; S = SSL only
diff --git a/include/types/log.h b/include/types/log.h
index 345a09f..bbfe020 100644
--- a/include/types/log.h
+++ b/include/types/log.h
@@ -93,6 +93,10 @@ enum {
  LOG_FMT_HDRREQUESTLIST,
  LOG_FMT_HDRRESPONSLIST,
  LOG_FMT_REQ,
+ LOG_FMT_HTTP_METHOD,
+ LOG_FMT_HTTP_URI,
+ LOG_FMT_HTTP_PATH,
+ LOG_FMT_HTTP_VERSION,
  LOG_FMT_HOSTNAME,
  LOG_FMT_UNIQUEID,
  LOG_FMT_SSL_CIPHER,
diff --git a/src/log.c b/src/log.c
index 866b110..4391494 100644
--- a/src/log.c
+++ b/src/log.c
@@ -32,6 +32,7 @@
 #include <types/log.h>

 #include <proto/frontend.h>
+#include <proto/proto_http.h>
 #include <proto/log.h>
 #include <proto/sample.h>
 #include <proto/stream.h>
@@ -108,6 +109,10 @@ static const struct logformat_type logformat_keywords[] = {
  { "hrl", LOG_FMT_HDRREQUESTLIST, PR_MODE_TCP, LW_REQHDR, NULL }, /*
header request list */
  { "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL },  /*
header response */
  { "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL },  /*
header response list */
+ { "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP method */
+ { "HR", LOG_FMT_HTTP_URI, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP full URI */
+ { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP path */
+ { "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP
version */
  { "lc", LOG_FMT_LOGCNT, PR_MODE_TCP, LW_INIT, NULL }, /* log counter */
  { "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL },       /* accept
date millisecond */
  { "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
@@ -923,11 +928,15 @@ int build_logline(struct stream *s, char *dst,
size_t maxsize, struct list *list
  struct proxy *fe = sess->fe;
  struct proxy *be = s->be;
  struct http_txn *txn = s->txn;
+ struct chunk chunk;
  char *uri;
+ char *spc;
+ char *end;
  struct tm tm;
  int t_request;
  int hdr;
  int last_isspace = 1;
+ int nspaces = 0;
  char *tmplog;
  char *ret;
  int iret;
@@ -1523,6 +1532,161 @@ int build_logline(struct stream *s, char *dst,
size_t maxsize, struct list *list
        last_isspace = 0;
        break;

+     case LOG_FMT_HTTP_PATH: // %HP
+       uri = txn->uri ? txn->uri : "<BADREQ>";
+
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       end = uri + strlen(uri);
+       // look for the first whitespace character
+       while (uri < end && !HTTP_IS_SPHT(*uri))
+         uri++;
+
+       // keep advancing past multiple spaces
+       while (uri < end && HTTP_IS_SPHT(*uri)) {
+         uri++; nspaces++;
+       }
+
+       // look for first space or question mark after url
+       spc = uri;
+       while (spc < end && *spc != '?' && !HTTP_IS_SPHT(*spc))
+         spc++;
+
+       if (!txn->uri || nspaces == 0) {
+         chunk.str = "<BADREQ>";
+         chunk.len = strlen("<BADREQ>");
+       } else {
+         chunk.str = uri;
+         chunk.len = spc - uri;
+       }
+
+       ret = encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk);
+       if (ret == NULL || *ret != '\0')
+         goto out;
+
+       tmplog = ret;
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       last_isspace = 0;
+       break;
+
+     case LOG_FMT_HTTP_URI: // %HU
+       uri = txn->uri ? txn->uri : "<BADREQ>";
+
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       end = uri + strlen(uri);
+       // look for the first whitespace character
+       while (uri < end && !HTTP_IS_SPHT(*uri))
+         uri++;
+
+       // keep advancing past multiple spaces
+       while (uri < end && HTTP_IS_SPHT(*uri)) {
+         uri++; nspaces++;
+       }
+
+       // look for first space after url
+       spc = uri;
+       while (spc < end && !HTTP_IS_SPHT(*spc))
+         spc++;
+
+       if (!txn->uri || nspaces == 0) {
+         chunk.str = "<BADREQ>";
+         chunk.len = strlen("<BADREQ>");
+       } else {
+         chunk.str = uri;
+         chunk.len = spc - uri;
+       }
+
+       ret = encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk);
+       if (ret == NULL || *ret != '\0')
+         goto out;
+
+       tmplog = ret;
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       last_isspace = 0;
+       break;
+
+     case LOG_FMT_HTTP_METHOD: // %HM
+       uri = txn->uri ? txn->uri : "<BADREQ>";
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       end = uri + strlen(uri);
+       // look for the first whitespace character
+       spc = uri;
+       while (spc < end && !HTTP_IS_SPHT(*spc))
+         spc++;
+
+       if (spc == end) { // odd case, we have txn->uri, but we only got a verb
+         chunk.str = "<BADREQ>";
+         chunk.len = strlen("<BADREQ>");
+       } else {
+         chunk.str = uri;
+         chunk.len = spc - uri;
+       }
+
+       ret = encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk);
+       if (ret == NULL || *ret != '\0')
+         goto out;
+
+       tmplog = ret;
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       last_isspace = 0;
+       break;
+
+     case LOG_FMT_HTTP_VERSION: // %HV
+       uri = txn->uri ? txn->uri : "<BADREQ>";
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       end = uri + strlen(uri);
+       // look for the first whitespace character
+       while (uri < end && !HTTP_IS_SPHT(*uri))
+         uri++;
+
+       // keep advancing past multiple spaces
+       while (uri < end && HTTP_IS_SPHT(*uri)) {
+         uri++; nspaces++;
+       }
+
+       // look for the next whitespace character
+       while (uri < end && !HTTP_IS_SPHT(*uri))
+         uri++;
+
+       // keep advancing past multiple spaces
+       while (uri < end && HTTP_IS_SPHT(*uri))
+         uri++;
+
+       if (!txn->uri || nspaces == 0) {
+         chunk.str = "<BADREQ>";
+         chunk.len = strlen("<BADREQ>");
+       } else if (uri == end) {
+         chunk.str = "HTTP/0.9";
+         chunk.len = strlen("HTTP/0.9");
+       } else {
+         chunk.str = uri;
+         chunk.len = end - uri;
+       }
+
+       ret = encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk);
+       if (ret == NULL || *ret != '\0')
+         goto out;
+
+       tmplog = ret;
+       if (tmp->options && LOG_OPT_QUOTE)
+         LOGCHAR('"');
+
+       last_isspace = 0;
+       break;
+
      case LOG_FMT_COUNTER: // %rt
        if (tmp->options & LOG_OPT_HEXA) {
          iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", s->uniq_id);
--
2.1.3

Attachment: 0001-Add-HTTP-request-line-log-format-directives.patch
Description: Binary data

Reply via email to