From ab5b317b8b09ab1909421d7245f7f7c2268ad442 Mon Sep 17 00:00:00 2001
From: Andrew Hayworth <andrew.hayworth@getbraintree.com>
Date: Tue, 7 Apr 2015 21:42:53 +0000
Subject: [PATCH] Add log format variables that parse the HTTP request line

This commit adds 4 new log format variables that parse the
HTTP Request-Line for more specific logging than "%r" provides.

For example, we can parse the following HTTP Request-Line with
these new variables:

  "GET /foo?bar=baz HTTP/1.1"

- %HM: HTTP Method ("GET")
- %HV: HTTP Version ("HTTP/1.1")
- %HR: HTTP Request-URI ("/foo?bar=baz")
- %HP: HTTP Request-URI without query string ("/foo")
---
 doc/configuration.txt |   4 ++
 include/types/log.h   |   4 ++
 src/log.c             | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 115 insertions(+)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 3ae6624..8a0b2f1 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -12342,6 +12342,10 @@ Please refer to the table below for currently defined variables :
   |   | %t   | date_time      (with millisecond resolution)  | date        |
   |   | %ts  | termination_state                             | string      |
   | H | %tsc | termination_state with cookie status          | string      |
+  | H | %HM  | HTTP method (ex: POST)                        | string      |
+  | H | %HV  | HTTP version (ex: HTTP/1.0)                   | string      |
+  | H | %HR  | HTTP request URI (ex: /foo?bar=baz)           | string      |
+  | H | %HP  | HTTP request URI without query string         | string      |
   +---+------+-----------------------------------------------+-------------+
 
     R = Restrictions : H = mode http only ; S = SSL only
diff --git a/include/types/log.h b/include/types/log.h
index c7e47ea..625106d 100644
--- a/include/types/log.h
+++ b/include/types/log.h
@@ -90,6 +90,10 @@ enum {
 	LOG_FMT_HDRREQUESTLIST,
 	LOG_FMT_HDRRESPONSLIST,
 	LOG_FMT_REQ,
+	LOG_FMT_HTTP_METHOD,
+	LOG_FMT_HTTP_REQUEST,
+	LOG_FMT_HTTP_PATH,
+	LOG_FMT_HTTP_VERSION,
 	LOG_FMT_HOSTNAME,
 	LOG_FMT_UNIQUEID,
 	LOG_FMT_SSL_CIPHER,
diff --git a/src/log.c b/src/log.c
index 1a5ad25..f227f47 100644
--- a/src/log.c
+++ b/src/log.c
@@ -108,6 +108,10 @@ static const struct logformat_type logformat_keywords[] = {
 	{ "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL },  /* header response */
 	{ "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL },  /* header response list */
 	{ "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL },       /* accept date millisecond */
+	{ "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP method */
+	{ "HR", LOG_FMT_HTTP_REQUEST, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP request URI */
+	{ "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP request URI without query string */
+	{ "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL },  /* HTTP version */
 	{ "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
 	{ "r", LOG_FMT_REQ, PR_MODE_HTTP, LW_REQ, NULL },  /* request */
 	{ "rc", LOG_FMT_RETRIES, PR_MODE_TCP, LW_BYTES, NULL },  /* retries */
@@ -922,6 +926,7 @@ int build_logline(struct session *s, char *dst, size_t maxsize, struct list *lis
 	char *tmplog;
 	char *ret;
 	int iret;
+	int nchar;
 	struct logformat_node *tmp;
 
 	/* FIXME: let's limit ourselves to frontend logging for now. */
@@ -1539,6 +1544,108 @@ int build_logline(struct session *s, char *dst, size_t maxsize, struct list *lis
 				last_isspace = 0;
 				break;
 
+			case LOG_FMT_HTTP_METHOD: // %HM
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				uri = txn->uri;
+				if (uri == NULL)
+					uri = "<BADREQ>";
+				ret = encode_string(tmplog, dst + maxsize, '#', url_encode_map, uri);
+				if (!ret || *ret)
+					goto out;
+
+				// Only the method is wanted: end the encoded copy at its first space.
+				// Without any space, everything up to ret is kept as is.
+				uri = strchr(tmplog, ' ');
+				if (uri != NULL)
+					*uri = '\0';
+				tmplog = uri ? uri : ret;
+
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				last_isspace = 0;
+				break;
+
+			case LOG_FMT_HTTP_PATH: // %HP
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				uri = txn->uri ? txn->uri : "<BADREQ>";
+				ret = encode_string(tmplog, dst + maxsize,
+						       '#', url_encode_map, uri);
+				if (ret == NULL || *ret != '\0')
+					goto out;
+
+				// The path starts after the space(s) following the method and stops at the
+				// first '?' or space after that point, or at the end of the string. Scanning
+				// forward from the path start keeps nchar >= 0 even for "GET /foo" or when
+				// a '?' precedes the first space. Without any space, log the whole string.
+				char *spc = strchr(tmplog, ' ');
+				char *end = NULL;
+				if (spc != NULL) {
+					spc += strspn(spc, " "); // first character of the path
+					end = spc + strcspn(spc, "? ");
+					nchar = end - spc;
+					memmove(tmplog, spc, nchar);
+					tmplog[nchar] = '\0';
+					tmplog += nchar;
+				} else {
+					tmplog = ret;
+				}
+
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				last_isspace = 0;
+				break;
+
+			case LOG_FMT_HTTP_REQUEST: // %HR
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				uri = txn->uri ? txn->uri : "<BADREQ>";
+				ret = encode_string(tmplog, dst + maxsize,
+						       '#', url_encode_map, uri);
+				if (ret == NULL || *ret != '\0')
+					goto out;
+				// Keep the request URI only; without any space, log the whole string.
+				spc = strchr(tmplog, ' ');
+				if (spc != NULL) {
+					spc += strspn(spc, " ");   // first character of the URI
+					nchar = strcspn(spc, " "); // stops before the next space or at the end
+					memmove(tmplog, spc, nchar);
+					tmplog[nchar] = '\0';
+					tmplog += nchar;
+				} else {
+					tmplog = ret;
+				}
+
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				last_isspace = 0;
+				break;
+
+			case LOG_FMT_HTTP_VERSION: // %HV
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				uri = txn->uri ? txn->uri : "<BADREQ>";
+				ret = encode_string(tmplog, dst + maxsize, '#', url_encode_map, uri);
+				if (ret == NULL || *ret != '\0')
+					goto out;
+				// Log what follows the URI (nothing if absent); with no space, log it all.
+				spc = strchr(tmplog, ' ');
+				if (spc != NULL) {
+					spc += strspn(spc, " ");  // skip the separator after the method
+					spc += strcspn(spc, " "); // skip the URI
+					spc += strspn(spc, " ");  // version start, or end of string
+					nchar = strlen(spc);
+					memmove(tmplog, spc, nchar + 1); // + 1 also moves the final NUL
+					tmplog += nchar;
+				} else {
+					tmplog = ret;
+				}
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				last_isspace = 0;
+				break;
+
 			case LOG_FMT_PID: // %pid
 				if (tmp->options & LOG_OPT_HEXA) {
 					iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", pid);
-- 
2.1.3

