Hi Andrew,
On Tue, Apr 14, 2015 at 02:33:25PM -0500, Andrew Hayworth wrote:
> I've attached another patch that implements
> 4 separate log variables to log individual parts of the request line,
> as you suggested.
Great, thanks. I'm having a few comments about the patch :
- it did not apply to mainline, I had to apply it by hand, so I suspect
that you did it against 1.5 instead of 1.6. Any contrib must be done
on 1.6 (dev branch), including fixes, and if needed they're backported
later. This ensures we never lose a fix or feature when upgrading.
- I got the following warning due to the variable declarations mixed
with the code, which should really be avoided :
src/log.c: In function 'build_logline':
src/log.c:1606:5: warning: ISO C90 forbids mixed declarations and code
[-Wdeclaration-after-statement]
- the following strchr/strrchr dance is dangerous and bogus. As a rule
of thumb, you should always consider that if you're using strrchr(),
you're parsing backwards and you're doing something wrong :
spc = strchr(tmplog, ' ');
end = strchr(tmplog, '?');
if (end == NULL) {
end = strrchr(tmplog, ' ');
}
if (end != NULL) {
nchar = end - spc - 1;
memmove(tmplog, spc+1, nchar);
tmplog[nchar] = '\0';
tmplog += nchar;
} else {
tmplog = ret;
}
There, if I send "GET \r\n\r\n", what will happen is that both spc and end
will point to the same space, resulting in <nchar> being -1, so you can
already see the segfault in memmove() and later. Also you need to keep in
mind that multiple spaces are tolerated and that tabs are tolerated as
well, but they're encoded as "#09" after encode_string().
Thus I'd suggest that you use encode_chunk() instead which takes a chunk
(string + length) and that you make this chunk point to the exact string
you want to encode. Example for the path above :
struct chunk chunk;
.../...
case LOG_FMT_HTTP_PATH: // %HP
uri = txn->uri ? txn->uri : "<BADREQ>";
end = uri + strlen(end);
// look for first space
while (uri < end && !http_is_spht(*uri))
uri++;
// look for first non-space (uri)
while (uri < end && http_is_spht(*uri))
uri++;
// look for first space or question mark after uri
spc = uri;
while (spc < end && *spc != '?' && !http_is_spht(*spc))
spc++;
chunk.str = uri;
chunk.len = spc - uri;
if (tmp->options & LOG_OPT_QUOTE)
LOGCHAR('"');
ret = encode_chunk(tmplog, dst + maxsize,
'#', url_encode_map, &chunk);
if (ret == NULL || *ret != '\0')
goto out;
tmplog = ret;
if (tmp->options & LOG_OPT_QUOTE)
LOGCHAR('"');
last_isspace = 0;
break;
%HU is the same without the stop on '?'.
I think you get the idea. I'm attaching the patch I rebased so that you
can save time and restart from here.
Thanks!
Willy
>From d090a00b2b9cd1f4d57ceb7ea3d0aa18857dd6ad Mon Sep 17 00:00:00 2001
From: Andrew Hayworth <[email protected]>
Date: Tue, 7 Apr 2015 21:42:53 +0000
Subject: MEDIUM: log: add log format variables that parse the HTTP request
line
This commit adds 4 new log format variables that parse the
HTTP Request-Line for more specific logging than "%r" provides.
For example, we can parse the following HTTP Request-Line with
these new variables:
"GET /foo?bar=baz HTTP/1.1"
- %HM: HTTP Method ("GET")
- %HV: HTTP Version ("HTTP/1.1")
- %HR: HTTP Request-URI ("/foo?bar=baz")
- %HP: HTTP Request-URI without query string ("/foo")
---
doc/configuration.txt | 4 ++
include/types/log.h | 4 ++
src/log.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 117 insertions(+)
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 2242c71..5e2464a 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -13203,6 +13203,10 @@ Please refer to the table below for currently defined
variables :
| | %t | date_time (with millisecond resolution) | date |
| | %ts | termination_state | string |
| H | %tsc | termination_state with cookie status | string |
+ | H | %HM | HTTP method (ex: POST) | string |
+ | H | %HV | HTTP version (ex: HTTP/1.0) | string |
+ | H | %HR | HTTP request URI (ex: /foo?bar=baz) | string |
+ | H | %HP | HTTP request URI without query string | string |
+---+------+-----------------------------------------------+-------------+
R = Restrictions : H = mode http only ; S = SSL only
diff --git a/include/types/log.h b/include/types/log.h
index 345a09f..f02cc11 100644
--- a/include/types/log.h
+++ b/include/types/log.h
@@ -93,6 +93,10 @@ enum {
LOG_FMT_HDRREQUESTLIST,
LOG_FMT_HDRRESPONSLIST,
LOG_FMT_REQ,
+ LOG_FMT_HTTP_METHOD,
+ LOG_FMT_HTTP_REQUEST,
+ LOG_FMT_HTTP_PATH,
+ LOG_FMT_HTTP_VERSION,
LOG_FMT_HOSTNAME,
LOG_FMT_UNIQUEID,
LOG_FMT_SSL_CIPHER,
diff --git a/src/log.c b/src/log.c
index 866b110..bbcb0b0 100644
--- a/src/log.c
+++ b/src/log.c
@@ -110,6 +110,10 @@ static const struct logformat_type logformat_keywords[] = {
{ "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL }, /*
header response list */
{ "lc", LOG_FMT_LOGCNT, PR_MODE_TCP, LW_INIT, NULL }, /* log counter */
{ "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL }, /* accept date
millisecond */
+ { "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP
method */
+ { "HR", LOG_FMT_HTTP_REQUEST, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP
full request */
+ { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP path
*/
+ { "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP
version */
{ "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
{ "r", LOG_FMT_REQ, PR_MODE_HTTP, LW_REQ, NULL }, /* request */
{ "rc", LOG_FMT_RETRIES, PR_MODE_TCP, LW_BYTES, NULL }, /* retries */
@@ -930,7 +934,10 @@ int build_logline(struct stream *s, char *dst, size_t
maxsize, struct list *list
int last_isspace = 1;
char *tmplog;
char *ret;
+ char *spc;
+ char *end;
int iret;
+ int nchar;
struct logformat_node *tmp;
/* FIXME: let's limit ourselves to frontend logging for now. */
@@ -1564,6 +1571,108 @@ int build_logline(struct stream *s, char *dst, size_t
maxsize, struct list *list
last_isspace = 0;
break;
+ case LOG_FMT_HTTP_METHOD: // %HM
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ uri = txn->uri ? txn->uri : "<BADREQ>";
+ ret = encode_string(tmplog, dst + maxsize,
+ '#', url_encode_map,
uri);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ uri = strchr(tmplog, ' ');
+ if (uri == NULL) {
+ tmplog = ret;
+ } else {
+ *uri = '\0';
+ tmplog = uri;
+ }
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_PATH: // %HP
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ uri = txn->uri ? txn->uri : "<BADREQ>";
+ ret = encode_string(tmplog, dst + maxsize,
+ '#', url_encode_map,
uri);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ // Cut off request line at first occurrence of
'?' which signals the beginning of
+ // request params (and end of path). If no
params are present, cut off at last space
+ // which otherwise signals the end of the path.
+ spc = strchr(tmplog, ' ');
+ end = strchr(tmplog, '?');
+ if (end == NULL) {
+ end = strrchr(tmplog, ' ');
+ }
+ if (end != NULL) {
+ nchar = end - spc - 1;
+ memmove(tmplog, spc+1, nchar);
+ tmplog[nchar] = '\0';
+ tmplog += nchar;
+ } else {
+ tmplog = ret;
+ }
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_REQUEST: // %HR
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ uri = txn->uri ? txn->uri : "<BADREQ>";
+ ret = encode_string(tmplog, dst + maxsize,
+ '#', url_encode_map,
uri);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ spc = strchr(tmplog, ' ');
+ end = strrchr(tmplog, ' ');
+ if (end != NULL) {
+ nchar = end-spc;
+ memmove(tmplog, spc+1, nchar);
+ tmplog[nchar] = '\0';
+ tmplog += nchar;
+ } else {
+ tmplog = ret;
+ }
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_VERSION: // %HV
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ uri = txn->uri ? txn->uri : "<BADREQ>";
+ ret = encode_string(tmplog, dst + maxsize,
+ '#', url_encode_map,
uri);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ spc = strrchr(tmplog, ' ');
+ if (spc != NULL) {
+ nchar = strlen(tmplog) - (spc - tmplog);
+ memmove(tmplog, spc+1, nchar);
+ tmplog[nchar] = '\0';
+ tmplog += nchar - 1;
+ } else {
+ tmplog = ret;
+ }
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+
case LOG_FMT_PID: // %pid
if (tmp->options & LOG_OPT_HEXA) {
iret = snprintf(tmplog, dst + maxsize -
tmplog, "%04X", pid);
--
1.7.12.1