It's often undesirable to log query params - and in some cases, it can
create legal compliance problems. This commit adds a new log format
variable that logs the HTTP verb and the path requested sans query
string (and additionally omitting the protocol). For example, the
following HTTP request line:
GET /foo?bar=baz HTTP/1.1
becomes:
GET /foo
with this log format variable.
---
include/types/log.h | 1 +
src/log.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 47 insertions(+)
diff --git a/include/types/log.h b/include/types/log.h
index c7e47ea..3205ce6 100644
--- a/include/types/log.h
+++ b/include/types/log.h
@@ -90,6 +90,7 @@ enum {
LOG_FMT_HDRREQUESTLIST,
LOG_FMT_HDRRESPONSLIST,
LOG_FMT_REQ,
+ LOG_FMT_PATH,
LOG_FMT_HOSTNAME,
LOG_FMT_UNIQUEID,
LOG_FMT_SSL_CIPHER,
diff --git a/src/log.c b/src/log.c
index 1a5ad25..e9c1b10 100644
--- a/src/log.c
+++ b/src/log.c
@@ -108,6 +108,7 @@ static const struct logformat_type logformat_keywords[] = {
{ "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header
response */
{ "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL }, /*
header response list */
{ "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL }, /* accept date
millisecond */
+ { "p", LOG_FMT_PATH, PR_MODE_HTTP, LW_REQ, NULL }, /* path */
{ "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
{ "r", LOG_FMT_REQ, PR_MODE_HTTP, LW_REQ, NULL }, /* request */
{ "rc", LOG_FMT_RETRIES, PR_MODE_TCP, LW_BYTES, NULL }, /* retries */
@@ -161,6 +162,30 @@ struct logformat_var_args var_args_list[] = {
{ 0, 0 }
};
+/* Return a malloc()ed copy of <str> cut at the first '?' or at the second
+ * space, whichever comes first, so that "GET /foo?bar=baz HTTP/1.1" yields
+ * "GET /foo". The whole string is copied when neither is found. Returns NULL
+ * if the allocation fails; the caller owns the result and must free() it.
+ */
+char *strip_uri_params(const char *str)
+{
+	size_t path_end = 0;
+	int spaces = 0;
+	char *copy;
+
+	/* the first space only separates the method from the path */
+	for (; str[path_end] != '\0' && str[path_end] != '?'; path_end++) {
+		if (str[path_end] == ' ' && ++spaces == 2)
+			break;
+	}
+	copy = malloc(path_end + 1);
+	if (copy == NULL)
+		return NULL;
+	memcpy(copy, str, path_end);
+	copy[path_end] = '\0';
+	return copy;
+}
+
/* return the name of the directive used in the current proxy for which we're
* currently parsing a header, when it is known.
*/
@@ -1539,6 +1564,27 @@ int build_logline(struct session *s, char *dst, size_t
maxsize, struct list *lis
last_isspace = 0;
break;
+ case LOG_FMT_PATH: // %p
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ uri = txn->uri ? txn->uri : "<BADREQ>";
+ ret = encode_string(tmplog, dst + maxsize,
+ '#', url_encode_map,
uri);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ char *sanitized = strip_uri_params(tmplog);
+ if (sanitized == NULL)
+ goto out;
+
+ tmplog += strlen(sanitized);
+ free(sanitized);
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+
case LOG_FMT_PID: // %pid
if (tmp->options & LOG_OPT_HEXA) {
iret = snprintf(tmplog, dst + maxsize -
tmplog, "%04X", pid);
--
2.1.3