It's often undesirable to log query params - and in some cases, it can
create legal compliance problems. This commit adds a new log format
variable that logs the HTTP verb and the path requested sans query
string (and additionally omitting the protocol). For example, the
following HTTP request line:

  GET /foo?bar=baz HTTP/1.1

becomes:

  GET /foo

with this log format variable.
---
 include/types/log.h |  1 +
 src/log.c           | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/include/types/log.h b/include/types/log.h
index c7e47ea..3205ce6 100644
--- a/include/types/log.h
+++ b/include/types/log.h
@@ -90,6 +90,7 @@ enum {
        LOG_FMT_HDRREQUESTLIST,
        LOG_FMT_HDRRESPONSLIST,
        LOG_FMT_REQ,
+       LOG_FMT_PATH,
        LOG_FMT_HOSTNAME,
        LOG_FMT_UNIQUEID,
        LOG_FMT_SSL_CIPHER,
diff --git a/src/log.c b/src/log.c
index 1a5ad25..e9c1b10 100644
--- a/src/log.c
+++ b/src/log.c
@@ -108,6 +108,7 @@ static const struct logformat_type logformat_keywords[] = {
        { "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL },  /* header 
response */
        { "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL },  /* 
header response list */
        { "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL },       /* accept date 
millisecond */
+       { "p", LOG_FMT_PATH, PR_MODE_HTTP, LW_REQ, NULL },  /* path */
        { "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
        { "r", LOG_FMT_REQ, PR_MODE_HTTP, LW_REQ, NULL },  /* request */
        { "rc", LOG_FMT_RETRIES, PR_MODE_TCP, LW_BYTES, NULL },  /* retries */
@@ -161,6 +162,30 @@ struct logformat_var_args var_args_list[] = {
        {  0,  0 }
 };
 
+/* Return a malloc()ed copy of request line <str>, cut at the first '?' or at
+ * the second space (the one preceding the protocol), whichever comes first.
+ * Returns NULL if <str> is NULL or on allocation failure; caller must free().
+ */
+char *strip_uri_params(char *str)
+{
+       size_t path_end;
+       int spaces = 0;
+       char *copy;
+
+       if (str == NULL)
+               return NULL;
+       /* the first space separates the method from the URI, so skip it */
+       for (path_end = 0; str[path_end] && str[path_end] != '?'; path_end++)
+               if (str[path_end] == ' ' && spaces++ > 0)
+                       break;
+       copy = malloc(path_end + 1);
+       if (copy != NULL) {
+               memcpy(copy, str, path_end);
+               copy[path_end] = '\0';
+       }
+       return copy;
+}
+
 /* return the name of the directive used in the current proxy for which we're
  * currently parsing a header, when it is known.
  */
@@ -1539,6 +1564,27 @@ int build_logline(struct session *s, char *dst, size_t 
maxsize, struct list *lis
                                last_isspace = 0;
                                break;
 
+                       case LOG_FMT_PATH: // %p
+                               if (tmp->options & LOG_OPT_QUOTE)
+                                       LOGCHAR('"');
+                               uri = txn->uri ? txn->uri : "<BADREQ>";
+                               ret = encode_string(tmplog, dst + maxsize,
+                                                      '#', url_encode_map, 
uri);
+                               if (ret == NULL || *ret != '\0')
+                                       goto out;
+
+                               char *sanitized = strip_uri_params(tmplog);
+                               if (sanitized == NULL)
+                                       goto out;
+
+                               tmplog += strlen(sanitized);
+                               free(sanitized);
+
+                               if (tmp->options & LOG_OPT_QUOTE)
+                                       LOGCHAR('"');
+                               last_isspace = 0;
+                               break;
+
                        case LOG_FMT_PID: // %pid
                                if (tmp->options & LOG_OPT_HEXA) {
                                        iret = snprintf(tmplog, dst + maxsize - 
tmplog, "%04X", pid);
-- 
2.1.3


Reply via email to