[PATCH] logformat annotation fixes

Tsantilas Christos Thu, 24 Apr 2014 07:13:52 -0700

Currently note values printed with "%note" formating code,  which
contain non alphanumeric characters, were quoted and quotes were then
escaped, resulting in bizarre logged rendition of empty or simple values
(often received from various helpers):
      %22-%22
      %22Default_Google%22
      %22pg13,US%22


This patch:
- does not use quotes to print annotations

- Allow system admin to define a separator to use for logged
annotations. The %note logformat accepts the following argument:
     [name][:separator]
The separator can be one of the ',' ';' or ':'. By default, multiple
note values are separated with "," and multiple notes are separated with
"\r\n". When logging named notes with %{name}note, the explicitly
configured separator is used between note values. When logging all notes
with %note, the explicitly configured separator is used between
individual notes. There is currently no way to specify both value and
notes separators when logging all notes with %note.

- Makes the Format::Token::data a struct (now is a union) and initialize
Format::Token::data data members in Format::Token::Token constructor.

This is a Measurement Factory project

Logformat annotation fixes

Currently note values printed with "%note" formating code, which contain non 
alphanumeric characters, were quoted and quotes were then escaped, resulting
in bizarre logged rendition of empty or simple values (often received from
various helpers):
      %22-%22
      %22Default_Google%22
      %22pg13,US%22

This patch:
- does not use quotes to print annotations

- Allow system admin to define a separator to use for logged 
  annotations. The %note logformat accepts the following argument:
     [name][:separator]
  The separator can be one of the ',' ';' or ':'.
  By default, multiple note values are separated with "," and multiple
  notes are separated with "\r\n". When logging named notes with
  %{name}note, the explicitly configured separator is used between note
  values. When logging all notes with %note, the explicitly configured
  separator is used between individual notes. There is currently no way to
  specify both value and notes separators when logging all notes with %note.

- Makes the Format::Token::data a struct (now is a union) and initialize 
  Format::Token::data data members in Format::Token::Token constructor.

This is a Measurement Factory project
=== modified file 'src/Notes.cc'
--- src/Notes.cc	2014-02-10 12:58:49 +0000
+++ src/Notes.cc	2014-04-17 11:16:00 +0000
@@ -141,63 +141,63 @@
             storeAppendPrintf(entry, "\n");
         }
     }
 }
 
 void
 Notes::clean()
 {
     notes.clear();
 }
 
 NotePairs::~NotePairs()
 {
     while (!entries.empty()) {
         delete entries.back();
         entries.pop_back();
     }
 }
 
 const char *
-NotePairs::find(const char *noteKey) const
+NotePairs::find(const char *noteKey, const char *sep) const
 {
     static String value;
     value.clean();
     for (std::vector<NotePairs::Entry *>::const_iterator  i = entries.begin(); i != entries.end(); ++i) {
         if ((*i)->name.cmp(noteKey) == 0) {
             if (value.size())
-                value.append(", ");
-            value.append(ConfigParser::QuoteString((*i)->value));
+                value.append(sep);
+            value.append((*i)->value);
         }
     }
     return value.size() ? value.termedBuf() : NULL;
 }
 
 const char *
 NotePairs::toString(const char *sep) const
 {
     static String value;
     value.clean();
     for (std::vector<NotePairs::Entry *>::const_iterator  i = entries.begin(); i != entries.end(); ++i) {
         value.append((*i)->name);
         value.append(": ");
-        value.append(ConfigParser::QuoteString((*i)->value));
+        value.append((*i)->value);
         value.append(sep);
     }
     return value.size() ? value.termedBuf() : NULL;
 }
 
 const char *
 NotePairs::findFirst(const char *noteKey) const
 {
     for (std::vector<NotePairs::Entry *>::const_iterator  i = entries.begin(); i != entries.end(); ++i) {
         if ((*i)->name.cmp(noteKey) == 0)
             return (*i)->value.termedBuf();
     }
     return NULL;
 }
 
 void
 NotePairs::add(const char *key, const char *note)
 {
     entries.push_back(new NotePairs::Entry(key, note));
 }

=== modified file 'src/Notes.h'
--- src/Notes.h	2014-02-21 10:46:19 +0000
+++ src/Notes.h	2014-04-24 13:40:32 +0000
@@ -128,41 +128,41 @@
     };
 
     NotePairs() {}
     ~NotePairs();
 
     /**
      * Append the entries of the src NotePairs list to our list.
      */
     void append(const NotePairs *src);
 
     /**
      * Append any new entries of the src NotePairs list to our list.
      * Entries which already exist in the destination set are ignored.
      */
     void appendNewOnly(const NotePairs *src);
 
     /**
      * Returns a comma separated list of notes with key 'noteKey'.
      * Use findFirst instead when a unique kv-pair is needed.
      */
-    const char *find(const char *noteKey) const;
+    const char *find(const char *noteKey, const char *sep = ",") const;
 
     /**
      * Returns the first note value for this key or an empty string.
      */
     const char *findFirst(const char *noteKey) const;
 
     /**
      * Adds a note key and value to the notes list.
      * If the key name already exists in list, add the given value to its set
      * of values.
      */
     void add(const char *key, const char *value);
 
     /**
      * Adds a note key and values strList to the notes list.
      * If the key name already exists in list, add the new values to its set
      * of values.
      */
     void addStrList(const char *key, const char *values);
 

=== modified file 'src/cf.data.pre'
--- src/cf.data.pre	2014-04-22 16:01:23 +0000
+++ src/cf.data.pre	2014-04-24 07:51:19 +0000
@@ -3725,44 +3725,56 @@
 		[	output in squid text log format as used by log_mime_hdrs
 		#	output in URL quoted format
 		'	output as-is
 
 		-	left aligned
 
 		width	minimum and/or maximum field width:
 			    [width_min][.width_max]
 			When minimum starts with 0, the field is zero-padded.
 			String values exceeding maximum width are truncated.
 
 		{arg}	argument such as header name etc
 
 	Format codes:
 
 		%	a literal % character
 		sn	Unique sequence number per log line entry
 		err_code    The ID of an error response served by Squid or
 				a similar internal error identifier.
 		err_detail  Additional err_code-dependent error information.
-		note	The meta header specified by the argument. Also
+		note	The annotation specified by the argument. Also
 			logs the adaptation meta headers set by the
 			adaptation_meta configuration parameter.
-			If no argument given all meta headers logged.
+			If no argument given all annotations logged.
+			The argument may include a separator to use with
+			annotation values:
+                            name[:separator]
+			By default, multiple note values are separated with ","
+			and multiple notes are separated with "\r\n".
+			When logging named notes with %{name}note, the
+			explicitly configured separator is used between note
+			values. When logging all notes with %note, the
+			explicitly configured separator is used between
+			individual notes. There is currently no way to
+			specify both value and notes separators when logging
+			all notes with %note.
 
 	Connection related format codes:
 
 		>a	Client source IP address
 		>A	Client FQDN
 		>p	Client source port
 		>eui	Client source EUI (MAC address, EUI-48 or EUI-64 identifier)
 		>la	Local IP address the client connected to
 		>lp	Local port number the client connected to
 		>qos    Client connection TOS/DSCP value set by Squid
 		>nfmark Client connection netfilter mark set by Squid
 
 		la	Local listening IP address the client connection was connected to.
 		lp	Local listening port number the client connection was connected to.
 
 		<a	Server IP address of the last server or peer connection
 		<A	Server FQDN or peer name
 		<p	Server port number of the last server or peer connection
 		<la	Local IP address of the last server or peer connection
 		<lp     Local port number of the last server or peer connection

=== modified file 'src/format/Format.cc'
--- src/format/Format.cc	2014-04-22 02:47:09 +0000
+++ src/format/Format.cc	2014-04-24 07:51:19 +0000
@@ -1085,65 +1085,70 @@
         case LFT_SSL_USER_CERT_SUBJECT:
             if (X509 *cert = al->cache.sslClientCert.get()) {
                 if (X509_NAME *subject = X509_get_subject_name(cert)) {
                     X509_NAME_oneline(subject, tmp, sizeof(tmp));
                     out = tmp;
                 }
             }
             break;
 
         case LFT_SSL_USER_CERT_ISSUER:
             if (X509 *cert = al->cache.sslClientCert.get()) {
                 if (X509_NAME *issuer = X509_get_issuer_name(cert)) {
                     X509_NAME_oneline(issuer, tmp, sizeof(tmp));
                     out = tmp;
                 }
             }
             break;
 #endif
 
         case LFT_NOTE:
-            if (fmt->data.string) {
+            tmp[0] = fmt->data.header.separator;
+            tmp[1] = '\0';
+            if (fmt->data.header.header && *fmt->data.header.header) {
+                const char *separator = tmp;
 #if USE_ADAPTATION
                 Adaptation::History::Pointer ah = al->request ? al->request->adaptHistory() : Adaptation::History::Pointer();
                 if (ah != NULL && ah->metaHeaders != NULL) {
-                    if (const char *meta = ah->metaHeaders->find(fmt->data.string))
+                    if (const char *meta = ah->metaHeaders->find(fmt->data.header.header, separator))
                         sb.append(meta);
                 }
 #endif
                 if (al->notes != NULL) {
-                    if (const char *note = al->notes->find(fmt->data.string)) {
+                    if (const char *note = al->notes->find(fmt->data.header.header, separator)) {
                         if (sb.size())
-                            sb.append(", ");
+                            sb.append(separator);
                         sb.append(note);
                     }
                 }
                 out = sb.termedBuf();
                 quote = 1;
             } else {
+                // if no argument given use default "\r\n" as notes separator
+                const char *separator = fmt->data.string ? tmp : "\r\n";
 #if USE_ADAPTATION
                 Adaptation::History::Pointer ah = al->request ? al->request->adaptHistory() : Adaptation::History::Pointer();
                 if (ah != NULL && ah->metaHeaders != NULL && !ah->metaHeaders->empty())
-                    sb.append(ah->metaHeaders->toString());
+                    sb.append(ah->metaHeaders->toString(separator));
 #endif
                 if (al->notes != NULL && !al->notes->empty())
-                    sb.append(al->notes->toString());
+                    sb.append(al->notes->toString(separator));
 
                 out = sb.termedBuf();
                 quote = 1;
             }
             break;
 
         case LFT_CREDENTIALS:
 #if USE_AUTH
             if (al->request && al->request->auth_user_request != NULL)
                 out = strOrNull(al->request->auth_user_request->credentialsStr());
 #endif
 
             break;
 
         case LFT_PERCENT:
             out = "%";
 
             break;
         }
 

=== modified file 'src/format/Token.cc'
--- src/format/Token.cc	2014-03-30 12:00:34 +0000
+++ src/format/Token.cc	2014-04-23 16:25:46 +0000
@@ -390,40 +390,42 @@
 done:
 
     switch (type) {
 
 #if USE_ADAPTATION
     case LFT_ADAPTATION_LAST_HEADER:
 #endif
 
 #if ICAP_CLIENT
     case LFT_ICAP_REQ_HEADER:
 
     case LFT_ICAP_REP_HEADER:
 #endif
 
     case LFT_ADAPTED_REQUEST_HEADER:
 
     case LFT_REQUEST_HEADER:
 
     case LFT_REPLY_HEADER:
 
+    case LFT_NOTE:
+
         if (data.string) {
             char *header = data.string;
             char *cp = strchr(header, ':');
 
             if (cp) {
                 *cp = '\0';
                 ++cp;
 
                 if (*cp == ',' || *cp == ';' || *cp == ':') {
                     data.header.separator = *cp;
                     ++cp;
                 } else {
                     data.header.separator = ',';
                 }
 
                 data.header.element = cp;
 
                 switch (type) {
                 case LFT_REQUEST_HEADER:
                     type = LFT_REQUEST_HEADER_ELEM;
@@ -524,32 +526,51 @@
         break;
 
     case LFT_REQUEST_VERSION_OLD_2X:
         debugs(46, DBG_PARSE_NOTE(DBG_IMPORTANT), "WARNING: The \">v\" formatting code is deprecated. Use the \">rv\" instead.");
         type = LFT_REQUEST_VERSION;
         break;
 
 #if !USE_SQUID_EUI
     case LFT_CLIENT_EUI:
         debugs(46, DBG_CRITICAL, "WARNING: The \">eui\" formatting code requires EUI features which are disabled in this Squid.");
         break;
 #endif
 
     default:
         break;
     }
 
     return (cur - def);
 }
 
+Format::Token::Token() : type(LFT_NONE),
+                         label(NULL),
+                         widthMin(-1),
+                         widthMax(-1),
+                         quote(LOG_QUOTE_NONE),
+                         left(false),
+                         space(false),
+                         zero(false),
+                         divisor(1),
+                         next(NULL)
+{
+    data.string = NULL;
+    data.header.header = NULL; 
+    data.header.element = NULL;
+    data.header.separator = ',';
+}
+
+
+
 Format::Token::~Token()
 {
     label = NULL; // drop reference to global static.
     safe_free(data.string);
     while (next) {
         Token *tokens = next;
         next = next->next;
         tokens->next = NULL;
         delete tokens;
     }
 }
 

=== modified file 'src/format/Token.h'
--- src/format/Token.h	2014-01-10 15:50:03 +0000
+++ src/format/Token.h	2014-04-23 15:52:26 +0000
@@ -10,66 +10,55 @@
  * - logging
  * - external ACL input
  * - deny page URL
  *
  * These enumerations and classes define the API for parsing of
  * format directives to define these patterns. Along with output
  * functionality to produce formatted buffers.
  */
 
 namespace Format
 {
 
 class TokenTableEntry;
 
 #define LOG_BUF_SZ (MAX_URL<<2)
 
 // XXX: inherit from linked list
 class Token
 {
 public:
-    Token() : type(LFT_NONE),
-            label(NULL),
-            widthMin(-1),
-            widthMax(-1),
-            quote(LOG_QUOTE_NONE),
-            left(false),
-            space(false),
-            zero(false),
-            divisor(1),
-            next(NULL)
-    { data.string = NULL; }
-
+    Token();
     ~Token();
 
     /// Initialize the format token registrations
     static void Init();
 
     /** parses a single token. Returns the token length in characters,
      * and fills in this item with the token information.
      * def is for sure null-terminated.
      */
     int parse(const char *def, enum Quoting *quote);
 
     ByteCode_t type;
     const char *label;
-    union {
+    struct {
         char *string;
 
         struct {
             char *header;
             char *element;
             char separator;
         } header;
         char *timespec;
     } data;
     int widthMin; ///< minimum field width
     int widthMax; ///< maximum field width
     enum Quoting quote;
     bool left;
     bool space;
     bool zero;
     int divisor;    // class invariant: MUST NOT be zero.
     Token *next;	/* todo: move from linked list to array */
 
 private:
     const char *scanForToken(TokenTableEntry const table[], const char *cur);

[PATCH] logformat annotation fixes

Reply via email to