Hi,

I wrote a patch that finds similar messages in the logcheck output and
compresses them.
e.g.:
May 11 13:10:16 foonode funkydaemon: My error message 12345
May 11 13:21:17 barnode funkydaemon: My error message 543
[...]

are compressed to:
May 11 13:10:16 foonode funkydaemon: My error message 12345
Similar messages appear 42 times


Messages are considered similar when they differ only in: log date, log node,
any number, and any 16-digit hex number.

These 'compressed' messages are put in a different section (e.g. "System
Events, High Rate") and removed from the normal section ("System Events")


For us this reduces the (valid) error messages from the Lustre filesystem to a
readable amount. It also helps with other kinds of message floods :)

Attachments:
 - logcheck_ratelimit.patch for /usr/sbin/logcheck
 - logcheck_ratelimit_conf.patch for /etc/logcheck/logcheck.conf

Two new options:
RATELIMIT=1 (or 0) to turn this feature on/off
RATELIMIT_RATE=5 to compress only when there are >= 5 similar messages


Does this patch make sense to you?

Mfg
Alexander Elbs

-- 
Alexander Elbs  ***  eMail [EMAIL PROTECTED]
--- logcheck	2007-05-05 16:03:32.000000000 +0200
+++ logcheck2	2007-05-11 12:55:53.000000000 +0200
@@ -233,26 +233,113 @@
     fi
 }
 
+# Rate limit reports: collapse floods of similar lines in $TMPDIR/checked.
+# Reads RATELIMIT_RATE (minimum group size); writes $TMPDIR/checked.rate (one
+# sample line plus a count per group) and $TMPDIR/checked.norate (the rest).
+ratelimit() {
+    # Messages are similar if they only differ in logdate,
+    # lognode, any number, any 16-digit hex number
+
+    SED_RATE_HEADER='^[A-Za-z]\{3\} [ :0-9]\{11\} [\._\/a-zA-Z0-9-]\+'
+    SED_RATE_HEX16='[0-9a-fA-F]\{16\}'
+    SED_RATE_NUMBER='[0-9\.]\+'
+
+    # These are used as sed *replacement* text below, so the doubled
+    # backslashes collapse to single ones in the regex handed to egrep.
+    GREP_RATE_HEADER='^\\w{3} [ :0-9]{11} [\._\/[:alnum:]-]+'
+    GREP_RATE_HEX16='[0-9a-fA-F]{16}'
+    GREP_RATE_NUMBER='[0-9\\.]+'
+
+    # we make all messages 'similar'
+    sed -e "s/$SED_RATE_HEADER/__LOGHEADER__/" \
+        -e "s/$SED_RATE_HEX16/__HEX__/g" \
+        -e "s/$SED_RATE_NUMBER/__NUMBER__/g" "$TMPDIR/checked" |
+    # sort these messages and look for their rate, highest rate first
+    sort | uniq -c | sort -nr |
+    # keep only groups with at least RATELIMIT_RATE members and drop the
+    # count column; unlike a 'read' loop, awk preserves trailing blanks,
+    # which the end-anchored regex needs.  Empty messages are skipped.
+    awk -v min="$RATELIMIT_RATE" \
+        '$1 + 0 >= min + 0 { sub(/^ *[0-9]+ /, ""); if (length($0)) print }' |
+    # escape every ERE metacharacter left in the text ('.' and '\' were
+    # already folded into __NUMBER__ above), then transform the
+    # placeholders back to regular expressions and anchor the end of line
+    sed -e 's/\([][?{}|$()+*^]\)/\\\1/g' \
+        -e "s/__LOGHEADER__/$GREP_RATE_HEADER/" \
+        -e "s/__HEX__/$GREP_RATE_HEX16/g" \
+        -e "s/__NUMBER__/$GREP_RATE_NUMBER/g" \
+        -e 's/$/\$/' > "$TMPDIR/rateregex"
+
+    # we need one regex per file
+    split -d -l 1 -a 4 "$TMPDIR/rateregex" "$TMPDIR/rateregex."
+
+    # if we have at least one regex we iterate over them
+    if [ -e "$TMPDIR/rateregex.0000" ]; then
+        for f in "$TMPDIR"/rateregex.*; do
+            # the first matching original line is the sample for its group
+            egrep -f "$f" "$TMPDIR/checked" | head -n 1
+            COUNT=$(egrep -c -f "$f" "$TMPDIR/checked")
+            echo "Similar messages appear $COUNT times"
+            if [ "$COUNT" -eq 0 ]; then
+                error "On ratelimit regex didn't match: $(cat "$f")"
+            fi
+        done
+    fi > "$TMPDIR/checked.rate"
+
+    # remove all similar lines
+    egrep -v -f "$TMPDIR/rateregex" "$TMPDIR/checked" > "$TMPDIR/checked.norate"
+
+    rm -f "$TMPDIR"/rateregex*
+}
+
 # Add any events to the report
 report() {
     if [ -s $TMPDIR/checked ]; then
-	printheader "$*" >> $TMPDIR/report \
-	    || error "Could not append to report."
 	if [ $SYSLOGSUMMARY -eq 1 ] && [ -x $SYSLOG_SUMMARY ]; then
+            printheader "$*" >> $TMPDIR/report \
+	      || error "Could not append to report."
 	    debug "report: running syslog-summary - $*"
 	    $SYSLOG_SUMMARY $TMPDIR/checked | \
 		egrep -v "^Summarizing " | cat >> $TMPDIR/report \
 		    || error "Could not append to report."
+	    echo >> $TMPDIR/report \
+	        || error "Could not append to report."
 	else
 	    if [ $SYSLOGSUMMARY -eq 1 ] && [ ! -x $SYSLOG_SUMMARY ]; then
 	    	debug "report : WARNING : can't exec $SYSLOG_SUMMARY. Running without summary"
 	    fi
-	    debug "report: cat'ing - $*"
-	    cat $TMPDIR/checked >> $TMPDIR/report \
-	        || error "Could not append to report."
+            if [ $RATELIMIT -eq 1 ]; then
+                ratelimit
+	        if [ -s $TMPDIR/checked.rate ]; then
+                    printheader "$*, High Rate" >> $TMPDIR/report \
+                      || error "Could not append to report."
+                    debug "report: cat'ing - $*, High Rate"
+                    cat $TMPDIR/checked.rate >> $TMPDIR/report \
+                      || error "Could not append to report."
+                    echo >> $TMPDIR/report \
+                      || error "Could not append to report."
+                    rm -f $TMPDIR/checked.rate
+                fi
+                if [ -s $TMPDIR/checked.norate ]; then
+                    printheader "$*" >> $TMPDIR/report \
+                      || error "Could not append to report."
+                    debug "report: cat'ing - $*"
+                    cat $TMPDIR/checked.norate >> $TMPDIR/report \
+                      || error "Could not append to report."
+                    echo >> $TMPDIR/report \
+                      || error "Could not append to report."
+                    rm -f $TMPDIR/checked.norate
+                fi
+            else
+                printheader "$*" >> $TMPDIR/report \
+                  || error "Could not append to report."
+	        debug "report: cat'ing - $*"
+                cat $TMPDIR/checked >> $TMPDIR/report \
+                  || error "Could not append to report."
+                echo >> $TMPDIR/report \
+                  || error "Could not append to report."
+            fi
 	fi
-	echo >> $TMPDIR/report \
-	    || error "Could not append to report."
 	return 0
     else
 	return 1
--- logcheck.conf	2007-05-11 13:10:35.000000000 +0200
+++ logcheck.conf2	2007-05-11 13:10:28.000000000 +0200
@@ -63,3 +63,10 @@
 # Controls [logcheck] prefix on Subject: lines
 
 # ADDTAG="no"
+
+# Rate limiting looks for similar messages and collapses them
+# into a single message plus a count of occurrences
+RATELIMIT=1
+
+# Only compress when there are >= RATELIMIT_RATE similar messages
+RATELIMIT_RATE=5
_______________________________________________
Logcheck-devel mailing list
[email protected]
http://lists.alioth.debian.org/mailman/listinfo/logcheck-devel

Reply via email to