Hi,
I wrote a patch that finds similar messages in the logcheck output and
compresses them.
e.g.:
May 11 13:10:16 foonode funkydaemon: My error message 12345
May 11 13:21:17 barnode funkydaemon: My error message 543
[...]
are compressed to:
May 11 13:10:16 foonode funkydaemon: My error message 12345
Similar messages appear 42 times
Messages are considered similar when they differ only in: logdate, lognode,
any number, and any 16-digit hex number.
These 'compressed' messages are put in a different section (e.g. "System
Events, High Rate") and removed from the normal section ("System Events").
For us, this reduces the (valid) error messages from the Lustre filesystem to a
readable amount. It also helps with other message floods :)
Attachments:
- logcheck_ratelimit.patch for /usr/sbin/logcheck
- logcheck_ratelimit_conf.patch for /etc/logcheck/logcheck.conf
Two new options:
RATELIMIT=1 (or 0) to turn this feature on/off
RATELIMIT_RATE=5 to enable compression if there are >= 5 similar messages
Does this patch make sense to you?
Mfg
Alexander Elbs
--
Alexander Elbs *** eMail [EMAIL PROTECTED]
--- logcheck 2007-05-05 16:03:32.000000000 +0200
+++ logcheck2 2007-05-11 12:55:53.000000000 +0200
@@ -233,26 +233,113 @@
fi
}
+# Rate limit reports
+ratelimit() {
+ # Summarize frequent similar messages into checked.rate; put the rest in checked.norate.
+ # Similar means differing only in logdate, lognode, any number or any 16-digit hex number.
+
+ SED_RATE_HEADER='^[A-Za-z]\{3\} [ :0-9]\{11\} [\._\/a-zA-Z0-9-]\+'
+ SED_RATE_HEX16='[0-9a-fA-F]\{16\}'
+ SED_RATE_NUMBER='[0-9\.]\+'
+
+ GREP_RATE_HEADER='^\\w{3} [ :0-9]{11} [\._\/[:alnum:]-]+'
+ GREP_RATE_HEX16='[0-9a-fA-F]{16}'
+ GREP_RATE_NUMBER='[0-9\\.]+'
+
+ # replace the variable parts of every message with placeholders
+ cat $TMPDIR/checked |
+ sed -e "s/$SED_RATE_HEADER/__LOGHEADER__/" \
+ -e "s/$SED_RATE_HEX16/__HEX__/g" \
+ -e "s/$SED_RATE_NUMBER/__NUMBER__/g" |
+ # sort these messages and look for their rate, highest rate first
+ sort | uniq -c | sort -nr |
+ while read LINE; do
+ # we extract the number of occurrences and the message
+ NUMLINES=$(echo "$LINE" | awk '{ print $1 }')
+ DATA=$(echo "$LINE" | sed 's/^[0-9]* //')
+ # if the number of occurrences is at least the minimal rate
+ # (RATELIMIT_RATE) we generate a regular expression that
+ # matches these messages
+ if [ "$NUMLINES" -ge "$RATELIMIT_RATE" ]; then
+ # escape the regex metacharacters ][()+*^ (NOTE(review): others such as . ? | { are
+ # not escaped) and transform the placeholders back to regular expressions
+ echo "$DATA" | sed -e 's/\([][()+*^]\)/\\\1/g' \
+ -e "s/__LOGHEADER__/$GREP_RATE_HEADER/" \
+ -e "s/__HEX__/$GREP_RATE_HEX16/g" \
+ -e "s/__NUMBER__/$GREP_RATE_NUMBER/g" \
+ -e 's/$/\$/'
+ fi
+ done > $TMPDIR/rateregex
+
+ # split into one file per regex so each regex can be matched and counted on its own
+ split -d -l 1 -a 4 $TMPDIR/rateregex $TMPDIR/rateregex.
+
+ # if we have at least one regex we iterate over them
+ if [ -e $TMPDIR/rateregex.0000 ]; then
+ for f in $TMPDIR/rateregex.*; do
+ egrep -f $f $TMPDIR/checked | head -n 1
+ COUNT=$(egrep -c -f $f $TMPDIR/checked)
+ echo "Similar messages appear $COUNT times"
+ if [ "$COUNT" -eq 0 ]; then
+ error "On ratelimit regex didn't match: $(cat $f)"
+ fi
+ done
+ fi > $TMPDIR/checked.rate
+
+ # keep only the lines that match none of the regexes
+ egrep -v -f $TMPDIR/rateregex $TMPDIR/checked > $TMPDIR/checked.norate
+
+ rm -f $TMPDIR/rateregex*
+}
+
# Add any events to the report
report() {
if [ -s $TMPDIR/checked ]; then
- printheader "$*" >> $TMPDIR/report \
- || error "Could not append to report."
if [ $SYSLOGSUMMARY -eq 1 ] && [ -x $SYSLOG_SUMMARY ]; then
+ printheader "$*" >> $TMPDIR/report \
+ || error "Could not append to report."
debug "report: running syslog-summary - $*"
$SYSLOG_SUMMARY $TMPDIR/checked | \
egrep -v "^Summarizing " | cat >> $TMPDIR/report \
|| error "Could not append to report."
+ echo >> $TMPDIR/report \
+ || error "Could not append to report."
else
if [ $SYSLOGSUMMARY -eq 1 ] && [ ! -x $SYSLOG_SUMMARY ]; then
debug "report : WARNING : can't exec $SYSLOG_SUMMARY. Running without summary"
fi
- debug "report: cat'ing - $*"
- cat $TMPDIR/checked >> $TMPDIR/report \
- || error "Could not append to report."
+ if [ $RATELIMIT -eq 1 ]; then
+ ratelimit
+ if [ -s $TMPDIR/checked.rate ]; then
+ printheader "$*, High Rate" >> $TMPDIR/report \
+ || error "Could not append to report."
+ debug "report: cat'ing - $*, High Rate"
+ cat $TMPDIR/checked.rate >> $TMPDIR/report \
+ || error "Could not append to report."
+ echo >> $TMPDIR/report \
+ || error "Could not append to report."
+ rm -f $TMPDIR/checked.rate
+ fi
+ if [ -s $TMPDIR/checked.norate ]; then
+ printheader "$*" >> $TMPDIR/report \
+ || error "Could not append to report."
+ debug "report: cat'ing - $*"
+ cat $TMPDIR/checked.norate >> $TMPDIR/report \
+ || error "Could not append to report."
+ echo >> $TMPDIR/report \
+ || error "Could not append to report."
+ rm -f $TMPDIR/checked.norate
+ fi
+ else
+ printheader "$*" >> $TMPDIR/report \
+ || error "Could not append to report."
+ debug "report: cat'ing - $*"
+ cat $TMPDIR/checked >> $TMPDIR/report \
+ || error "Could not append to report."
+ echo >> $TMPDIR/report \
+ || error "Could not append to report."
+ fi
fi
- echo >> $TMPDIR/report \
- || error "Could not append to report."
return 0
else
return 1
--- logcheck.conf 2007-05-11 13:10:35.000000000 +0200
+++ logcheck.conf2 2007-05-11 13:10:28.000000000 +0200
@@ -63,3 +63,10 @@
# Controls [logcheck] prefix on Subject: lines
# ADDTAG="no"
+
+# Rate limiting looks for similar messages and compresses them
+# to a single message plus the number of occurrences
+RATELIMIT=1
+
+# only compress if >= RATELIMIT_RATE similar messages
+RATELIMIT_RATE=5
_______________________________________________
Logcheck-devel mailing list
[email protected]
http://lists.alioth.debian.org/mailman/listinfo/logcheck-devel