Hi,
Patch resent to the proper ML (ipmitool instead of openipmi)
Please let me know if this is the proper format/ML :)
Thanks in advance,
Olivier.
--
Hi,
If the SEL buffer becomes full, IPMI will stop logging new events. The IPMI
specification provides a mechanism for monitoring the status of the SEL
buffer, including a flag that indicates when the buffer has overflowed and
events had to be dropped. This is part of the data returned by the
"Get SEL Info" command, as defined in the IPMI 2.0 specification
(section 31.2, page 386).
The attached patch improves "ipmievd" to monitor the percentage of the SEL
buffer in use. It logs a warning when the percentage used reaches 80%, logs
a new warning every time the percentage used increases further above 80%,
and logs an alert syslog message when "overflow" occurs.
1) If the percentage used is above 80% a LOG_WARNING is emitted:
"SEL buffer used at nn%, please consider clearing the SEL buffer"
2) A new LOG_WARNING is emitted for any increase of the percentage used
above 80% (e.g. a new message will be logged for 81%, 82%, ... 99%)
3) If the percentage decreases, no warning is emitted
4) If the "overflow" flag is set, a LOG_ALERT is emitted
"SEL buffer overflow, no SEL message can be logged until the SEL buffer is
cleared"
It is then the sysadmin's responsibility to clear the SEL buffer (possibly
after saving the log of events to a file), but at least "ipmievd" provides a
mechanism to help diagnose the situation before the SEL buffer becomes full.
Cheers,
Olivier
Proposed patch:
Improve "ipmievd" to monitor the percentage of the SEL buffer in use: log
a warning every time the percentage used increases above 80%, and log an
alert syslog message when the SEL buffer is full (and no further messages
can be logged).
Olivier Fourdan <[EMAIL PROTECTED]>
--- ipmitool-1.8.9/src/ipmievd.c.checkoverflow 2008-07-22 08:36:34.000000000 +0100
+++ ipmitool-1.8.9/src/ipmievd.c 2008-07-22 09:55:39.000000000 +0100
@@ -75,6 +75,7 @@
#include <ipmitool/ipmi_strings.h>
#include <ipmitool/ipmi_main.h>
+#define WARNING_THRESHOLD 80
#define DEFAULT_PIDFILE _PATH_VARRUN "ipmievd.pid"
char pidfile[64];
@@ -83,6 +84,8 @@
int csv_output = 0;
uint16_t selwatch_count = 0; /* number of entries in the SEL */
uint16_t selwatch_lastid = 0; /* current last entry in the SEL */
+int selwatch_pctused = 0; /* current percent usage in the SEL */
+int selwatch_overflow = 0; /* SEL overflow */
int selwatch_timeout = 10; /* default to 10 seconds */
/* event interface definition */
@@ -98,6 +101,13 @@
struct ipmi_intf * intf;
};
+/* Data from SEL we are interested in */
+typedef struct sel_data {
+ uint16_t entries;
+ int pctused;
+ int overflow;
+} sel_data;
+
static void log_event(struct ipmi_event_intf * eintf, struct sel_event_record * evt);
/* ~~~~~~~~~~~~~~~~~~~~~~ openipmi ~~~~~~~~~~~~~~~~~~~~ */
@@ -183,6 +193,20 @@
return NULL;
}
+static int
+compute_pctfull(uint16_t entries, uint16_t freespace)
+{
+ int pctfull = 0;
+
+ if (entries) {
+ entries *= 16;
+ freespace += entries;
+ pctfull = (int)(100 * ( (double)entries / (double)freespace ));
+ }
+ return pctfull;
+}
+
+
static void
log_event(struct ipmi_event_intf * eintf, struct sel_event_record * evt)
{
@@ -459,11 +483,12 @@
/*************************************************************************/
/** SEL Watch Functions **/
/*************************************************************************/
-static uint16_t
-selwatch_get_count(struct ipmi_intf * intf)
+static int
+selwatch_get_data(struct ipmi_intf * intf, struct sel_data *data)
{
struct ipmi_rs * rsp;
struct ipmi_rq req;
+ uint16_t freespace;
memset(&req, 0, sizeof(req));
req.msg.netfn = IPMI_NETFN_STORAGE;
@@ -480,8 +505,17 @@
return 0;
}
- lprintf(LOG_DEBUG, "SEL count is %d", buf2short(rsp->data+1));
- return buf2short(rsp->data+1);
+ freespace = buf2short(rsp->data + 3);
+ data->entries = buf2short(rsp->data + 1);
+ data->pctused = compute_pctfull (data->entries, freespace);
+ data->overflow = rsp->data[13] & 0x80;
+
+ lprintf(LOG_DEBUG, "SEL count is %d", data->entries);
+ lprintf(LOG_DEBUG, "SEL freespace is %d", freespace);
+ lprintf(LOG_DEBUG, "SEL Percent Used: %d%%\n", data->pctused);
+ lprintf(LOG_DEBUG, "SEL Overflow: %s", data->overflow ? "true" : "false");
+
+ return 1;
}
static uint16_t
@@ -521,14 +555,29 @@
static int
selwatch_setup(struct ipmi_event_intf * eintf)
{
- /* save current sel record count */
- selwatch_count = selwatch_get_count(eintf->intf);
- lprintf(LOG_DEBUG, "Current SEL count is %d", selwatch_count);
-
- /* save current last record ID */
- selwatch_lastid = selwatch_get_lastid(eintf->intf);
- lprintf(LOG_DEBUG, "Current SEL lastid is %04x", selwatch_lastid);
+ struct sel_data data;
+
+ /* save current sel record count */
+ if (selwatch_get_data(eintf->intf, &data)) {
+ selwatch_count = data.entries;
+ selwatch_pctused = data.pctused;
+ selwatch_overflow = data.overflow;
+ lprintf(LOG_DEBUG, "Current SEL count is %d", selwatch_count);
+ /* save current last record ID */
+ selwatch_lastid = selwatch_get_lastid(eintf->intf);
+ lprintf(LOG_DEBUG, "Current SEL lastid is %04x", selwatch_lastid);
+ /* display alert/warning immediately at startup if relevant */
+ if (selwatch_pctused >= WARNING_THRESHOLD) {
+ lprintf(LOG_WARNING, "SEL buffer used at %d%%, please consider clearing the SEL buffer", selwatch_pctused);
+ }
+ if (selwatch_overflow) {
+ lprintf(LOG_ALERT, "SEL buffer overflow, no SEL message can be logged until the SEL buffer is cleared");
+ }
+
+ return 1;
+ }
+ lprintf(LOG_ERR, "Unable to retrieve SEL data");
return 0;
}
@@ -541,13 +590,29 @@
selwatch_check(struct ipmi_event_intf * eintf)
{
uint16_t old_count = selwatch_count;
- selwatch_count = selwatch_get_count(eintf->intf);
- if (selwatch_count == 0) {
- lprintf(LOG_DEBUG, "SEL count is 0 (old=%d), resetting lastid to 0", old_count);
- selwatch_lastid = 0;
- } else if (selwatch_count < old_count) {
- selwatch_lastid = selwatch_get_lastid(eintf->intf);
- lprintf(LOG_DEBUG, "SEL count lowered, new SEL lastid is %04x", selwatch_lastid);
+ int old_pctused = selwatch_pctused;
+ int old_overflow = selwatch_overflow;
+ struct sel_data data;
+
+ if (selwatch_get_data(eintf->intf, &data)) {
+ selwatch_count = data.entries;
+ selwatch_pctused = data.pctused;
+ selwatch_overflow = data.overflow;
+ if (old_overflow && !selwatch_overflow) {
+ lprintf(LOG_NOTICE, "SEL overflow is cleared");
+ } else if (!old_overflow && selwatch_overflow) {
+ lprintf(LOG_ALERT, "SEL buffer overflow, no new SEL message will be logged until the SEL buffer is cleared");
+ }
+ if ((selwatch_pctused >= WARNING_THRESHOLD) && (selwatch_pctused > old_pctused)) {
+ lprintf(LOG_WARNING, "SEL buffer is %d%% full, please consider clearing the SEL buffer", selwatch_pctused);
+ }
+ if (selwatch_count == 0) {
+ lprintf(LOG_DEBUG, "SEL count is 0 (old=%d), resetting lastid to 0", old_count);
+ selwatch_lastid = 0;
+ } else if (selwatch_count < old_count) {
+ selwatch_lastid = selwatch_get_lastid(eintf->intf);
+ lprintf(LOG_DEBUG, "SEL count lowered, new SEL lastid is %04x", selwatch_lastid);
+ }
}
return (selwatch_count > old_count);
}
-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
Ipmitool-devel mailing list
Ipmitool-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/ipmitool-devel