Hi all,
This patch lets crm_mon display the attribute values of each node. The details were discussed in the following thread: http://www.gossamer-threads.com/lists/linuxha/pacemaker/59168 To use this feature, apply the patch to crm_mon.c and install the rebuilt crm_mon. The patch is a change set against stable-1.0; it applies to changeset 15377. Then run crm_mon with the new "-A" option. The main changes are as follows. * I added a function to display the attribute values of each node. - When a pingd attribute value is abnormal (below the expected score), a warning message is displayed next to it. - A warning message is also displayed in the "fail-count" listing when a resource's fail-count is greater than 0. * I removed the originally implemented function that displayed the pingd attribute value, because it is now redundant. The following attributes are excluded from the display. * probe_complete * shutdown * terminate * fail-count* * last-failure* Please merge it into the source tree if you do not see any problems. Best Regards, IIDA Yuusuke An example of the output is shown below. # crm_mon -fA1 ============ Last updated: Fri Feb 26 13:49:05 2010 Stack: openais Current DC: node1 - partition with quorum Version: 1.0.7-049006f17277+ stable-1.0 tip 2 Nodes configured, 2 expected votes 2 Resources configured. ============ Online: [node1 node2] Resource Group: grpDummy prmDummy1 (ocf::heartbeat:Dummy): Started node2 prmDummy2 (ocf::heartbeat:Dummy): Started node2 prmDummy3 (ocf::heartbeat:Dummy): Started node2 Clone Set: clnPingd Started: [node2 node1] Node Attributes: * Node node2: + default_ping_set : 0 : Link is failure!! (Expected=100) * Node node1: + default_ping_set : 100 Migration summary: * Node node2: * Node node1: prmDummy3: migration-threshold=1 fail-count=1: Resource is failure!! Failed actions: prmDummy3_monitor_10000 (node=node1, call=13, rc=7, status=complete): not running #
--- stable-1.0/tools/crm_mon.c 2010-02-26 15:05:25.000000000 +0900 +++ hb_monitor/pacemaker106/tools/crm_mon.c 2010-02-26 15:02:04.000000000 +0900 @@ -82,6 +82,8 @@ gboolean print_failcount = FALSE; gboolean print_operations = FALSE; gboolean print_timing = FALSE; +gboolean print_nodes_attr = FALSE; +#define FILTER_STR {"shutdown", "terminate", "fail-count", "last-failure", "probe_complete", NULL} gboolean log_diffs = FALSE; gboolean log_updates = FALSE; @@ -247,11 +249,12 @@ {"mail-to", 1, 0, 'T', "Send Mail alerts to this user. See also --mail-from, --mail-host, --mail-prefix", !ENABLE_ESMTP}, {"-spacer-", 1, 0, '-', "\nDisplay Options:"}, - {"group-by-node", 0, 0, 'n', "Group resources by node" }, - {"inactive", 0, 0, 'r', "Display inactive resources" }, - {"failcounts", 0, 0, 'f', "Display resource fail counts"}, - {"operations", 0, 0, 'o', "Display resource operation history" }, - {"timing-details", 0, 0, 't', "Display resource operation history with timing details\n" }, + {"group-by-node", 0, 0, 'n', "\tGroup resources by node" }, + {"inactive", 0, 0, 'r', "\tDisplay inactive resources" }, + {"failcounts", 0, 0, 'f', "\tDisplay resource fail counts"}, + {"operations", 0, 0, 'o', "\tDisplay resource operation history" }, + {"timing-details", 0, 0, 't', "\tDisplay resource operation history with timing details" }, + {"show-node-attributes", 0, 0, 'A', "Display node attributes\n" }, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"interval", 1, 0, 'i', "\tUpdate frequency in seconds" }, @@ -297,7 +300,7 @@ pid_file = crm_strdup("/tmp/ClusterMon.pid"); crm_log_init(basename(argv[0]), LOG_CRIT, FALSE, FALSE, 0, NULL); - crm_set_options("V?$i:nrh:dp:s1wx:oftNS:T:F:H:P:E:e:", "mode [options]", long_options, + crm_set_options("V?$i:nrh:dp:s1wx:oftANS:T:F:H:P:E:e:", "mode [options]", long_options, "Provides a summary of cluster's current state." 
"\n\nOutputs varying levels of detail in a number of different formats.\n"); @@ -343,6 +346,9 @@ case 'f': print_failcount = TRUE; break; + case 'A': + print_nodes_attr = TRUE; + break; case 'p': crm_free(pid_file); pid_file = crm_strdup(optarg); @@ -570,17 +576,6 @@ extern int get_failcount(node_t *node, resource_t *rsc, int *last_failure, pe_working_set_t *data_set); -static void get_ping_score(node_t *node, pe_working_set_t *data_set) -{ - const char *attr = "pingd"; - const char *value = NULL; - value = g_hash_table_lookup(node->details->attrs, attr); - - if(value != NULL) { - print_as(" %s=%s", attr, value); - } -} - static void print_date(time_t time) { int lpc = 0; @@ -622,6 +617,11 @@ print_date(last_failure); } + if(failcount > 0) { + printed = TRUE; + print_as(": Resource is failure!!"); + } + if(printed) { print_as("\n"); } @@ -718,6 +718,84 @@ g_list_free(sorted_op_list); } +static void print_attr_msg(node_t *node, GListPtr rsc_list, const char *attrname, const char *attrvalue) +{ + slist_iter(rsc, resource_t, rsc_list, lpc2, + + if(rsc->children != NULL) { + print_attr_msg(node, rsc->children, attrname, attrvalue); + } + + if(safe_str_eq("pingd", g_hash_table_lookup(rsc->meta, "type"))) { + const char *name = "pingd"; + const char *multiplier = NULL; + char **host_list = NULL; + int host_list_num = 0; + int expected_score = 0; + + if(g_hash_table_lookup(rsc->meta, "name") != NULL) { + name = g_hash_table_lookup(rsc->meta, "name"); + } + + /* To identify the resource with the attribute name. */ + if(safe_str_eq(name, attrname)) { + multiplier = g_hash_table_lookup(rsc->meta, "multiplier"); + host_list = g_strsplit(g_hash_table_lookup(rsc->meta, "host_list"), " ", 0); + host_list_num = g_strv_length(host_list); + g_strfreev(host_list); + /* pingd multiplier is the same as the default value. */ + expected_score = host_list_num * crm_parse_int(multiplier, "1"); + + /* pingd is abnormal score. 
*/ + if(crm_parse_int(attrvalue, "0") < expected_score) { + print_as("\t: Link is failure!! (Expected=%d)", expected_score); + } + } + } + ); +} + +static void print_nodes_attribute(pe_working_set_t *data_set) +{ + xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); + xmlNode *trns_attr = NULL; + int i; + const char *filt_str[] = FILTER_STR; + + print_as("\nNode Attributes:\n"); + + xml_child_iter_filter( + cib_status, node_state, XML_CIB_TAG_STATE, + node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); + if(node == NULL || node->details->online == FALSE){ + continue; + } + + print_as("* Node %s:\n", crm_element_value(node_state, XML_ATTR_UNAME)); + + trns_attr = find_xml_node(node_state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); + trns_attr = find_xml_node(trns_attr, XML_TAG_ATTR_SETS, FALSE); + + xml_child_iter(trns_attr, nvpair, + const char *node_attr_name = crm_element_value(nvpair, XML_NVPAIR_ATTR_NAME); + const char *node_attr_value = crm_element_value(nvpair, XML_NVPAIR_ATTR_VALUE); + + /* filtering automatic attributes */ + for(i=0;filt_str[i]!=NULL;i++) { + if(g_str_has_prefix(node_attr_name, filt_str[i])) { + goto filtering; + } + } + + print_as(" + %-32s\t: %-10s", node_attr_name, node_attr_value); + print_attr_msg(node, node->details->running_rsc, node_attr_name, node_attr_value); + print_as("\n"); +filtering: + continue; + ); + ); +} + static void print_node_summary(pe_working_set_t *data_set, gboolean operations) { xmlNode *lrm_rsc = NULL; @@ -737,7 +815,6 @@ } print_as("* Node %s: ", crm_element_value(node_state, XML_ATTR_UNAME)); - get_ping_score(node, data_set); print_as("\n"); lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); @@ -953,6 +1030,10 @@ ); } + if(print_nodes_attr) { + print_nodes_attribute(data_set); + } + if(print_operations || print_failcount) { print_node_summary(data_set, print_operations); }
_______________________________________________ Pacemaker mailing list Pacemaker@oss.clusterlabs.org http://oss.clusterlabs.org/mailman/listinfo/pacemaker