16.01.2012 09:20, Andrew Beekhof wrote:
[snip]
>>> At the same time, stonith_admin -B succeeds.
>>> The main difference I see is st_opt_sync_call in a latter case.
>>> Will try to experiment with it.
>>
>> Yeeeesssss!!!
>>
>> Now I see following:
>> Dec 19 11:53:34 vd01-a cluster-dlm: [2474]: info:
>> pacemaker_terminate_member: Requesting that node 1090782474/vd01-b be fenced
>
> So the important question... what did you change?
Nice you're back ;)
+ rc = st->cmds->fence(st, *st_opt_sync_call*, node_uname, "reboot", 120);
attaching my resulting version of pacemaker.c (which still has a lot of
mess because of different approaches I tried to get the result and needs
a cleanup). The function to look at is pacemaker_terminate_member()
which is almost one-to-one copy of crm_terminate_member_no_mainloop()
except rename of variable to compile without warnings and change of
->fence() arguments.
>
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info:
>> initiate_remote_stonith_op: Initiating remote operation reboot for
>> vd01-b: 21425fc0-4311-40fa-9647-525c3f258471
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: crm_get_peer: Node
>> vd01-c now has id: 1107559690
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: stonith_command:
>> Processed st_query from vd01-c: rc=0
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: crm_get_peer: Node
>> vd01-d now has id: 1124336906
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: stonith_command:
>> Processed st_query from vd01-d: rc=0
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: stonith_command:
>> Processed st_query from vd01-a: rc=0
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: call_remote_stonith:
>> Requesting that vd01-c perform op reboot vd01-b
>> Dec 19 11:53:34 vd01-a stonith-ng: [1905]: info: crm_get_peer: Node
>> vd01-b now has id: 1090782474
>> ...
>> Dec 19 11:53:40 vd01-a stonith-ng: [1905]: info: stonith_command:
>> Processed st_fence_history from cluster-dlm: rc=0
>> Dec 19 11:53:40 vd01-a crmd: [1910]: info: tengine_stonith_notify: Peer
>> vd01-b was terminated (reboot) by vd01-c for vd01-a
>> (ref=21425fc0-4311-40fa-9647-525c3f258471): OK
>>
>> But, then I see minor issue that node is marked to be fenced again:
>> Dec 19 11:53:40 vd01-a pengine: [1909]: WARN: pe_fence_node: Node vd01-b
>> will be fenced because it is un-expectedly down
>
> Do you have logs for that?
> tengine_stonith_notify() got called, that should have been enough to
> get the node cleaned up in the cib.
Ugh, seems like yes, but they are archived already. Will get them back
to nodes and try to compose hb_report for them (but pe inputs are
already lost, do you still need logs without them?)
>
>> ...
>> Dec 19 11:53:40 vd01-a pengine: [1909]: WARN: stage6: Scheduling Node
>> vd01-b for STONITH
>> ...
>> Dec 19 11:53:40 vd01-a crmd: [1910]: info: te_fence_node: Executing
>> reboot fencing operation (249) on vd01-b (timeout=60000)
>> ...
>> Dec 19 11:53:40 vd01-a stonith-ng: [1905]: info: call_remote_stonith:
>> Requesting that vd01-c perform op reboot vd01-b
>>
>> And so on.
>>
>> I can't investigate this one in more depth, because I use fence_xvm in
>> this testing cluster, and it has issues when running more than one
>> stonith resource on a node. Also, my RA (in a cluster where this testing
>> cluster runs) undefines VM after failure, so fence_xvm does not see
>> fencing victim in a qpid and is unable to fence it again.
>>
>> May be it is possible to look if node was just fenced and skip redundant
>> fencing?
>
> If the callbacks are being used correctly, it shouldn't be required
#include <syslog.h>
#include "config.h"
#include "dlm_daemon.h"
#include <glib.h>
#include <bzlib.h>
#include <heartbeat/ha_msg.h>
#include <pacemaker/crm_config.h>
#include <pacemaker/crm/crm.h>
#include <pacemaker/crm/ais.h>
#include <pacemaker/crm/attrd.h>
/* heartbeat support is irrelevant here */
#undef SUPPORT_HEARTBEAT
#define SUPPORT_HEARTBEAT 0
#include <pacemaker/crm/common/cluster.h>
#include <pacemaker/crm/cluster/stack.h>
#include <pacemaker/crm/common/ipc.h>
#include <pacemaker/crm/msg_xml.h>
#include <pacemaker/crm/cib.h>
#include <pacemaker/crm/stonith-ng.h>
#define COMMS_DIR "/sys/kernel/config/dlm/cluster/comms"
/* Stub CCS setup: the dlm is configured exclusively from the command line
 * for now.  CoroSync-based configuration will wait until it is stable
 * enough to be used with Pacemaker. */
int setup_ccs(void)
{
    cfgd_groupd_compat = 0; /* always use libcpg; groupd backward compat off */
    return 0;
}
void close_ccs(void) { return; }
int get_weight(int nodeid, char *lockspace) { return 1; }
/* TODO: make this configurable.
 * logging.c cannot be reused as-is because whitetank exposes a different
 * logging API. */
void init_logging(void) {
    int options = LOG_PERROR | LOG_PID | LOG_CONS | LOG_NDELAY;

    openlog("cluster-dlm", options, LOG_DAEMON);
}
void setup_logging(void) { return; }
/* Counterpart to init_logging(): release the syslog connection. */
void close_logging(void)
{
    closelog();
}
extern int ais_fd_async;
char *local_node_uname = NULL;
void dlm_process_node(gpointer key, gpointer value, gpointer user_data);
/* Connect to the pacemaker AIS plugin and subscribe to membership events.
 *
 * Side effects: initializes crm logging, fills in local_node_uname and
 * our_nodeid, and requests an initial membership dump.
 *
 * Returns the async AIS file descriptor for the daemon's poll loop, or
 * -1 when the plugin connection could not be established.
 */
int setup_cluster(void)
{
    ais_fd_async = -1;
    crm_log_init("cluster-dlm", LOG_INFO, FALSE, TRUE, 0, NULL);

    if (!init_ais_connection(NULL, NULL, NULL, &local_node_uname, &our_nodeid)) {
        log_error("Connection to our AIS plugin failed");
        return -1;
    }

    /* Sign up for membership updates */
    send_ais_text(crm_class_notify, "true", TRUE, NULL, crm_msg_ais);

    /* Request the current list of known nodes */
    send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais);

    return ais_fd_async;
}
/* Propagate quorum state and, when the membership sequence has advanced,
 * walk the peer cache to sync each node into/out of the dlm configfs tree.
 * The previous sequence number is handed to dlm_process_node() as its
 * user_data (used there only for logging). */
void update_cluster(void)
{
    static uint64_t seen_membership = 0;

    cluster_quorate = crm_have_quorum;

    if (crm_peer_seq <= seen_membership) {
        return; /* no new membership event since last pass */
    }

    log_debug("Processing membership %llu", crm_peer_seq);
    g_hash_table_foreach(crm_peer_id_cache, dlm_process_node, &seen_membership);
    seen_membership = crm_peer_seq;
}
/* Poll-loop callback: drain pending AIS messages, then fold any resulting
 * membership change into the dlm's view.  The connection index 'ci' is
 * unused — the async fd is a file-scope global. */
void process_cluster(int ci)
{
    (void)ci;
    ais_dispatch(ais_fd_async, NULL);
    update_cluster();
}
/* Drop the AIS plugin connection established by setup_cluster(). */
void close_cluster(void)
{
    terminate_ais_connection();
}
#include <arpa/inet.h>
#include <corosync/totem/totemip.h>
/* GHFunc callback for update_cluster(): reconcile one cluster member with
 * the dlm's configfs comms tree (COMMS_DIR).
 *
 * key:       hash-table key (unused)
 * value:     crm_node_t * describing the peer
 * user_data: uint64_t * holding the membership seq last processed (log only)
 *
 * A node present in configfs but no longer an active member is removed;
 * an active member with a known address list is (re-)added, one "ip(...)"
 * token at a time.
 *
 * Fix vs. the earlier draft: the sscanf() scansets were unbounded, so an
 * oversized address token could overflow ipaddr[1024]; they now carry an
 * explicit 1023-character field width.
 */
void dlm_process_node(gpointer key, gpointer value, gpointer user_data)
{
    int rc = 0;
    struct stat tmp;
    char path[PATH_MAX];
    crm_node_t *node = value;
    uint64_t *last = user_data;
    const char *action = "Skipped";
    gboolean do_add = FALSE;
    gboolean do_remove = FALSE;
    gboolean is_active = FALSE;

    memset(path, 0, PATH_MAX);
    snprintf(path, PATH_MAX, "%s/%d", COMMS_DIR, node->id);

    rc = stat(path, &tmp); /* rc == 0: node already present in configfs */
    is_active = crm_is_member_active(node);

    if(rc == 0 && is_active) {
        /* nothing to do?
         * maybe the node left and came back...
         */
    } else if(rc == 0) {
        do_remove = TRUE; /* in configfs but no longer an active member */
    } else if(is_active && node->addr) {
        do_add = TRUE;    /* active member missing from configfs */
    }

    if(do_remove) {
        action = "Removed";
        del_configfs_node(node->id);
    }

    if(do_add) {
        char *addr_copy = strdup(node->addr);
        char *addr_top = addr_copy; /* strsep() advances addr_copy; keep start for free() */
        char *addr = NULL;

        if(do_remove) {
            action = "Re-added";
        } else {
            action = "Added";
        }

        do {
            char ipaddr[1024];
            int addr_family = AF_INET;
            int cna_len = 0;
            struct sockaddr_storage cna_addr;
            struct totem_ip_address totem_addr;

            addr = strsep(&addr_copy, " ");
            if(addr == NULL) {
                break;
            }

            /* do_cmd_get_node_addrs: only "ip(...)" tokens carry addresses.
             * Field width 1023 leaves room for the NUL in ipaddr[1024]. */
            if(strstr(addr, "ip(") == NULL) {
                continue;

            } else if(strchr(addr, ':')) {
                rc = sscanf(addr, "ip(%1023[0-9A-Fa-f:])", ipaddr);
                if(rc != 1) {
                    log_error("Could not extract IPv6 address from '%s'", addr);
                    continue;
                }
                addr_family = AF_INET6;

            } else {
                rc = sscanf(addr, "ip(%1023[0-9.]) ", ipaddr);
                if(rc != 1) {
                    log_error("Could not extract IPv4 address from '%s'", addr);
                    continue;
                }
            }

            /* NOTE(review): inet_pton() is used here purely as a validity
             * check; totemip_parse() below fills totem_addr properly.
             * Confirm that writing raw bytes into the struct is intended. */
            rc = inet_pton(addr_family, ipaddr, &totem_addr);
            if(rc != 1) {
                log_error("Could not parse '%s' as in IPv%c address", ipaddr, (addr_family==AF_INET)?'4':'6');
                continue;
            }

            rc = totemip_parse(&totem_addr, ipaddr, addr_family);
            if(rc != 0) {
                log_error("Could not convert '%s' into a totem address", ipaddr);
                continue;
            }

            rc = totemip_totemip_to_sockaddr_convert(&totem_addr, 0, &cna_addr, &cna_len);
            if(rc != 0) {
                log_error("Could not convert totem address for '%s' into sockaddr", ipaddr);
                continue;
            }

            log_debug("Adding address %s to configfs for node %u", addr, node->id);
            add_configfs_node(node->id, ((char*)&cna_addr), cna_len, (node->id == our_nodeid));

        } while(addr != NULL);

        free(addr_top);
    }

    log_debug("%s %sctive node %u: born-on=%llu, last-seen=%llu, this-event=%llu, last-event=%llu",
              action, crm_is_member_active(value)?"a":"ina",
              node->id, node->born, node->last_seen,
              crm_peer_seq, (unsigned long long)*last);
}
/* Non-zero when 'nodeid' is an active member of the current membership.
 * NOTE(review): presumably crm_get_peer() creates a cache entry on demand
 * and never returns NULL here — confirm against the pacemaker API. */
int is_cluster_member(int nodeid)
{
    return crm_is_member_active(crm_get_peer(nodeid, NULL));
}
/* Map a node id to a freshly-allocated copy of its uname (caller frees),
 * or NULL when the node or its name is unknown.
 *
 * Fix: also guard against crm_get_peer() returning NULL — sibling code in
 * this file (pacemaker_terminate_member, fence_node_time) checks the node
 * pointer before dereferencing it. */
char *nodeid2name(int nodeid) {
    crm_node_t *node = crm_get_peer(nodeid, NULL);
    if(node == NULL || node->uname == NULL) {
        return NULL;
    }
    return strdup(node->uname);
}
/* Ask stonith-ng to fence the given node, blocking until the operation
 * completes (st_opt_sync_call — the async form never fired; see the list
 * discussion above this file).
 *
 * Returns 0 on success, -1 when the node is unknown or stonith-ng is
 * unreachable, 1 when the fencing request itself failed.
 *
 * Fix: the original wrapped the peer lookup in a dead, always-true
 * "if (node_uname == NULL)" check and spread the logic over a confusing
 * if/else chain; flattened with early returns, behavior unchanged. */
static int
pacemaker_terminate_member(int nodeid)
{
    int rc = stonith_ok;
    const char *node_uname = NULL;
    stonith_t *st = NULL;
    crm_node_t *node = crm_get_peer(nodeid, NULL);

    if (node) {
        node_uname = node->uname;
    }
    if (node_uname == NULL) {
        crm_err("Nothing known about node id=%d", nodeid);
        return -1;
    }

    st = stonith_api_new();
    if (st == NULL) {
        crm_err("Could not connect to stonith subsystem");
        return -1;
    }

    rc = st->cmds->connect(st, crm_system_name, NULL);
    if (rc == stonith_ok) {
        /* Default pacemaker fencing action is "reboot", and admin may remap it to "off" */
        crm_info("Requesting that node %d/%s be fenced", nodeid, node_uname);
        rc = st->cmds->fence(st, st_opt_sync_call, node_uname, "reboot", 120);
    }

    st->cmds->disconnect(st);
    stonith_api_delete(st);

    if (rc < stonith_ok) {
        crm_err("Could not fence node %d/%s: %s", nodeid, crm_str(node_uname), stonith_error2string(rc));
        return 1;
    }
    return 0;
}
/* dlm-facing entry point: request that 'nodeid' be fenced and translate
 * pacemaker_terminate_member()'s result code into daemon log messages. */
void kick_node_from_cluster(int nodeid)
{
    int rc = pacemaker_terminate_member(nodeid);

    if (rc == 0) {
        log_debug("Requested that node %d be kicked from the cluster", nodeid);
    } else if (rc == -1) {
        log_error("Don't know how to kick node %d from the cluster", nodeid);
    } else if (rc == 1) {
        log_error("Could not kick node %d from the cluster", nodeid);
    } else {
        log_error("Unknown result when kicking node %d from the cluster", nodeid);
    }
}
/* Stub: pending-fencing state is discovered via the stonith history in
 * fence_node_time() instead, so always report "nothing in progress". */
int fence_in_progress(int *in_progress)
{
    *in_progress = 0;
    return 0;
}
/* Report when 'nodeid' was last fenced, based on the stonith history.
 *
 * On return, *last_fenced_time (when the pointer is non-NULL) holds the
 * completion time of the most recent successful fencing op, or 0.
 * A failed history entry triggers a re-fence via kick_node_from_cluster().
 *
 * Returns 0 when the node is unknown, was fenced, or fencing is pending;
 * 1 when there is no sign the node has been shot.
 *
 * Fixes vs. the earlier draft:
 *  - last_fenced_time was dereferenced unconditionally later in the
 *    function even though the initial "if(last_fenced_time)" guard shows
 *    NULL is a legal argument; the timestamp is now tracked in a local
 *    and copied out at the end.
 *  - if stonith_api_new() returned NULL, rc stayed stonith_ok and
 *    st->cmds->history was a NULL dereference; the condition now also
 *    requires st != NULL.
 *  - printf-style specifiers now match the 64-bit timestamp arguments
 *    ("%d"/"%u" on uint64_t/time_t was undefined behavior).
 *  - "reqesting" typo fixed in the failure log message.
 *  - the dead "rc = 2" in the in-progress branch (always overwritten by
 *    "rc = 0" below) has been dropped; observable behavior is unchanged.
 */
int fence_node_time(int nodeid, uint64_t *last_fenced_time)
{
    int rc = 0;
    const char *node_uname = NULL;
    crm_node_t *node = crm_get_peer(nodeid, NULL);
    stonith_history_t *history = NULL, *hp = NULL;
    stonith_t *st = NULL;
    int fencing_requested = 0;
    int refence = 0;
    uint64_t last = 0; /* completion time of the newest st_done entry */

    if(last_fenced_time) {
        *last_fenced_time = 0;
    }
    if (node && node->uname) {
        node_uname = node->uname;
        st = stonith_api_new();
    } else {
        crm_err("Nothing known about node id=%d", nodeid);
        return 0;
    }
    if(st) {
        rc = st->cmds->connect(st, crm_system_name, NULL);
    }
    if(st && rc == stonith_ok) {
        int i = 0;
        st->cmds->history(st, st_opt_sync_call, node_uname, &history, 120);
        /* NOTE(review): the history list is never freed here — confirm
         * whether the stonith API expects the caller to release it. */
        for(hp = history; hp; hp = hp->next, i++) {
            if(hp->state == st_done) {
                log_debug("Stonith history[%d]: Node %d/%s fenced at %llu",
                          i, nodeid, node_uname, (unsigned long long)hp->completed);
                last = hp->completed;
                fencing_requested = 0;
            } else if (hp->state == st_failed) {
                log_debug("Stonith history[%d]: Fencing of node %d/%s failed, requesting it again",
                          i, nodeid, node_uname);
                kick_node_from_cluster(nodeid);
                refence = 1;
                fencing_requested = 0;
            } else if (hp->state == st_exec) {
                log_debug("Stonith history[%d]: Fencing of node %d/%s is in progress",
                          i, nodeid, node_uname);
                fencing_requested = 1;
            } else {
                log_debug("Stonith history[%d]: Node %d/%s state %d at %llu",
                          i, nodeid, node_uname, hp->state, (unsigned long long)hp->completed);
            }
        }
    }

    rc = 0;
    if(last != 0) {
        log_debug("Node %d/%s was last shot at: %llu",
                  nodeid, node_uname, (unsigned long long)last);
    } else if(!fencing_requested && !refence) {
        log_debug("It does not appear node %d/%s has been shot", nodeid, node_uname);
        rc = 1;
    }
    if(last_fenced_time) {
        *last_fenced_time = last;
    }
    if(st) {
        st->cmds->disconnect(st);
        stonith_api_delete(st);
    }
    return rc;
}
_______________________________________________
Pacemaker mailing list: [email protected]
http://oss.clusterlabs.org/mailman/listinfo/pacemaker
Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org