Hi lists,
I'm posting another patch for IPv6addr (changeset:cf020d609b57).
Its role is the following.
In the start operation, check whether the VIP is already in use (i.e. it
answers an ICMPv6 echo request) _before_ assigning it; if it is, exit
with an error (OCF_ERR_GENERIC).
This is a precaution against split-brain.
With the former behavior, when split-brain occurs, the same VIP is
assigned on two or more nodes at the same time, even if only for a
fraction of a second.
Any comments and suggestions are really appreciated.
Best Regards,
Kazunori INOUE
diff -urN linux-ha-dev.org/resources/OCF/IPv6addr.c linux-ha-dev.mod/resources/OCF/IPv6addr.c
--- linux-ha-dev.org/resources/OCF/IPv6addr.c 2009-07-30 10:01:41.000000000 +0900
+++ linux-ha-dev.mod/resources/OCF/IPv6addr.c 2009-07-30 17:11:53.000000000 +0900
@@ -93,6 +93,8 @@
#include <clplumbing/cl_log.h>
#include <libnet.h>
+#include <linux/errqueue.h>
+#include <linux/filter.h>
#define PIDFILE_BASE HA_VARRUNDIR "/IPv6addr-"
@@ -122,6 +124,12 @@
#define OCF_ERR_CONFIGURED 6
#define OCF_NOT_RUNNING 7
+/* Tunables for the ICMPv6 echo probe used to detect an address already in use. */
+#define DEFDATALEN	(64 - 8)	/* default data length */
+#define MAXWAIT		10		/* max seconds to wait for response */
+#define MININTERVAL	10		/* Minimal interpacket gap (same unit as "interval": msec) */
+/* Clamp a scheduling interval so it is never below MININTERVAL. */
+#define SCHINT(a)	(((a) <= MININTERVAL) ? MININTERVAL : (a))
+#define MAXPACKET	128000		/* max packet size */
+
const char* IF_INET6 = "/proc/net/if_inet6";
const char* APP_NAME = "IPv6addr";
@@ -145,6 +153,25 @@
unsigned int ifr6_ifindex;
};
+/*
+ * State shared by the ICMPv6 echo probe helpers below.
+ * setup() resets the counters, "acked", "once", "exiting" and "waittime"
+ * before every probe run.
+ */
+/* counters */
+long npackets = 1; /* packets to transmit */
+long nreceived; /* # of packets we got back */
+long ntransmitted; /* sequence # for outbound packets = #sent */
+long nerrors; /* icmp errors (local and ICMPv6, counted by receive_error_msg()) */
+__u16 acked; /* highest echo-reply sequence number seen by parse_reply() */
+int once; /* nonzero once install_filter() has run for this probe */
+volatile int exiting; /* set by sigexit() (SIGINT/SIGALRM) to stop main_loop() */
+
+int interval = 1000; /* interval between packets (msec) */
+int lingertime = MAXWAIT*1000; /* msec; schedule_exit() arms the exit timer with it */
+unsigned long waittime; /* exit timer in usec; 0 until schedule_exit() arms it */
+
+int datalen = DEFDATALEN; /* payload bytes sent after the 8-byte ICMPv6 header */
+int ident; /* process id to identify our packets (low 16 bits, see setup()) */
+int icmp_socket; /* raw ICMPv6 socket opened by is_addr6_running() */
+struct sockaddr_in6 whereto; /* who to ping */
+u_char outpacket[MAXPACKET]; /* outgoing packet: ICMPv6 header followed by payload */
+
static int start_addr6(struct in6_addr* addr6, int prefix_len);
static int stop_addr6(struct in6_addr* addr6, int prefix_len);
static int status_addr6(struct in6_addr* addr6, int prefix_len);
@@ -167,6 +194,21 @@
int is_addr6_available(struct in6_addr* addr6);
static int send_ua(struct in6_addr* src_ip, char* if_name);
+static inline void set_signal(int signo, void (*handler)(int));
+static void sigexit(int signo);
+static int schedule_exit(int next);
+int is_addr6_running(char *target);
+static int send_probe(void);
+static int pinger(void);
+static int receive_error_msg(void);
+static int parse_reply(struct msghdr *msg, int len, void *addr);
+static void install_filter(void);
+static void setup(int icmp_socket);
+static int main_loop(int icmp_socket, __u8 *buf, int buflen);
+char* pr_icmph(__u8 type, __u8 code, __u32 info);
+char* pr_addr(struct in6_addr *addr);
+char* pr_addr_n(struct in6_addr *addr);
+
int
main(int argc, char* argv[])
{
@@ -279,6 +321,14 @@
return OCF_SUCCESS;
}
+ for (i = 0; i < 2; i++) {
+ if (is_addr6_running(pr_addr_n(addr6)) > 0) {
+ cl_log(LOG_DEBUG, "%s is running, return
OCF_ERR_GENERIC", pr_addr_n(addr6));
+ return OCF_ERR_GENERIC;
+ }
+ }
+ cl_log(LOG_DEBUG, "%s is NOT running", pr_addr_n(addr6));
+
/* we need to find a proper device to assign the address */
if_name = find_if(addr6, &prefix_len);
if (NULL == if_name) {
@@ -864,3 +914,526 @@
printf("%s\n",meta_data);
return OCF_SUCCESS;
}
+
+/*
+ * set_signal --
+ *	Install "handler" as the disposition of signal "signo".
+ *	The sigaction structure is zero-filled first, so sa_flags stays 0
+ *	(in particular SA_RESTART is not requested).
+ *	The result of sigaction() is not checked.
+ */
+static inline void
+set_signal(int signo, void (*handler)(int))
+{
+	struct sigaction action;
+
+	memset(&action, 0, sizeof action);
+	action.sa_handler = handler;
+	sigaction(signo, &action, NULL);
+}
+
+/*
+ * sigexit --
+ *	Signal handler: only raises the "exiting" flag, which main_loop()
+ *	and pinger() poll.
+ */
+static void
+sigexit(int signo)
+{
+	(void)signo;	/* the signal number is not needed */
+	exiting = 1;
+}
+
+/*
+ * schedule_exit --
+ *	Arm a one-shot ITIMER_REAL timer that fires after "lingertime"
+ *	milliseconds.  The timer is armed only on the first call after
+ *	setup(); later calls (waittime already non-zero) return "next"
+ *	unchanged.
+ *
+ *	next:	 delay proposed by the caller (msec)
+ *	returns: "next", raised to the linger time (msec) when it was
+ *		 negative or shorter than that.
+ */
+static int
+schedule_exit(int next)
+{
+	struct itimerval timer;
+	unsigned long linger_msec;
+
+	if (waittime != 0) {
+		return next;
+	}
+
+	/* lingertime is in msec, waittime in usec */
+	waittime = lingertime*1000;
+	linger_msec = waittime/1000;
+
+	if (next < 0 || next < linger_msec) {
+		next = linger_msec;
+	}
+
+	/* one-shot: no reload interval */
+	timer.it_interval.tv_sec = 0;
+	timer.it_interval.tv_usec = 0;
+	timer.it_value.tv_sec = waittime/1000000;
+	timer.it_value.tv_usec = waittime%1000000;
+	setitimer(ITIMER_REAL, &timer, NULL);
+
+	return next;
+}
+
+/*
+ * is_addr6_running --
+ *	Send an ICMPv6 echo request to "target" and report whether it
+ *	answered.
+ *
+ *	target:	 IPv6 address (or host name) as text, resolved with
+ *		 getaddrinfo()
+ *	returns: the result of main_loop() (1 when every probe sent was
+ *		 answered, 0 otherwise), or -2 when the probe could not be
+ *		 set up (socket, name resolution, memory or ICMP6_FILTER
+ *		 failure).
+ *
+ *	The receive buffer and the socket are released on every path.
+ */
+int
+is_addr6_running(char *target)
+{
+	int hold, packlen;
+	u_char *packet = NULL;
+	struct addrinfo hints, *ai;
+	int gai;
+	struct icmp6_filter filter;
+	int ret = -2;
+
+	cl_log(LOG_DEBUG, "%s(): target[%s]", __FUNCTION__, target);
+
+	icmp_socket = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
+	if (icmp_socket < 0) {
+		cl_perror("icmp open socket");
+		return -2;
+	}
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_INET6;
+	gai = getaddrinfo(target, NULL, &hints, &ai);
+	if (gai) {
+		cl_log(LOG_ERR, "getaddrinfo(%s) error:%d detected", target, gai);
+		goto out;
+	}
+
+	memset(&whereto, 0, sizeof(whereto));
+	memcpy(&whereto, ai->ai_addr, sizeof(whereto));
+	whereto.sin6_port = htons(IPPROTO_ICMPV6);
+	freeaddrinfo(ai);
+
+	/* room for the payload plus ICMPv6/IPv6 headers */
+	packlen = datalen + 8 + 40 + 8;
+	packet = malloc((size_t)packlen);
+	if (packet == NULL) {
+		fprintf(stderr, "ping: out of memory.\n");
+		goto out;
+	}
+
+	/* Best effort: without IPV6_RECVERR no errors are queued for
+	 * receive_error_msg(), but the probe itself still works. */
+	hold = 1;
+	if (setsockopt(icmp_socket, SOL_IPV6, IPV6_RECVERR, (char *)&hold,
+		       sizeof(hold)) < 0) {
+		cl_log(LOG_WARNING, "setsockopt(IPV6_RECVERR) failed");
+	}
+
+	/* select icmp echo reply as icmp type to receive */
+	ICMP6_FILTER_SETBLOCKALL(&filter);
+	ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter);
+
+	if (setsockopt(icmp_socket, IPPROTO_ICMPV6, ICMP6_FILTER, &filter,
+		       sizeof(struct icmp6_filter)) < 0) {
+		cl_perror("setsockopt(ICMP6_FILTER)");
+		goto out;
+	}
+
+	setup(icmp_socket);
+
+	ret = main_loop(icmp_socket, packet, packlen);
+
+out:
+	free(packet);	/* free(NULL) is a no-op */
+	close(icmp_socket);
+	return ret;
+}
+
+/*
+ * send_probe --
+ *	Fill in the ICMPv6 echo-request header at the start of "outpacket"
+ *	(sequence = ntransmitted+1, id = ident) and send header plus
+ *	"datalen" payload bytes to "whereto".
+ *
+ *	returns: 0 when the whole packet was sent, otherwise the value
+ *		 returned by sendto().
+ */
+static int
+send_probe(void)
+{
+	struct icmp6_hdr *hdr = (struct icmp6_hdr *)outpacket;
+	int length = datalen + 8;	/* payload + 8-byte ICMPv6 header */
+	int sent;
+
+	hdr->icmp6_type = ICMP6_ECHO_REQUEST;
+	hdr->icmp6_code = 0;
+	hdr->icmp6_cksum = 0;
+	hdr->icmp6_seq = htons(ntransmitted+1);
+	hdr->icmp6_id = ident;
+
+	sent = sendto(icmp_socket, (char *)outpacket, length, 0,
+		      (struct sockaddr *) &whereto, sizeof(struct sockaddr_in6));
+	if (sent == length) {
+		return 0;
+	}
+	return sent;
+}
+
+/*
+ * pinger --
+ *	Transmit one ICMP ECHO REQUEST packet through send_probe(), unless
+ *	an exit was requested or "npackets" probes were already sent.  The
+ *	IP packet will be added on by the kernel.  The ID field is the low
+ *	16 bits of our UNIX process ID (see setup()) and the sequence
+ *	number is an ascending integer.  The data portion carries the byte
+ *	pattern written by setup(); no timestamp is stored in it.
+ *
+ *	returns: the delay before the next call, in the unit of "interval"
+ *		 (msec).
+ */
+static int
+pinger(void)
+{
+	int i;
+
+	/* Have we already sent enough? If we have, return an arbitrary
+	 * positive value. */
+	if (exiting || (ntransmitted >= npackets)) {
+		return 1000;
+	}
+
+resend:
+	i = send_probe();
+	if (i == 0) {
+		/* whole packet went out */
+		ntransmitted++;
+		return interval;
+	}
+
+	/* And handle various errors... */
+	if (i > 0) {
+		/* Short send.  Apparently, it is some fatal bug. */
+		abort();
+	} else if (errno == EAGAIN) {
+		/* Socket buffer is full. */
+		return MININTERVAL;
+	} else {
+		if ((i=receive_error_msg()) > 0) {
+			/* An ICMP error arrived. */
+			return MININTERVAL;
+		}
+		/* receive_error_msg() leaves errno at 0 when the queued
+		 * error did not belong to our probe: just try again. */
+		if (!errno)
+			goto resend;
+	}
+	/* Hard local error. Pretend we sent packet. */
+	ntransmitted++;
+
+	/* No queued error was counted, so report the errno of the send. */
+	if (i == 0) {
+		cl_perror("sendmsg");
+	}
+	return SCHINT(interval);
+}
+
+/*
+ * receive_error_msg --
+ *	Fetch one message from the error queue of icmp_socket
+ *	(MSG_ERRQUEUE, non-blocking) and account for it.
+ *
+ *	returns: > 0  an ICMPv6 error for our own echo request was counted
+ *		 < 0  a locally generated error was counted
+ *		 0    nothing was queued, or the queued error was not ours
+ *
+ *	errno is restored to its value on entry, except that it is set to
+ *	0 when an ICMPv6 error that does not belong to our probe was
+ *	discarded.
+ */
+static int
+receive_error_msg(void)
+{
+	int res;
+	char cbuf[512];
+	struct iovec iov;
+	struct msghdr msg;
+	struct cmsghdr *cmsg;
+	struct sock_extended_err *e;
+	struct icmp6_hdr icmph;
+	struct sockaddr_in6 target;
+	int net_errors = 0;
+	int local_errors = 0;
+	int saved_errno = errno;
+
+	/* start from a known state: "target" is compared below */
+	memset(&target, 0, sizeof(target));
+	memset(&msg, 0, sizeof(msg));
+
+	iov.iov_base = &icmph;
+	iov.iov_len = sizeof(icmph);
+	msg.msg_name = (void*)&target;
+	msg.msg_namelen = sizeof(target);
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_flags = 0;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+
+	res = recvmsg(icmp_socket, &msg, MSG_ERRQUEUE|MSG_DONTWAIT);
+	if (res < 0)
+		goto out;
+
+	/* look for the extended error attached as ancillary data */
+	e = NULL;
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level == SOL_IPV6) {
+			if (cmsg->cmsg_type == IPV6_RECVERR)
+				e = (struct sock_extended_err *)CMSG_DATA(cmsg);
+		}
+	}
+	if (e == NULL)
+		abort();
+
+	if (e->ee_origin == SO_EE_ORIGIN_LOCAL) {
+		local_errors++;
+		if (e->ee_errno != EMSGSIZE)
+			cl_log(LOG_ERR, "local error: %s", strerror(e->ee_errno));
+		else
+			cl_log(LOG_ERR, "local error: Message too long, mtu=%u", e->ee_info);
+		nerrors++;
+	} else if (e->ee_origin == SO_EE_ORIGIN_ICMP6) {
+		/* address of the node that reported the error */
+		struct sockaddr_in6 *sin6 = (struct sockaddr_in6*)(e+1);
+
+		if (res < (int)sizeof(icmph) ||
+		    memcmp(&target.sin6_addr, &whereto.sin6_addr, 16) ||
+		    icmph.icmp6_type != ICMP6_ECHO_REQUEST ||
+		    icmph.icmp6_id != ident) {
+			/* Not our error, not an error at all. Clear. */
+			saved_errno = 0;
+			goto out;
+		}
+		net_errors++;
+		nerrors++;
+		cl_log(LOG_DEBUG, "from %s icmp_seq=%u %s",
+		       pr_addr(&sin6->sin6_addr), ntohs(icmph.icmp6_seq),
+		       pr_icmph(e->ee_type, e->ee_code, e->ee_info));
+	}
+
+out:
+	errno = saved_errno;
+	return net_errors ? net_errors : -local_errors;
+}
+
+/*
+ * parse_reply --
+ *	Account for a received packet, if it is a reply to our probe.  A
+ *	raw ICMP socket may also deliver packets meant for other processes,
+ *	so echo replies are matched against "ident".
+ *
+ *	msg:	 message as filled in by recvmsg(); the packet is taken
+ *		 from its first iovec
+ *	cc:	 number of bytes received
+ *	addr:	 sender address (struct sockaddr_in6)
+ *	returns: 1 when the packet is not ours (shorter than an ICMPv6
+ *		 header, or an echo reply carrying another id), else 0.
+ *
+ *	For our own echo replies "nreceived" is incremented and "acked" is
+ *	raised to the reply's sequence number.
+ */
+static int
+parse_reply(struct msghdr *msg, int cc, void *addr)
+{
+	struct sockaddr_in6 *from = addr;
+	struct icmp6_hdr *icmph = (struct icmp6_hdr *)msg->msg_iov->iov_base;
+	__u16 seq;
+
+	/* too short to hold an ICMPv6 header */
+	if (cc < 8) {
+		return 1;
+	}
+
+	if (icmph->icmp6_type != ICMP6_ECHO_REPLY) {
+		return 0;
+	}
+	if (icmph->icmp6_id != ident) {
+		return 1;
+	}
+
+	seq = ntohs(icmph->icmp6_seq);
+	++nreceived;
+	if (seq > acked) {
+		acked = seq;
+	}
+	cl_log(LOG_DEBUG, "%d bytes from %s: icmp_seq=%u",
+	       cc, pr_addr(&from->sin6_addr), seq);
+	return 0;
+}
+
+/*
+ * install_filter --
+ *	Attach a socket filter to icmp_socket that rejects echo replies
+ *	whose identifier differs from ours and passes everything else.
+ *	Runs at most once per probe: the "once" flag is cleared by setup().
+ *	A failure to attach the filter is only logged.
+ */
+static void
+install_filter(void)
+{
+	/* insns[1] holds a placeholder identifier that is patched below. */
+	static struct sock_filter insns[] = {
+		BPF_STMT(BPF_LD|BPF_H|BPF_ABS, 4), /* Load icmp echo ident */
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0xAAAA, 0, 1), /* Ours? */
+		BPF_STMT(BPF_RET|BPF_K, ~0U), /* Yes, it passes. */
+		BPF_STMT(BPF_LD|BPF_B|BPF_ABS, 0), /* Load icmp type */
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ICMP6_ECHO_REPLY, 1, 0), /* Echo? */
+		BPF_STMT(BPF_RET|BPF_K, ~0U), /* No. It passes. This must not happen. */
+		BPF_STMT(BPF_RET|BPF_K, 0), /* Echo with wrong ident. Reject. */
+	};
+	static struct sock_fprog filter = {
+		sizeof insns / sizeof(insns[0]),
+		insns
+	};
+
+	if (once)
+		return;
+	once = 1;
+
+	/* Patch bpflet for current identifier. */
+	insns[1] = (struct sock_filter)BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(ident), 0, 1);
+
+	if (setsockopt(icmp_socket, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)))
+		cl_log(LOG_WARNING, "failed to install socket filter");
+}
+
+/*
+ * setup --
+ *	Prepare a probe run on "icmp_socket": bound sends and receives to
+ *	one second, reset the probe state, build the payload pattern in
+ *	"outpacket", pick the packet identifier and install the signal
+ *	handlers.
+ */
+static void
+setup(int icmp_socket)
+{
+	struct timeval timeout;
+	int n;
+
+	/* Limit blocking on sends and receives to one second, so a slow
+	 * or stalled device cannot block us forever.
+	 */
+	timeout.tv_sec = 1;
+	timeout.tv_usec = 0;
+	setsockopt(icmp_socket, SOL_SOCKET, SO_SNDTIMEO, (char*)&timeout, sizeof(timeout));
+	setsockopt(icmp_socket, SOL_SOCKET, SO_RCVTIMEO, (char*)&timeout, sizeof(timeout));
+
+	/* fresh state for this run */
+	acked = 0;
+	nreceived = 0;
+	ntransmitted = 0;
+	nerrors = 0;
+	exiting = 0;
+	once = 0;
+	waittime = 0;
+
+	/* payload: bytes 0, 1, 2, ... placed after the 8-byte header */
+	memset(outpacket, 0, MAXPACKET);
+	for (n = 0; n < datalen; n++) {
+		outpacket[8 + n] = n;
+	}
+
+	ident = getpid() & 0xFFFF;
+
+	set_signal(SIGINT, sigexit);
+	set_signal(SIGALRM, sigexit);
+}
+
+/*
+ * main_loop --
+ *	Send the probe(s) and collect replies until an exit is requested
+ *	(SIGINT, or SIGALRM from the timer armed by schedule_exit()) or
+ *	until replies plus errors account for all "npackets" probes.
+ *
+ *	icmp_socket:	 socket to receive on
+ *	packet, buflen:	 receive buffer and its size
+ *	returns:	 1 when at least one reply was received and every
+ *			 probe sent was answered, else 0.
+ */
+static int
+main_loop(int icmp_socket, __u8 *packet, int packlen)
+{
+	char addrbuf[128];
+	char ans_data[4096];
+	struct iovec iov;
+	struct msghdr msg;
+	int cc;
+	int next;
+
+	iov.iov_base = (char *)packet;
+
+	for (;;) {
+		/* Check exit conditions. */
+		if (exiting) {
+			break;
+		}
+		if (npackets && nreceived + nerrors >= npackets) {
+			break;
+		}
+
+		/* Send probes scheduled to this time. */
+		do {
+			next = pinger();
+			/* everything sent: arm the exit timer (first time only) */
+			if (ntransmitted >= npackets)
+				next = schedule_exit(next);
+		} while (next <= 0);
+
+		/* "next" is time to send next probe, if positive.
+		 * If next<=0 send now or as soon as possible. */
+
+		for (;;) {
+			int not_ours = 0; /* Raw socket can receive messages
+			                   * destined to other running pings. */
+
+			iov.iov_len = packlen;
+			memset(&msg, 0, sizeof(msg));
+			msg.msg_name = addrbuf;
+			msg.msg_namelen = sizeof(addrbuf);
+			msg.msg_iov = &iov;
+			msg.msg_iovlen = 1;
+			msg.msg_control = ans_data;
+			msg.msg_controllen = sizeof(ans_data);
+
+			cc = recvmsg(icmp_socket, &msg, 0);
+			if (cc < 0) {
+				/* receive timeout or interrupted by a signal:
+				 * back to the exit checks */
+				if (errno == EAGAIN || errno == EINTR)
+					break;
+				if (!receive_error_msg()) {
+					if (errno) {
+						cl_perror("recvmsg");
+						break;
+					}
+					not_ours = 1;
+				}
+			} else {
+				not_ours = parse_reply(&msg, cc, addrbuf);
+			}
+
+			/* See? ... someone runs another ping on this host. */
+			if (not_ours)
+				install_filter();
+
+			/* If nothing is in flight, "break" returns us to
+			 * pinger. */
+			if ((ntransmitted-acked) == 0) {
+				break;
+			}
+
+			/* Otherwise, try to recvmsg() again.  recvmsg() is
+			 * called without MSG_DONTWAIT here; its wait is
+			 * limited by the SO_RCVTIMEO that setup() puts on
+			 * the socket, after which the EAGAIN/EINTR check
+			 * above returns us to pinger. */
+		}
+	}
+	if (nreceived > 0 && (nreceived == ntransmitted)) {
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * pr_icmph --
+ *	Describe an ICMPv6 message as text.
+ *
+ *	type, code: ICMPv6 type and code
+ *	info:	    extra value, printed as the MTU for "packet too big"
+ *		    and as the offset for "parameter problem"
+ *	returns:    pointer to a static buffer that is overwritten by the
+ *		    next call.
+ */
+char*
+pr_icmph(__u8 type, __u8 code, __u32 info)
+{
+	static char dest[256];
+	char buf[128];
+	const char *detail;
+
+	switch (type) {
+	case ICMP6_DST_UNREACH:
+		switch (code) {
+		case ICMP6_DST_UNREACH_NOROUTE:
+			detail = "No route";
+			break;
+		case ICMP6_DST_UNREACH_ADMIN:
+			detail = "Administratively prohibited";
+			break;
+		case ICMP6_DST_UNREACH_BEYONDSCOPE:
+			detail = "Beyond scope of source address";
+			break;
+		case ICMP6_DST_UNREACH_ADDR:
+			detail = "Address unreachable";
+			break;
+		case ICMP6_DST_UNREACH_NOPORT:
+			detail = "Port unreachable";
+			break;
+		default:
+			sprintf(buf, "Unknown code %d", code);
+			detail = buf;
+			break;
+		}
+		strcpy(dest, "Destination unreachable: ");
+		strcat(dest, detail);
+		break;
+
+	case ICMP6_PACKET_TOO_BIG:
+		sprintf(dest, "Packet too big: mtu=%u", info);
+		if (code != 0) {
+			sprintf(buf, ", code=%d", code);
+			strcat(dest, buf);
+		}
+		break;
+
+	case ICMP6_TIME_EXCEEDED:
+		switch (code) {
+		case ICMP6_TIME_EXCEED_TRANSIT:
+			detail = "Hop limit";
+			break;
+		case ICMP6_TIME_EXCEED_REASSEMBLY:
+			detail = "Defragmentation failure";
+			break;
+		default:
+			sprintf(buf, "code %d", code);
+			detail = buf;
+			break;
+		}
+		strcpy(dest, "Time exceeded: ");
+		strcat(dest, detail);
+		break;
+
+	case ICMP6_PARAM_PROB:
+		strcpy(dest, "Parameter problem: ");
+		switch (code) {
+		case ICMP6_PARAMPROB_HEADER:
+			strcat(dest, "Wrong header field ");
+			break;
+		case ICMP6_PARAMPROB_NEXTHEADER:
+			strcat(dest, "Unknown header ");
+			break;
+		case ICMP6_PARAMPROB_OPTION:
+			strcat(dest, "Unknown option ");
+			break;
+		default:
+			sprintf(buf, "code %d ", code);
+			strcat(dest, buf);
+			break;
+		}
+		/* the offending offset always follows */
+		sprintf(buf, "at %u", info);
+		strcat(dest, buf);
+		break;
+
+	case ICMP6_ECHO_REQUEST:
+		strcpy(dest, "Echo request");
+		break;
+	case ICMP6_ECHO_REPLY:
+		strcpy(dest, "Echo reply");
+		break;
+	case MLD_LISTENER_QUERY:
+		strcpy(dest, "MLD Query");
+		break;
+	case MLD_LISTENER_REPORT:
+		strcpy(dest, "MLD Report");
+		break;
+	case MLD_LISTENER_REDUCTION:
+		strcpy(dest, "MLD Reduction");
+		break;
+	default:
+		strcpy(dest, "unknown icmp type");
+		break;
+	}
+	return dest;
+}
+
+/*
+ * pr_addr --
+ *	Return a printable name for an IPv6 address: the host name found
+ *	by gethostbyaddr() when there is one, otherwise the numeric text
+ *	form produced by pr_addr_n().
+ */
+char*
+pr_addr(struct in6_addr *addr)
+{
+	struct hostent *host;
+
+	host = gethostbyaddr((__u8*)addr, sizeof(struct in6_addr), AF_INET6);
+	if (host != NULL) {
+		return host->h_name;
+	}
+	return pr_addr_n(addr);
+}
+
+/*
+ * pr_addr_n --
+ *	Return the numeric text form of an IPv6 address.  The result lives
+ *	in a static buffer that is overwritten by the next call.
+ */
+char*
+pr_addr_n(struct in6_addr *addr)
+{
+	static char text[64];
+
+	inet_ntop(AF_INET6, addr, text, sizeof text);
+	return text;
+}
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/