Honza,

This patch looks alright.  I think we also need a further unique value
which represents the cpd address.  The cpd address+nodeid are unique
comparitors for finding cpds.  This can then be used to allow us to
uniquely identify leave messages with the original join messages in the
case where we have more then one handle joined to the same process group
within a process.

Test case attached (which currently fails).

I've looked briefly at solving the problem and found three things.
First do_proc_join's calling of process_info_find should be placed
inside the message_handler_req_exec_cpg_joinlist while loop to allow a
new proc join to be added to the list when cpg_join is called from the
library.

Second the list_del that message_handler_req_exec_cpg_procleave executes
should probably break immediately out of the while loop after list del.

Third the searching for cpds in the join and leave handling needs
another comparison which includes the cpd's address from the join
message contents, otherwise it sets the state to UNJOINED (because it
matches the first cpd in the list) and causes further leaves to fail
after 10-20 are processed.

Regards
-steve


On Wed, 2009-07-01 at 18:21 +0200, Jan Friesse wrote:
> Included patch should fix
> https://bugzilla.redhat.com/show_bug.cgi?id=506255 .
> 
> David, I hope it will fix problem for you.
> 
> It's based on simple idea of adding node startup timestamp at the end of
> cpg_join (and joinlist) calls. If timestamp is larger then old timestamp
> we know, node was restarted and we didn't notices -> deliver leave event
> and then join event. If timestamp is same (or in special cases lower) ->
> new cpg app joined -> send only join event.
> 
> Of course, patch isn't so simple. Cpg_join messages are always send as
> larger messages with timestamp (btw. timestamp is 64-bit value, because
> I expect l(o^64)ng life of corosync ;) ). On delivery, we test, if
> message is larger then standard message. If it is -> we have ts -> use it.
> 
> Bigger problem was joinlist, because it's array, ... you will see in
> source. Solution is, to send special entry, with pid 0 (shouldn't ever
> happened to process, to have pid 0), and timestamp encoded in name
> (ugly, but looks like working).
> 
> Please comment, if you can.
> 
> Regards,
>   Honza
> _______________________________________________
> Openais mailing list
> Openais@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/openais
Index: test/stress_cpgjoinleave.c
===================================================================
--- test/stress_cpgjoinleave.c	(revision 0)
+++ test/stress_cpgjoinleave.c	(revision 0)
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2009 Red Hat, Inc.
+ *
+ * All rights reserved.
+ *
+ * Author: Steven Dake (sd...@redhat.com)
+ *
+ * This software licensed under BSD license, the text of which follows:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the MontaVista Software, Inc. nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <string.h>
+#include <corosync/corotypes.h>
+#include <corosync/cpg.h>
+#include "../exec/crypto.h"
+
+struct my_msg {
+	unsigned int msg_size;
+	unsigned char sha1[20];
+	unsigned char buffer[0];
+};
+
+static void cpg_deliver_fn (
+        cpg_handle_t handle,
+        const struct cpg_name *group_name,
+        uint32_t nodeid,
+        uint32_t pid,
+        void *m,
+        size_t msg_len)
+{
+}
+
+static void cpg_confchg_fn (
+        cpg_handle_t handle,
+        const struct cpg_name *group_name,
+        const struct cpg_address *member_list, size_t member_list_entries,
+        const struct cpg_address *left_list, size_t left_list_entries,
+        const struct cpg_address *joined_list, size_t joined_list_entries)
+{
+}
+
+static cpg_callbacks_t callbacks = {
+	cpg_deliver_fn,
+	cpg_confchg_fn
+};
+
+static struct cpg_name group_name = {
+        .value = "cpg_joinleave",
+        .length = 6
+};
+
+
+#define INSTANCES 100
+int main (void)
+{
+	cpg_handle_t handle[INSTANCES];
+	cs_error_t res;
+	unsigned int i = 0;
+
+	for (i = 0; i < INSTANCES; i++) {
+		res = cpg_initialize (&handle[i], &callbacks);
+		if (res != CS_OK) {
+			printf ("FAIL %d\n", res);
+			exit (-1);
+		}
+	}
+
+	for (i = 0; i < INSTANCES; i++) {
+		res = cpg_join (handle[i], &group_name);
+		if (res != CS_OK) {
+			printf ("FAIL %d\n", res);
+			exit (-1);
+		}
+	}
+	for (i = 0; i < INSTANCES; i++) {
+printf  ("leave %llx\n", handle[i]);
+		res = cpg_leave (handle[i], &group_name);
+		if (res != CS_OK) {
+			printf ("FAIL %d\n", res);
+			exit (-1);
+		}
+	}
+
+	for (i = 0; i < INSTANCES; i++) {
+		res = cpg_dispatch (handle[i], CS_DISPATCH_ALL);
+	}
+
+	for (i = 0; i < INSTANCES; i++) {
+		cpg_finalize (handle[i]);
+	}
+
+	return (0);
+}
Index: test/Makefile.am
===================================================================
--- test/Makefile.am	(revision 2359)
+++ test/Makefile.am	(working copy)
@@ -36,7 +36,7 @@
 noinst_PROGRAMS		= testevs evsbench evsverify cpgverify testcpg testcpg2 cpgbench testconfdb	\
 			logsysbench logsysrec testquorum testvotequorum1 testvotequorum2	\
 			logsys_s logsys_t1 logsys_t2 testcpgzc cpgbenchzc testzcgc \
-			stress_cpgzc stress_cpgfdget stress_cpgcontext
+			stress_cpgzc stress_cpgfdget stress_cpgcontext stress_cpgjoinleave
 
 testevs_LDADD		= -levs -lcoroipcc
 testevs_LDFLAGS		= -L../lib
@@ -54,6 +54,8 @@
 stress_cpgfdget_LDFLAGS	= -L../lib
 stress_cpgcontext_LDADD	= -lcpg
 stress_cpgcontext_LDFLAGS	= -L../lib
+stress_cpgjoinleave_LDADD = -lcpg
+stress_cpgjoinleave_LDFLAGS	= -L../lib
 testconfdb_LDADD	= -lconfdb ../lcr/liblcr.a -lcoroipcc
 testconfdb_LDFLAGS	= -L../lib
 testquorum_LDADD	= -lquorum -lcoroipcc
_______________________________________________
Openais mailing list
Openais@lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to