Dear Gentle Reader....

Attached is a small test program to stress initializing and finalizing
communication between a corosync cpg client and the corosync daemon.
The test was run under version 1.2.4.  Initial testing was with a
single node, subsequent testing occurred on a system consisting of 3
nodes.

1) If the program is run in such a way that it loops on the
initialize/mcast_joined/dispatch/finalize AND the corosync daemon is
restarted while the program is looping (service corosync restart) then
the application locks up in the corosync client library in a variety
of interesting locations.  This is easiest to reproduce in a single
node system with a large iteration count and a usleep value between
joins.  'stress_finalize -t 500 -i 10000 -u 1000 -v'  Sometimes it
recovers in a few seconds (analysis of strace indicated
futex(...FUTEX_WAIT, 0, {1, 997888000}) ... which would account for
multiple 2 second delays in error recovery from a lost corosync
daemon).  Sometimes it locks up solid!   What is the proper way of
handling the loss of the corosync daemon?  Is it possible to have the
cpg library have a fast error recovery in the case of a failed daemon?

sample back trace of lockup:
#0  0x000000363c60c711 in sem_wait () from /lib64/libpthread.so.0
#1  0x0000003000002a34 in coroipcc_msg_send_reply_receive (
    handle=<value optimized out>, iov=<value optimized out>, iov_len=1,
    res_msg=0x7fffaefecac0, res_len=24) at coroipcc.c:465
#2  0x0000003000802db1 in cpg_leave (handle=1648075416440668160,
    group=<value optimized out>) at cpg.c:458
#3  0x0000000000400df8 in coInit (handle=0x7fffaefecdb0,
    groupNameStr=0x7fffaefeccb0 "./stress_finalize_groupName-0", ctx=0x6e1)
    at stress_finalize.c:101
#4  0x000000000040138a in main (argc=8, argv=0x7fffaefecf28)
    at stress_finalize.c:243

2) If the test program is run with an iteration count of greater than
about 10, group joins for the specified group name tends to start
failing (CS_ERR_TRY_AGAIN) but never recovers (trying again doesn't
help :).  This test was run on a single node of a 3 node system (but
may reproduce similar problems on a smaller number of nodes).
' ./stress_finalize -i 10 -j 1 junk'

3) An unrelated observation is that if the corosync daemon is setup on
two nodes that participate in multicast through a tunnel, the
corosync daemon runs in a tight loop at very high priority level
effectively halting the machine.  Is this because the basic daemon
communication relies on message reflection of the underlying transport
which would occur on an ethernet multicast but would not on a tunnel?

An example setup for an ip tunnel might be something along the following lines:
modprobe ip_gre
echo 1 > /proc/sys/net/ipv4/ip_forward
ip tunnel add gre1 mode gre remote 10.x.y.z local 20.z.y.x ttl 127
ip addr add 192.168.100.33/24 peer 192.168.100.11/24 dev gre1
ip link set gre1 up multicast on

Thank you for taking the time to consider these tests.  Perhaps future
versions of the software package could include a similar set of tests
illustrating proper behavior?

dan
/*
 *  @file stress_finalize.c
 */

#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

#include <corosync/corotypes.h>
#include <corosync/cpg.h>

#define MAX_ITERATIONS ((unsigned long)10000)	/**< default loop count (-i) */
#define MAX_BUF ((size_t)32)	/**< size of the scratch message buffer */
#define MAX_USEC ((useconds_t)100)	/**< default usleep between iteration batches (-u) */
#define NULL_HANDLE ((cpg_handle_t)0)	/**< sentinel: cpg handle not yet initialized */
static int verbose = 1; /**< noisy print of progress to stdout */
static suseconds_t maxUsecTolerance = 500; /**< maximum tolerance in usecs for library call */

/* DeliverCallback()
 * @brief stub message delivery callback; registered with the CPG
 *        library but intentionally does nothing — this test only
 *        exercises initialize/join/finalize timing.
 */
static void
DeliverCallback (
	cpg_handle_t handle,
	const struct cpg_name *groupName,
	uint32_t nodeid,
	uint32_t pid,
	void *msg,
	size_t msg_len)
{
	/* parameters deliberately unused; silence -Wunused-parameter */
	(void)handle;
	(void)groupName;
	(void)nodeid;
	(void)pid;
	(void)msg;
	(void)msg_len;
}
/* ConfchgCallback()
 * @brief stub configuration (membership) change callback; registered
 *        with the CPG library but intentionally does nothing.
 */
static void
ConfchgCallback (
	cpg_handle_t handle,
	const struct cpg_name *groupName,
	const struct cpg_address *member_list, size_t member_list_entries,
	const struct cpg_address *left_list, size_t left_list_entries,
	const struct cpg_address *joined_list, size_t joined_list_entries)
{
	/* parameters deliberately unused; silence -Wunused-parameter */
	(void)handle;
	(void)groupName;
	(void)member_list;
	(void)member_list_entries;
	(void)left_list;
	(void)left_list_entries;
	(void)joined_list;
	(void)joined_list_entries;
}
/* @var callbacks
 * @brief CPG callback table handed to cpg_initialize(); both entries
 * are stubs — membership changes and delivered messages are ignored
 * because the test only measures init/finalize behavior.
 */
static cpg_callbacks_t callbacks = {
	.cpg_deliver_fn =            DeliverCallback,
	.cpg_confchg_fn =            ConfchgCallback,
};

/* deltatime()
 * @brief report elapsed time since *tvbeg when it is out of tolerance
 * @param[in] tvbeg start time previously captured with gettimeofday()
 * @param[in] msg label prefixed to any report printed to stdout
 *
 * Prints a diagnostic when the delta is negative (clock stepped back)
 * or exceeds maxUsecTolerance microseconds.  tv_sec/tv_usec are signed
 * (time_t/suseconds_t); the original code printed them with %lu, which
 * mangles negative values — cast to long and use %ld instead.
 */
static void
deltatime(struct timeval *tvbeg, const char *msg)
{
	struct timeval tvend;	/**< time end */
	struct timeval tvdelta;	/**< time delta */
	int report;

	gettimeofday(&tvend, NULL);
	timersub(&tvend, tvbeg, &tvdelta);

	report = 0;
	if(tvdelta.tv_sec < 0 || tvdelta.tv_usec < 0) {
		printf("%s negative delta! %ld.%06ld\n", msg,
			(long)tvdelta.tv_sec, (long)tvdelta.tv_usec);
	}
	if(tvdelta.tv_sec > 0) {
		report = 1;
	}
	if(tvdelta.tv_sec == 0 && tvdelta.tv_usec > maxUsecTolerance) {
		report = 1;
	}
	if(report) {
		printf("%s out of tolerance %ld.%06ld\n", msg,
			(long)tvdelta.tv_sec, (long)tvdelta.tv_usec);
	}
}

/* coInit()
 * @brief tear down any previous cpg connection held in *handle, then
 *        initialize a fresh one, join the named group, and record ctx
 * @param[in,out] handle NULL_HANDLE means no prior connection; on
 *        success holds the newly initialized handle
 * @param[in] groupNameStr NUL-terminated group name (truncated safely
 *        if longer than cpg_name.value)
 * @param[in] ctx opaque per-connection context stored via cpg_context_set()
 * @return CS_OK on success, otherwise the first failing cpg error code
 *
 * Exits the process (status 3) on a NULL group name — programmer error.
 */
static cs_error_t
coInit(cpg_handle_t *handle, char* groupNameStr, void *ctx)
{
	cs_error_t result;	/**< result code returned from cpg library calls */
	struct cpg_name groupName;	/**< group name/length structure */
	struct timeval tvbeg;	/**< timing begin */
	uint32_t nodeid;	/**< nodeid defined by library */
	int cofd;	/**< file descriptor in use by library for interprocess comms */
	int cnt;	/**< count of trys to join group */
	int maxCnt;	/**< maximum number of try to join */

	if(groupNameStr == NULL) {
		printf("error - null group name not allowed\n");
		_exit(3);
	}

	/* Bug fix: strncpy() leaves the buffer unterminated when the source
	 * fills it, and length could exceed the buffer.  snprintf() always
	 * NUL-terminates; derive length from what was actually stored. */
	snprintf(groupName.value, sizeof(groupName.value), "%s", groupNameStr);
	groupName.length = strlen(groupName.value)+1; /* length includes NUL */

	if(NULL_HANDLE != *handle) {
		/* leave and finalize the connection from the previous iteration */
		gettimeofday(&tvbeg, NULL);
		result = cpg_leave(*handle, &groupName);
		deltatime(&tvbeg, "cpg_leave");
		if(CS_OK != result) {
			printf("error %s cpg leave %d\n", groupNameStr, result);
		}
		gettimeofday(&tvbeg, NULL);
		result = cpg_finalize (*handle);
		deltatime(&tvbeg, "cpg_finalize");
		if(CS_OK != result) {
			printf("error %s finalize failed %d\n", groupNameStr, result);
		}
		*handle = NULL_HANDLE;
	}
	gettimeofday(&tvbeg, NULL);
	result = cpg_initialize (handle, &callbacks);
	deltatime(&tvbeg, "cpg_initialize");
	if (CS_OK != result) {
		printf ("error %s Could not initialize Cluster Process Group API instance %d\n", groupNameStr, result);
		return result;
	}
	gettimeofday(&tvbeg, NULL);
	cnt = 0;
	maxCnt = 1;	/* NOTE(review): with maxCnt == 1 the usleep() below never
			 * runs; raise maxCnt to actually back off between retries */
	do {
		result = cpg_join(*handle, &groupName);
		deltatime(&tvbeg, "cpg_join");
		if (CS_OK != result) {
			printf ("warning %s Could not join process group %d\n", groupNameStr ,result);
			if(maxCnt > 1)
				usleep(100000);
		}
	} while(result == CS_ERR_TRY_AGAIN && cnt++ < maxCnt);
	/* Bug fix: the original fell through and returned CS_OK even when the
	 * join never succeeded, so the caller would mcast to an unjoined
	 * group.  Propagate the join failure instead. */
	if (CS_OK != result) {
		return result;
	}
	/* propose enhancement of a context per group! */
	gettimeofday(&tvbeg, NULL);
	result = cpg_context_set(*handle, ctx);
	deltatime(&tvbeg, "context_set");
	if (CS_OK != result) {
		printf ("error %s Could not set cpg context %d\n", groupNameStr, result);
		return result;
	}
	gettimeofday(&tvbeg, NULL);
	result = cpg_fd_get(*handle, &cofd);
	deltatime(&tvbeg, "cpg_fd_get");
	if (CS_OK != result) {
		printf ("error %s Could not get cpg fd %d\n", groupNameStr, result);
		return result;
	}
	gettimeofday(&tvbeg, NULL);
	result = cpg_local_get (*handle, &nodeid);
	deltatime(&tvbeg, "cpg_local_get");
	if (CS_OK != result) {
		printf ("error %s Could not get local node id %d\n", groupNameStr, result);
		return result;
	}
	if(verbose) printf ("%s fd %d nodeid %x\n", groupNameStr, cofd, nodeid);
	return CS_OK;
}

/* usage()
 * @brief print usage to stdout and terminate the process
 * @param[in] pgm name of the executable program (argv[0])
 * @note never returns — exits via _exit(0)
 */
static void
usage(const char *pgm)
{
	printf("%s [groupname]\n", pgm);
	printf(" [-i maxIterations]		maximum iterations\n");
	printf(" [-j handleCnt]		maximum handles for process\n");
	printf(" [-t maxUsecTolerance]	maximum reporting micro seconds\n");
	printf(" [-u asleep]	microseconds to usleep between iterations\n");
    printf("[-v]	verbose output\n");
	_exit(0);
}

/* main()
 * @brief test main for stress test and timing of cpg_finalize:
 *        repeatedly (re)initialize handleCnt cpg connections, join a
 *        per-handle group, mcast one message, dispatch, and watch for
 *        fd leaks across re-initializations
 * @param[in] argc count of arguments
 * @param[in] argv array of arguments, followed by optional group name
 * @return 0 => success
 */
#define MAX_HANDLE (10)	/**< capacity of handle[]/prevfd[]; -j is clamped to this */
int
main (int argc, char *argv[])
{
	cpg_handle_t handle[MAX_HANDLE];	/**< opaque handle to corosync communications */
	char groupNameBaseStr[CPG_MAX_NAME_LENGTH]; /**< arbitrary group name */
	char groupNameStr[CPG_MAX_NAME_LENGTH]; /**< arbitrary group name */
	cs_error_t result;	/**< return value of library calls */
	struct timeval tvbeg;	/**< timing begin */
	unsigned long i;	/**< iteration counter (simple context tracker) */
	unsigned long maxIterations;	/**< maximum iterations in loop */
	struct iovec iov;	/**< output vector */
	size_t iovLen;	/**< length of vector */
	char buf[MAX_BUF];	/**< buffer for a message */
	int opt; /**< option currently parsed */
	const char *options = "i:j:t:u:v"; /**< char listing of options */
	useconds_t asleep;	/**< microseconds to sleep between iterations */
	int handleCnt;	/**< maximum count of handles */
	int j;	/**< iteration of different handles to corosync */
	int prevfd[MAX_HANDLE]; /**< previous fd for index */
	int cofd; /**< file descriptor in use by library for interprocess comms */

	maxIterations = MAX_ITERATIONS;
	asleep = (useconds_t)MAX_USEC;
	handleCnt = 2;
	while ( (opt = getopt(argc, argv, options)) != -1 ) {
		switch (opt) {
		case 'j':
			handleCnt = strtoul(optarg, NULL, 0);
			break;
		case 'i':
			maxIterations = strtoul(optarg, NULL, 0);
			break;
		case 't':
			maxUsecTolerance = strtoul(optarg, NULL, 0);
			break;
		case 'u':
			asleep = strtoul(optarg, NULL, 0);
			break;
		case 'v':
			/* NOTE(review): default verbose is 1, so -v turns verbose
			 * output OFF; usage() implies the opposite — confirm intent */
			verbose = (verbose+1)%2;
			break;
		default:
			usage(argv[0]);
			break;
		}
	}

	/* Bug fix: -j larger than MAX_HANDLE previously overflowed the
	 * fixed-size handle[] and prevfd[] arrays; clamp to capacity. */
	if (handleCnt < 0) {
		handleCnt = 0;
	}
	if (handleCnt > MAX_HANDLE) {
		printf("warning handleCnt %d clamped to %d\n", handleCnt, MAX_HANDLE);
		handleCnt = MAX_HANDLE;
	}

	if (argc > optind) {
		snprintf(groupNameBaseStr, sizeof(groupNameBaseStr), "%s", argv[optind]);
	} else {
		snprintf(groupNameBaseStr, sizeof(groupNameBaseStr), "%s_groupName", argv[0]);
	}

	for(j = 0; j < handleCnt; j++) {
		handle[j] = NULL_HANDLE;
		prevfd[j] = -1;
	}
	gettimeofday(&tvbeg, NULL);
	for(i = 0; i < maxIterations; i++) {
		for(j = 0; j < handleCnt; j++) {
			snprintf(groupNameStr, sizeof(groupNameStr), "%s-%d", groupNameBaseStr, j);
			result = coInit(&handle[j], groupNameStr, (void *)i);
			if(CS_OK != result) {
				printf("%s failed to init iteration %lu/%lu\n", groupNameStr, i, maxIterations);
				continue;
			}
			/* send a message to the group just joined */
			snprintf(buf, sizeof(buf), "msg %d %lu", j, i);
			iov.iov_base = buf;
			iov.iov_len = sizeof(buf);
			iovLen = sizeof(iov)/sizeof(struct iovec); /* single iovec */
			result = cpg_mcast_joined(handle[j], CPG_TYPE_AGREED, &iov, iovLen);
			if(CS_OK != result) {
				printf("%s cpg mcast joined %d\n", groupNameStr, result); /* added missing \n */
				continue;
			}
			result = cpg_dispatch(handle[j], CS_DISPATCH_ALL);
			if(CS_OK != result) {
				printf("cpg dispatch %d\n", result); /* added missing \n */
			}
			/* detect fd leaks: the fd for a slot should be stable */
			result = cpg_fd_get(handle[j], &cofd);
			if (CS_OK != result) {
				printf ("error %s Could not get cpg fd %d\n", groupNameStr, result);
				continue;
			}
			if(prevfd[j] != -1 && cofd != prevfd[j]) {
				printf("failed %s leaking fd now %d previously %d\n", groupNameStr, cofd, prevfd[j]);
			}
			prevfd[j] = cofd;
		}
		if((i % 10) == 0) {
			if(asleep > 0) {
				usleep(asleep);
			}
		}
	}
	deltatime(&tvbeg, "Init collection");

	for(j = 0; j < handleCnt; j++) {
		/* Bug fix: skip slots that never got a live connection rather
		 * than finalizing a NULL handle. */
		if(NULL_HANDLE == handle[j]) {
			continue;
		}
		result = cpg_finalize (handle[j]);
		if(CS_OK != result) {
			printf("finalize failed %d\n", result);
			continue;
		}
	}
	return (0);
}
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to