Please merge only the source code implementation.

The man pages need some editing.  I'll merge those later today.

Regards
-steve

On Tue, 2009-12-01 at 14:40 +0100, Jan Friesse wrote:
> Attached are:
> - Fixed SAM
> - Fixed man pages
> 
> Regards,
>   Honza
> 
> Steven Dake wrote:
> > Honza,
> > 
> > Please merge this patch into your patch.  It allows make distcheck to
> > work properly.
> > 
> > Regards
> > -steve
> > 
> > On Sat, 2009-11-28 at 14:24 -0700, Steven Dake wrote:
> >> Honza,
> >>
> >> Few comments on code:
> >>
> >> please use poll instead of select
> >>
> >> poll/select syscalls can return -1 with errno = EINTR, which should be
> >> handled by a goto to retry the poll operation.
> >>
> >> read/write syscalls can return EAGAIN/EINTR in which case they should be
> >> retried
> >>
> >> changed "readed" to "bytes_read"
> >>
> >> the return error for waitpid is not checked
> >>
> >> instead of return (unused_param) use return (NULL)
> >>
> >> change reincarnation_id to "generation_id"
> >>
> >> Other than those nitpicks, looks like a really clever design, simple to
> >> use, and effective at solving the problem outlined.
> >>
> >> I'll edit the lib documentation and send you a patch.  Once that is done
> >> and the above is resolved we can take another stab at review/merge.
> >>
> >> Great work
> >>
> >> Regards
> >> -steve
> >>
> >> On Fri, 2009-11-20 at 12:34 +0100, Jan Friesse wrote:
> >>> Attached is the first public SAM version for review. If you are unsure what
> >>> SAM is, please look at the corosync roadmap:
> >>> ftp://ftp%40corosync%2Eorg:[email protected]/presentations/corosync-roadmap.pdf
> >>>
> >>> What really needs to be done is the documentation (next week's action), but
> >>> the code and API should be stable.
> >>>
> >>> I hope you will enjoy it.
> >>>
> >>> Regards,
> >>>    Honza
> >>> _______________________________________________
> >>> Openais mailing list
> >>> [email protected]
> >>> https://lists.linux-foundation.org/mailman/listinfo/openais
> >> _______________________________________________
> >> Openais mailing list
> >> [email protected]
> >> https://lists.linux-foundation.org/mailman/listinfo/openais
> 
> plain text document attachment (sam-2009-12-01.patch)
> diff --git a/trunk/include/Makefile.am b/trunk/include/Makefile.am
> index bafe545..3f15cf5 100644
> --- a/trunk/include/Makefile.am
> +++ b/trunk/include/Makefile.am
> @@ -32,7 +32,8 @@
>  MAINTAINERCLEANFILES    = Makefile.in corosync/config.h.in
>  
>  CS_H                 = hdb.h cs_config.h cpg.h cfg.h evs.h mar_gen.h swab.h  
> \
> -                     coroipcc.h coroipcs.h coroipc_types.h corodefs.h 
> confdb.h list.h corotypes.h quorum.h votequorum.h
> +                     coroipcc.h coroipcs.h coroipc_types.h corodefs.h \
> +                     confdb.h list.h corotypes.h quorum.h votequorum.h sam.h
>  
>  CS_INTERNAL_H                = ipc_cfg.h ipc_confdb.h ipc_cpg.h ipc_evs.h 
> ipc_pload.h ipc_quorum.h   \
>                       jhash.h pload.h cs_queue.h quorum.h sq.h 
> ipc_votequorum.h coroipc_ipc.h
> diff --git a/trunk/include/corosync/sam.h b/trunk/include/corosync/sam.h
> new file mode 100644
> index 0000000..50d7d8b
> --- /dev/null
> +++ b/trunk/include/corosync/sam.h
> @@ -0,0 +1,155 @@
> +/*
> + * Copyright (c) 2009 Red Hat, Inc.
> + *
> + * All rights reserved.
> + *
> + * Author: Jan Friesse ([email protected])
> + *
> + * This software licensed under BSD license, the text of which follows:
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are 
> met:
> + *
> + * - Redistributions of source code must retain the above copyright notice,
> + *   this list of conditions and the following disclaimer.
> + * - Redistributions in binary form must reproduce the above copyright 
> notice,
> + *   this list of conditions and the following disclaimer in the 
> documentation
> + *   and/or other materials provided with the distribution.
> + * - Neither the name of the Red Hat, Inc. nor the names of its
> + *   contributors may be used to endorse or promote products derived from 
> this
> + *   software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
> IS"
> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
> + * THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +#ifndef COROSYNC_SAM_H_DEFINED
> +#define COROSYNC_SAM_H_DEFINED
> +
> +#include <corosync/corotypes.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +typedef enum {
> +     SAM_RECOVERY_POLICY_QUIT = 1,
> +     SAM_RECOVERY_POLICY_RESTART = 2,
> +} sam_recovery_policy_t;
> +
> +/*
> + * Callback definition for event driven checking
> + */
> +typedef int (*sam_hc_callback_t)(void);
> +
> +/*
> + * Create a new SAM connection. This function must be called before any 
> other.
> + * It is recommended to call it as one of first in application.
> + *
> + * @param time_interval Time interval in milliseconds of healthcheck. After 
> this time, application
> + * will be killed and recovery policy will be taken. This can be zero, which 
> means,
> + * that there is no time limit (only fall of application is checked and only 
> then
> + * recovery action is taken)
> + * @param recovery_policy One of SAM_RECOVERY_POLICY_RESTART, which means, 
> that after
> + * timeout application will be killed and new instance will be started.
> + * SAM_RECOVERY_POLICY_QUIT will just stop application
> + * @return
> + * - CS_OK in case no problem appeared
> + * - CS_ERR_BAD_HANDLE in case user is trying to initialize initialized 
> instance
> + * - CS_ERR_INVALID_PARAM in case recovery_policy had bad value
> + */
> +cs_error_t sam_initialize (
> +        int time_interval,
> +        sam_recovery_policy_t recovery_policy);
> +
> +/*
> + * Close the SAM handle. This function should be called as late as possible
> + * (in reality, if you plan just quit, and checking is stopped, there is no 
> need
> + * to call it). Function will stop healthchecking and put library to state, 
> where
> + * no new start is possible.
> + *
> + * @return
> + * - CS_OK in case no problem appeared
> + * - CS_ERR_BAD_HANDLE library was not initialized by #sam_initialize
> + */
> +cs_error_t sam_finalize (void);
> +
> +/*
> + * Start healthchecking. From this time, you should call every time_interval
> + * sam_hc_send, otherwise, recovery action will be taken.
> + * @return
> + * - CS_OK in case no problem appeared
> + * - CS_ERR_BAD_HANDLE component was not registered by #sam_register
> + */
> +cs_error_t sam_start (void);
> +
> +/*
> + * Stop healthchecking. Opposite of #sam_start. You can call sam_start and
> + * sam_stop as many times as you want.
> + *
> + * @return
> + * - CS_OK in case no problem appeared
> + * - CS_ERR_BAD_HANDLE healthchecking is not in running state (no sam_start
> + *   was called)
> + */
> +cs_error_t sam_stop (void);
> +
> +/*
> + * Register application. This is one of most crucial function. In case, your
> + * application will be restarted, you will always return to point after 
> calling
> + * this function. This function can be called only once, and SAM must be 
> initialized
> + * by sam_initialize. You can choose any place in your application, where to 
> call
> + * this function.
> + *
> + * @param instance_id NULL or pointer to int memory, where current instance
> + * of application will be returned. It's always safe to suppose, that first 
> instance
> + * (this means, no recovery action was taken yet) will be always 1 and 
> instance_id
> + * will be raising up to UINT_MAX (after this, it will fall to 0).
> + * @return
> + * - CS_OK in case no problem appeared
> + * - CS_ERR_BAD_HANDLE in case you call this function twice, or before 
sam_initialize
> + * - CS_ERR_LIBRARY internal library call failed. This can be one of pipe or 
> fork
> + *   creation. You can get more information from errno
> + */
> +cs_error_t sam_register (
> +     unsigned int *instance_id);
> +
> +/*
> + * Send healthcheck confirmation. This should be called after #sam_start
> + *
> + * - CS_OK in case no problem appeared
> + * - CS_ERR_BAD_HANDLE healthchecking is not in running state (no sam_start 
> was
> + *   called, or called after sam_stop/sam_finalize)
> + */
> +cs_error_t sam_hc_send (void);
> +
> +/*
> + * Register healthcheck callback. After you call this function and set
> + * cb to something other than NULL, SAM is automatically switched from
> + * application driven healthchecking to event driven healthchecking. In other
> + * words, it is no longer needed to call sam_hc_send, but your callback 
function
> + * must return 0 in case the healthcheck is correct, or a value different from
> + * 0 in case something happened. After the next hc iteration, a warning signal and
> + * after that a kill signal is sent back to your application.
> + * @param cb Pointer to healthcheck function, or NULL to switch back to 
> application driven hc
> + * @return
> + * - CS_OK in case no problem appeared
> + * - CS_ERR_BAD_HANDLE in case you call this function before sam_register or 
after sam_start
> + * - CS_ERR_LIBRARY internal library call failed. This can be one of pipe or 
> pthread
> + *   creation.
> + */
> +cs_error_t sam_hc_callback_register (sam_hc_callback_t cb);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* COROSYNC_SAM_H_DEFINED */
> diff --git a/trunk/lib/Makefile.am b/trunk/lib/Makefile.am
> index 164aa07..cc2d642 100644
> --- a/trunk/lib/Makefile.am
> +++ b/trunk/lib/Makefile.am
> @@ -39,7 +39,7 @@ AM_LDFLAGS          = -lpthread
>  INCLUDES             = -I$(top_builddir)/include -I$(top_srcdir)/include
>  
>  lib_LIBRARIES                = libcpg.a libconfdb.a libevs.a libcfg.a 
> libquorum.a \
> -                       libvotequorum.a libpload.a libcoroipcc.a
> +                       libvotequorum.a libpload.a libcoroipcc.a libsam.a
>  SHARED_LIBS          = $(filter-out libcpg.so.$(SONAME), 
> $(lib_LIBRARIES:%.a=%.so.$(SONAME))) libcpg.so.$(CPG_SONAME)
>  SHARED_LIBS_SO               = $(lib_LIBRARIES:%.a=%.so)
>  SHARED_LIBS_SO_TWO   = $(lib_LIBRARIES:%.a=%.so.$(SOMAJOR))
> @@ -52,12 +52,14 @@ libquorum_a_SOURCES       = quorum.c
>  libvotequorum_a_SOURCES      = votequorum.c
>  libconfdb_a_SOURCES  = confdb.c sa-confdb.c
>  libcoroipcc_a_SOURCES        = coroipcc.c
> +libsam_a_SOURCES     = sam.c
>  
>  noinst_HEADERS               = sa-confdb.h util.h \
>                         libcfg.versions libconfdb.versions \
>                         libcoroipcc.versions libcpg.versions \
>                         libevs.versions libpload.versions \
> -                       libquorum.versions libvotequorum.versions
> +                       libquorum.versions libvotequorum.versions \
> +                       libsam.versions
>  
>  ../lcr/lcr_ifact.o:
>       $(MAKE) -C ../lcr lcr_ifact.o
> diff --git a/trunk/lib/libsam.versions b/trunk/lib/libsam.versions
> new file mode 100644
> index 0000000..48fba2c
> --- /dev/null
> +++ b/trunk/lib/libsam.versions
> @@ -0,0 +1,12 @@
> +# Version and symbol export for libsam.so
> +
> +COROSYNC_SAM_1.0 {
> +     global:
> +             sam_initialized;
> +             sam_finalize;
> +             sam_start;
> +             sam_stop;
> +             sam_register;
> +             sam_hc_send;
> +             sam_hc_callback_register;
> +};
> diff --git a/trunk/lib/sam.c b/trunk/lib/sam.c
> new file mode 100644
> index 0000000..2e11eb5
> --- /dev/null
> +++ b/trunk/lib/sam.c
> @@ -0,0 +1,589 @@
> +/*
> + * Copyright (c) 2009 Red Hat, Inc.
> + *
> + * All rights reserved.
> + *
> + * Author: Jan Friesse ([email protected])
> + *
> + * This software licensed under BSD license, the text of which follows:
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are 
> met:
> + *
> + * - Redistributions of source code must retain the above copyright notice,
> + *   this list of conditions and the following disclaimer.
> + * - Redistributions in binary form must reproduce the above copyright 
> notice,
> + *   this list of conditions and the following disclaimer in the 
> documentation
> + *   and/or other materials provided with the distribution.
> + * - Neither the name of the Red Hat, Inc. nor the names of its
> + *   contributors may be used to endorse or promote products derived from 
> this
> + *   software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
> IS"
> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
> + * THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +/*
> + * Provides a SAM API
> + */
> +
> +#include <config.h>
> +
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <errno.h>
> +
> +#include <corosync/corotypes.h>
> +#include <corosync/coroipc_types.h>
> +#include <corosync/coroipcc.h>
> +#include <corosync/corodefs.h>
> +#include <corosync/hdb.h>
> +
> +#include <corosync/sam.h>
> +
> +#include "util.h"
> +
> +#include <stdio.h>
> +#include <sys/wait.h>
> +#include <signal.h>
> +
> +enum sam_internal_status_t {
> +     SAM_INTERNAL_STATUS_NOT_INITIALIZED = 0,
> +     SAM_INTERNAL_STATUS_INITIALIZED,
> +     SAM_INTERNAL_STATUS_REGISTERED,
> +     SAM_INTERNAL_STATUS_STARTED,
> +     SAM_INTERNAL_STATUS_FINALIZED
> +};
> +
> +enum sam_command_t {
> +     SAM_COMMAND_START,
> +     SAM_COMMAND_STOP,
> +     SAM_COMMAND_HB
> +};
> +
> +enum sam_parent_action_t {
> +     SAM_PARENT_ACTION_ERROR,
> +     SAM_PARENT_ACTION_RECOVERY,
> +     SAM_PARENT_ACTION_QUIT,
> +     SAM_PARENT_ACTION_CONTINUE
> +};
> +
> +static struct {
> +     int time_interval;
> +     sam_recovery_policy_t recovery_policy;
> +     enum sam_internal_status_t internal_status;
> +     unsigned int instance_id;
> +     int parent_fd;
> +     int term_send;
> +
> +     sam_hc_callback_t hc_callback;
> +     pthread_t cb_thread;
> +     int cb_rpipe_fd, cb_wpipe_fd;
> +     int cb_registered;
> +} sam_internal_data;
> +
> +cs_error_t sam_initialize (
> +     int time_interval,
> +     sam_recovery_policy_t recovery_policy)
> +{
> +     if (sam_internal_data.internal_status != 
> SAM_INTERNAL_STATUS_NOT_INITIALIZED) {
> +             return (CS_ERR_BAD_HANDLE);
> +     }
> +
> +     if (recovery_policy != SAM_RECOVERY_POLICY_QUIT && recovery_policy != 
> SAM_RECOVERY_POLICY_RESTART) {
> +             return (CS_ERR_INVALID_PARAM);
> +     }
> +
> +     sam_internal_data.recovery_policy = recovery_policy;
> +
> +     sam_internal_data.time_interval = time_interval;
> +
> +     sam_internal_data.internal_status = SAM_INTERNAL_STATUS_INITIALIZED;
> +
> +     return (CS_OK);
> +}
> +
> +/*
> + * Wrapper on top of write(2) function. It handles EAGAIN and EINTR states 
> and sends whole buffer if possible.
> + */
> +static size_t sam_safe_write (
> +     int d,
> +     const void *buf,
> +     size_t nbyte)
> +{
> +     ssize_t bytes_write;
> +     ssize_t tmp_bytes_write;
> +
> +     bytes_write = 0;
> +
> +     do {
> +             tmp_bytes_write = write (d, (const char *)buf + bytes_write, 
> nbyte - bytes_write);
> +
> +             if (tmp_bytes_write == -1) {
> +                     if (!(errno == EAGAIN || errno == EINTR))
> +                             return -1;
> +             } else {
> +                     bytes_write += tmp_bytes_write;
> +             }
> +     } while (bytes_write != nbyte);
> +
> +     return bytes_write;
> +}
> +
> +cs_error_t sam_start (void)
> +{
> +     char command;
> +
> +     if (sam_internal_data.internal_status != 
> SAM_INTERNAL_STATUS_REGISTERED) {
> +             return (CS_ERR_BAD_HANDLE);
> +     }
> +
> +     command = SAM_COMMAND_START;
> +
> +     if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
> +             return (CS_ERR_LIBRARY);
> +
> +     if (sam_internal_data.hc_callback)
> +             if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, 1) 
> == -1)
> +                     return (CS_ERR_LIBRARY);
> +
> +     sam_internal_data.internal_status = SAM_INTERNAL_STATUS_STARTED;
> +
> +     return (CS_OK);
> +}
> +
> +cs_error_t sam_stop (void)
> +{
> +     char command;
> +
> +     if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
> +             return (CS_ERR_BAD_HANDLE);
> +     }
> +
> +     command = SAM_COMMAND_STOP;
> +
> +     if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
> +             return (CS_ERR_LIBRARY);
> +
> +     if (sam_internal_data.hc_callback)
> +             if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, 1) 
> == -1)
> +                     return (CS_ERR_LIBRARY);
> +
> +     sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED;
> +
> +     return (CS_OK);
> +}
> +
> +cs_error_t sam_hc_send (void)
> +{
> +     char command;
> +
> +     if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
> +             return (CS_ERR_BAD_HANDLE);
> +     }
> +
> +     command = SAM_COMMAND_HB;
> +
> +     if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
> +             return (CS_ERR_LIBRARY);
> +
> +     return (CS_OK);
> +}
> +
> +cs_error_t sam_finalize (void)
> +{
> +     cs_error_t error;
> +
> +     if (sam_internal_data.internal_status != 
> SAM_INTERNAL_STATUS_INITIALIZED &&
> +             sam_internal_data.internal_status != 
> SAM_INTERNAL_STATUS_REGISTERED &&
> +             sam_internal_data.internal_status != 
> SAM_INTERNAL_STATUS_STARTED) {
> +             return (CS_ERR_BAD_HANDLE);
> +     }
> +
> +     if (sam_internal_data.internal_status == SAM_INTERNAL_STATUS_STARTED) {
> +             error = sam_stop ();
> +             if (error != CS_OK)
> +                     goto exit_error;
> +     }
> +
> +     sam_internal_data.internal_status = SAM_INTERNAL_STATUS_FINALIZED;
> +
> +exit_error:
> +     return (CS_OK);
> +}
> +
> +
> +
> +static enum sam_parent_action_t sam_parent_handler (int pipe_fd, pid_t 
> child_pid)
> +{
> +     int poll_error;
> +     int action;
> +     int status;
> +     ssize_t bytes_read;
> +     char command;
> +     int time_interval;
> +     struct pollfd pfds;
> +
> +     status = 0;
> +
> +     action = SAM_PARENT_ACTION_CONTINUE;
> +
> +     while (action == SAM_PARENT_ACTION_CONTINUE) {
> +             pfds.fd = pipe_fd;
> +             pfds.events = POLLIN;
> +             pfds.revents = 0;
> +
> +             if (status == 1 && sam_internal_data.time_interval != 0) {
> +                     time_interval = sam_internal_data.time_interval;
> +             } else {
> +                     time_interval = INFTIM;
> +             }
> +
> +             poll_error = poll (&pfds, 1, time_interval);
> +
> +             if (poll_error == -1) {
> +                     /*
> +                      *  Error in poll
> +                      *  If it is EINTR, continue, otherwise QUIT
> +                      */
> +                     if (errno != EINTR) {
> +                             action = SAM_PARENT_ACTION_ERROR;
> +                     }
> +             }
> +
> +             if (poll_error == 0) {
> +                     /*
> +                      *  Time limit expires
> +                      */
> +                     if (status == 0) {
> +                             action = SAM_PARENT_ACTION_QUIT;
> +                     } else {
> +                             /*
> +                              *  Kill child process
> +                              */
> +                             if (!sam_internal_data.term_send) {
> +                                     /*
> +                                      * We didn't send SIGTERM (warning) yet.
> +                                      */
> +
> +                                     kill (child_pid, SIGTERM);
> +
> +                                     sam_internal_data.term_send = 1;
> +                             } else {
> +                                     /*
> +                                      * We sent child warning. Now, we will 
> not be so nice
> +                                      */
> +                                     kill (child_pid, SIGKILL);
> +                                     action = SAM_PARENT_ACTION_RECOVERY;
> +                             }
> +                     }
> +             }
> +
> +             if (poll_error > 0) {
> +                     /*
> +                      *  We have EOF or command in pipe
> +                      */
> +                     bytes_read = read (pipe_fd, &command, 1);
> +
> +                     if (bytes_read == 0) {
> +                             /*
> +                              *  Handle EOF -> Take recovery action or quit 
> if sam_start wasn't called
> +                              */
> +                             if (status == 0)
> +                                     action = SAM_PARENT_ACTION_QUIT;
> +                             else
> +                                     action = SAM_PARENT_ACTION_RECOVERY;
> +
> +                             continue;
> +                     }
> +
> +                     if (bytes_read == -1) {
> +                             /*
> +                              * Something really bad happened in read side
> +                              */
> +                             if (errno == EAGAIN || errno == EINTR) {
> +                                     continue;
> +                             }
> +
> +                             action = SAM_PARENT_ACTION_ERROR;
> +                             goto action_exit;
> +                     }
> +
> +                     /*
> +                      * We have read command -> take status
> +                      */
> +                     switch (status) {
> +                     case 0:
> +                             /*
> +                              *  Not started yet
> +                              */
> +                             if (command == SAM_COMMAND_START)
> +                                     status = 1;
> +                     break;
> +
> +                     case 1:
> +                             /*
> +                              *  Started
> +                              */
> +                             if (command == SAM_COMMAND_STOP)
> +                                     status = 0;
> +                     break;
> +
> +                     }
> +             } /* select_error > 0 */
> +     } /* action == SAM_PARENT_ACTION_CONTINUE */
> +
> +action_exit:
> +     return action;
> +}
> +
> +cs_error_t sam_register (
> +     unsigned int *instance_id)
> +{
> +     cs_error_t error;
> +     pid_t pid;
> +     int pipe_error;
> +     int pipe_fd[2];
> +     enum sam_parent_action_t action;
> +     int child_status;
> +
> +     if (sam_internal_data.internal_status != 
> SAM_INTERNAL_STATUS_INITIALIZED) {
> +             return (CS_ERR_BAD_HANDLE);
> +     }
> +
> +     error = CS_OK;
> +
> +     while (1) {
> +             pipe_error = pipe (pipe_fd);
> +
> +             if (pipe_error != 0) {
> +                     /*
> +                      *  Pipe creation error
> +                      */
> +                     error = CS_ERR_LIBRARY;
> +                     goto error_exit;
> +             }
> +
> +             sam_internal_data.instance_id++;
> +
> +             sam_internal_data.term_send = 0;
> +
> +             pid = fork ();
> +
> +             if (pid == -1) {
> +                     /*
> +                      *  Fork error
> +                      */
> +                     sam_internal_data.instance_id--;
> +
> +                     error = CS_ERR_LIBRARY;
> +                     goto error_exit;
> +             }
> +
> +             if (pid == 0) {
> +                     /*
> +                      *  Child process
> +                      */
> +                     close (pipe_fd[0]);
> +
> +                     sam_internal_data.parent_fd = pipe_fd[1];
> +                     if (instance_id)
> +                             *instance_id = sam_internal_data.instance_id;
> +
> +                     sam_internal_data.internal_status = 
> SAM_INTERNAL_STATUS_REGISTERED;
> +
> +                     goto error_exit;
> +             } else {
> +                     /*
> +                      *  Parent process
> +                      */
> +                     close (pipe_fd[1]);
> +
> +                     action = sam_parent_handler (pipe_fd[0], pid);
> +
> +                     close (pipe_fd[0]);
> +
> +                     if (action == SAM_PARENT_ACTION_ERROR) {
> +                             error = CS_ERR_LIBRARY;
> +                             goto error_exit;
> +                     }
> +
> +                     /*
> +                      * We really don't like zombies
> +                      */
> +                     while (waitpid (pid, &child_status, 0) == -1 && errno 
> == EINTR)
> +                             ;
> +
> +                     if (action == SAM_PARENT_ACTION_RECOVERY) {
> +                             if (sam_internal_data.recovery_policy == 
> SAM_RECOVERY_POLICY_QUIT)
> +                                     action = SAM_PARENT_ACTION_QUIT;
> +                     }
> +
> +                     if (action == SAM_PARENT_ACTION_QUIT) {
> +                             exit (WEXITSTATUS (child_status));
> +                     }
> +
> +             }
> +     }
> +
> +error_exit:
> +     return (error);
> +}
> +
> +static void *hc_callback_thread (void *unused_param)
> +{
> +     int poll_error;
> +     int status;
> +     ssize_t bytes_readed;
> +     char command;
> +     int time_interval, tmp_time_interval;
> +     int counter;
> +     struct pollfd pfds;
> +
> +     status = 0;
> +     counter = 0;
> +
> +     time_interval = sam_internal_data.time_interval >> 2;
> +
> +     while (1) {
> +             pfds.fd = sam_internal_data.cb_rpipe_fd;
> +             pfds.events = POLLIN;
> +             pfds.revents = 0;
> +
> +             if (status == 1) {
> +                     tmp_time_interval = time_interval;
> +             } else {
> +                     tmp_time_interval = INFTIM;
> +             }
> +
> +             poll_error = poll (&pfds, 1, tmp_time_interval);
> +
> +             if (poll_error == 0) {
> +                     sam_hc_send ();
> +                     counter++;
> +
> +                     if (counter >= 4) {
> +                             if (sam_internal_data.hc_callback () != 0) {
> +                                     status = 3;
> +                             }
> +
> +                             counter = 0;
> +                     }
> +             }
> +
> +             if (poll_error > 0) {
> +                     bytes_readed = read (sam_internal_data.cb_rpipe_fd, 
> &command, 1);
> +
> +                     if (bytes_readed > 0) {
> +                             if (status == 0 && command == SAM_COMMAND_START)
> +                                     status = 1;
> +
> +                             if (status == 1 && command == SAM_COMMAND_STOP)
> +                                     status = 0;
> +
> +                     }
> +             }
> +     }
> +
> +     /*
> +      * This makes compiler happy, it's same as return (NULL);
> +      */
> +     return (unused_param);
> +}
> +
> +cs_error_t sam_hc_callback_register (sam_hc_callback_t cb)
> +{
> +     cs_error_t error = CS_OK;
> +     pthread_attr_t thread_attr;
> +     int pipe_error;
> +     int pipe_fd[2];
> +
> +     if (sam_internal_data.internal_status != 
> SAM_INTERNAL_STATUS_REGISTERED) {
> +             return (CS_ERR_BAD_HANDLE);
> +     }
> +
> +     if (sam_internal_data.time_interval == 0) {
> +             return (CS_ERR_INVALID_PARAM);
> +     }
> +
> +     if (sam_internal_data.cb_registered) {
> +             sam_internal_data.hc_callback = cb;
> +
> +             return (CS_OK);
> +     }
> +
> +     /*
> +      * We know, this is first registration
> +      */
> +
> +     if (cb == NULL) {
> +             return (CS_ERR_INVALID_PARAM);
> +     }
> +
> +     pipe_error = pipe (pipe_fd);
> +
> +     if (pipe_error != 0) {
> +             /*
> +              *  Pipe creation error
> +              */
> +             error = CS_ERR_LIBRARY;
> +             goto error_exit;
> +     }
> +
> +     sam_internal_data.cb_rpipe_fd = pipe_fd[0];
> +     sam_internal_data.cb_wpipe_fd = pipe_fd[1];
> +
> +     /*
> +      * Create thread attributes
> +      */
> +     error = pthread_attr_init (&thread_attr);
> +     if (error != 0) {
> +             error = CS_ERR_LIBRARY;
> +             goto error_close_fd_exit;
> +     }
> +
> +
> +     pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
> +     pthread_attr_setstacksize (&thread_attr, 32768);
> +
> +     /*
> +      * Create thread
> +      */
> +     error = pthread_create (&sam_internal_data.cb_thread, &thread_attr, 
> hc_callback_thread, NULL);
> +
> +     if (error != 0) {
> +             error = CS_ERR_LIBRARY;
> +             goto error_attr_destroy_exit;
> +     }
> +
> +     /*
> +      * Cleanup
> +      */
> +     pthread_attr_destroy(&thread_attr);
> +
> +     sam_internal_data.cb_registered = 1;
> +     sam_internal_data.hc_callback = cb;
> +
> +     return (CS_OK);
> +
> +error_attr_destroy_exit:
> +     pthread_attr_destroy(&thread_attr);
> +error_close_fd_exit:
> +     sam_internal_data.cb_rpipe_fd = sam_internal_data.cb_wpipe_fd = 0;
> +     close (pipe_fd[0]);
> +     close (pipe_fd[1]);
> +error_exit:
> +     return (error);
> +}
> diff --git a/trunk/test/Makefile.am b/trunk/test/Makefile.am
> index 3789385..2a88908 100644
> --- a/trunk/test/Makefile.am
> +++ b/trunk/test/Makefile.am
> @@ -36,7 +36,7 @@ INCLUDES                    = -I$(top_builddir)/include 
> -I$(top_srcdir)/include
>  noinst_PROGRAMS              = testevs evsbench evsverify cpgverify testcpg 
> testcpg2 cpgbench testconfdb     \
>                       logsysbench logsysrec testquorum testvotequorum1 
> testvotequorum2        \
>                       logsys_s logsys_t1 logsys_t2 testcpgzc cpgbenchzc 
> testzcgc \
> -                     stress_cpgzc stress_cpgfdget stress_cpgcontext cpgbound
> +                     stress_cpgzc stress_cpgfdget stress_cpgcontext cpgbound 
> testsam
>  
>  testevs_LDADD                = -levs -lcoroipcc
>  testevs_LDFLAGS              = -L../lib
> @@ -85,6 +85,8 @@ logsys_t1_LDADD             = -llogsys
>  logsys_t1_LDFLAGS    = -L../exec
>  logsys_t2_LDADD              = -llogsys
>  logsys_t2_LDFLAGS    = -L../exec
> +testsam_LDADD                = -lsam
> +testsam_LDFLAGS              = -L../lib
>  
>  lint:
>       -splint $(LINT_FLAGS) $(CFLAGS) *.c
> diff --git a/trunk/test/testsam.c b/trunk/test/testsam.c
> new file mode 100644
> index 0000000..ae9a4cf
> --- /dev/null
> +++ b/trunk/test/testsam.c
> @@ -0,0 +1,421 @@
> +/*
> + * Copyright (c) 2009 Red Hat, Inc.
> + *
> + * All rights reserved.
> + *
> + * Author: Jan Friesse ([email protected])
> + *
> + * This software licensed under BSD license, the text of which follows:
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are 
> met:
> + *
> + * - Redistributions of source code must retain the above copyright notice,
> + *   this list of conditions and the following disclaimer.
> + * - Redistributions in binary form must reproduce the above copyright 
> notice,
> + *   this list of conditions and the following disclaimer in the 
> documentation
> + *   and/or other materials provided with the distribution.
> + * - Neither the name of the Red Hat, Inc. nor the names of its
> + *   contributors may be used to endorse or promote products derived from 
> this
> + *   software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
> IS"
> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
> + * THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +/*
> + * Provides test of SAM API
> + */
> +
> +#include <config.h>
> +
> +#include <sys/types.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <corosync/corotypes.h>
> +#include <corosync/sam.h>
> +#include <signal.h>
> +#include <sys/wait.h>
> +
> +/*
> + * Set to 1 by test2_signal, the SIGTERM handler installed by test2.
> + * An object written from a signal handler has to be volatile sig_atomic_t.
> + */
> +static volatile sig_atomic_t test2_sig_delivered = 0;
> +
> +/*
> + * Number of times test4_hc_cb has been invoked.
> + */
> +static int test4_hc_cb_count = 0;
> +
> +/*
> + * Test 1: recovery policy restart.
> + *
> + * Registers with SAM (time interval 2000, policy restart) and then acts
> + * according to the instance id returned by sam_register:
> + *
> + * - Instance 1 starts hc, sends a hc once per second ten times and then
> + *   sleeps 3 seconds without sending. This should force a restart.
> + * - Instance 2 does the same, but stops hc before the first 3 second sleep.
> + *   Then it starts hc again and sleeps 3 seconds. This should force a
> + *   restart again.
> + * - Instance 3 calls initialize again, which should end with an error.
> + *   Then it calls start, stop and finalize, which should succeed, and
> + *   finalize once more, which should fail. After this, every API function
> + *   is called (none should succeed).
> + *
> + * Returns 0 on success and 1 on any failure (including an unexpected
> + * instance id).
> + */
> +static int test1 (void)
> +{
> +     cs_error_t error;
> +     unsigned int instance_id;
> +     int i;
> +
> +     printf ("%s: initialize\n", __FUNCTION__);
> +     error = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART);
> +     if (error != CS_OK) {
> +             fprintf (stderr, "Can't initialize SAM API. Error %d\n", error);
> +             return 1;
> +     }
> +     printf ("%s: register\n", __FUNCTION__);
> +     error = sam_register (&instance_id);
> +     if (error != CS_OK) {
> +             fprintf (stderr, "Can't register. Error %d\n", error);
> +             return 1;
> +     }
> +
> +     if (instance_id == 1 || instance_id == 2) {
> +             printf ("%s iid %u: start\n", __FUNCTION__, instance_id);
> +             error = sam_start ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't start hc. Error %d\n", error);
> +                     return 1;
> +             }
> +
> +             for (i = 0; i < 10; i++) {
> +                     printf ("%s iid %u: sleep 1\n", __FUNCTION__,
> +                             instance_id);
> +                     sleep (1);
> +
> +                     printf ("%s iid %u: hc send\n", __FUNCTION__,
> +                             instance_id);
> +                     error = sam_hc_send ();
> +                     if (error != CS_OK) {
> +                             fprintf (stderr, "Can't send hc. Error %d\n",
> +                                     error);
> +                             return 1;
> +                     }
> +             }
> +
> +             /*
> +              * Only the second instance stops hc, so only it is expected
> +              * to get through the following sleep.
> +              */
> +             if (instance_id == 2) {
> +                     printf ("%s iid %u: stop\n", __FUNCTION__, instance_id);
> +                     error = sam_stop ();
> +
> +                     if (error != CS_OK) {
> +                             fprintf (stderr, "Can't stop hc. Error %d\n",
> +                                     error);
> +                             return 1;
> +                     }
> +             }
> +
> +             printf ("%s iid %u: sleep 3\n", __FUNCTION__, instance_id);
> +             sleep (3);
> +
> +             printf ("%s iid %u: start\n", __FUNCTION__, instance_id);
> +             error = sam_start ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't start hc. Error %d\n", error);
> +                     return 1;
> +             }
> +
> +             printf ("%s iid %u: sleep 3\n", __FUNCTION__, instance_id);
> +             sleep (3);
> +             return 0;
> +     }
> +
> +     if (instance_id == 3) {
> +             /*
> +              * A second initialize has to be rejected
> +              */
> +             error = sam_initialize (2000, SAM_RECOVERY_POLICY_RESTART);
> +             if (error == CS_OK) {
> +                     fprintf (stderr,
> +                             "Can initialize SAM API after initialization!\n");
> +                     return 1;
> +             }
> +
> +             error = sam_start ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't start hc. Error %d\n", error);
> +                     return 1;
> +             }
> +             error = sam_stop ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't stop hc. Error %d\n", error);
> +                     return 1;
> +             }
> +             error = sam_finalize ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't finalize sam. Error %d\n",
> +                             error);
> +                     return 1;
> +             }
> +
> +             /*
> +              * A second finalize has to be rejected
> +              */
> +             error = sam_finalize ();
> +             if (error == CS_OK) {
> +                     fprintf (stderr,
> +                             "Can finalize sam after finalization!\n");
> +                     return 1;
> +             }
> +
> +             /*
> +              * After finalization no API call may succeed
> +              */
> +             if (sam_initialize (2, SAM_RECOVERY_POLICY_RESTART) == CS_OK ||
> +                     sam_start () == CS_OK || sam_stop () == CS_OK ||
> +                     sam_register (NULL) == CS_OK ||
> +                     sam_hc_send () == CS_OK ||
> +                     sam_hc_callback_register (NULL) == CS_OK) {
> +
> +                     fprintf (stderr,
> +                             "Can call one of function after finalization!\n");
> +
> +                     return 1;
> +             }
> +
> +             return 0;
> +     }
> +
> +     return 1;
> +}
> +
> +
> +/*
> + * SIGTERM handler installed by test2. Prints its own name and records the
> + * delivery in test2_sig_delivered.
> + */
> +static void test2_signal (int sig) {
> +     static const char msg[] = "test2_signal\n";
> +
> +     (void)sig;
> +
> +     /*
> +      * write () is async-signal-safe, printf () is not. The text bypasses
> +      * the stdio buffer, so it can appear ahead of earlier printf output
> +      * when stdout is not a terminal. Nothing useful can be done here if
> +      * the write fails.
> +      */
> +     if (write (STDOUT_FILENO, msg, sizeof (msg) - 1) == -1) {
> +             /* ignored */
> +     }
> +
> +     test2_sig_delivered = 1;
> +}
> +
> +/*
> + * Test 2: recovery policy quit and signal delivery.
> + *
> + * Registers with SAM (time interval 2000, policy quit). Instance 1 installs
> + * test2_signal as SIGTERM handler, starts hc, sends a single hc after one
> + * second and then stops sending. It waits until the handler has recorded a
> + * signal and then sleeps 3 seconds waiting for the real kill.
> + *
> + * Every path returns 1: reaching a return statement means that something
> + * failed or that the process survived the final sleep.
> + */
> +static int test2 (void) {
> +     cs_error_t error;
> +     unsigned int instance_id;
> +
> +     printf ("%s: initialize\n", __FUNCTION__);
> +     error = sam_initialize (2000, SAM_RECOVERY_POLICY_QUIT);
> +     if (error != CS_OK) {
> +             fprintf (stderr, "Can't initialize SAM API. Error %d\n", error);
> +             return 1;
> +     }
> +     printf ("%s: register\n", __FUNCTION__);
> +     error = sam_register (&instance_id);
> +     if (error != CS_OK) {
> +             fprintf (stderr, "Can't register. Error %d\n", error);
> +             return 1;
> +     }
> +
> +     if (instance_id == 1) {
> +             /*
> +              * Without the handler the test could not observe the signal
> +              */
> +             if (signal (SIGTERM, test2_signal) == SIG_ERR) {
> +                     fprintf (stderr, "Can't install SIGTERM handler\n");
> +                     return 1;
> +             }
> +
> +             printf ("%s iid %u: start\n", __FUNCTION__, instance_id);
> +             error = sam_start ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't start hc. Error %d\n", error);
> +                     return 1;
> +             }
> +
> +             printf ("%s iid %u: sleep 1\n", __FUNCTION__, instance_id);
> +             sleep (1);
> +
> +             printf ("%s iid %u: hc send\n", __FUNCTION__, instance_id);
> +             error = sam_hc_send ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't send hc. Error %d\n", error);
> +                     return 1;
> +             }
> +
> +             printf ("%s iid %u: wait for delivery of signal\n",
> +                     __FUNCTION__, instance_id);
> +             while (!test2_sig_delivered) {
> +                     sleep (1);
> +             }
> +
> +             printf ("%s iid %u: wait for real kill\n", __FUNCTION__,
> +                     instance_id);
> +
> +             sleep (3);
> +     }
> +
> +     return 1;
> +
> +}
> +
> +/*
> + * Test 3: smoke test. Better to turn off coredump ;) This has no time
> + * limit (time interval 0, policy restart); the process is just restarted
> + * when it dies.
> + *
> + * Every instance with an id below 100 starts hc and then kills itself with
> + * SIGFPE. The first instance with a higher id returns 0.
> + *
> + * Returns 0 on success and 1 on failure.
> + */
> +static int test3 (void) {
> +     cs_error_t error;
> +     unsigned int instance_id;
> +
> +     printf ("%s: initialize\n", __FUNCTION__);
> +     error = sam_initialize (0, SAM_RECOVERY_POLICY_RESTART);
> +     if (error != CS_OK) {
> +             fprintf (stderr, "Can't initialize SAM API. Error %d\n", error);
> +             return 1;
> +     }
> +     printf ("%s: register\n", __FUNCTION__);
> +     error = sam_register (&instance_id);
> +     if (error != CS_OK) {
> +             fprintf (stderr, "Can't register. Error %d\n", error);
> +             return 1;
> +     }
> +
> +     if (instance_id < 100) {
> +             printf ("%s iid %u: start\n", __FUNCTION__, instance_id);
> +             error = sam_start ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't start hc. Error %d\n", error);
> +                     return 1;
> +             }
> +
> +             printf ("%s iid %u: divide by zero\n", __FUNCTION__,
> +                     instance_id);
> +
> +             /*
> +              * Raise the signal of a division by zero explicitly. A real
> +              * integer division by zero is undefined behavior: a division
> +              * with an unused result may be removed by the compiler and
> +              * not every CPU traps on it.
> +              */
> +             raise (SIGFPE);
> +
> +             /*
> +              * Only reached if the signal didn't terminate the process
> +              */
> +             return 1;
> +     }
> +
> +     return 0;
> +
> +}
> +
> +/*
> + * Health check callback registered by test4. Counts and prints every
> + * invocation. Returns 0 for the first ten invocations and 1 for every
> + * later one.
> + */
> +static int test4_hc_cb (void)
> +{
> +     int calls;
> +
> +     calls = ++test4_hc_cb_count;
> +     printf ("%s %d\n", __FUNCTION__, calls);
> +
> +     return (calls > 10 ? 1 : 0);
> +}
> +/*
> + * Test 4: event driven healthchecking.
> + *
> + * Registers with SAM (time interval 100, policy restart). Instance 1
> + * registers test4_hc_cb as hc callback, starts hc and sleeps 2 seconds. It
> + * is expected to be killed during that sleep, so returning from the sleep
> + * is a failure. Instance 2 returns success immediately.
> + *
> + * Returns 0 on success and 1 on failure (including an unexpected instance
> + * id).
> + */
> +static int test4 (void)
> +{
> +     cs_error_t error;
> +     unsigned int instance_id;
> +
> +     printf ("%s: initialize\n", __FUNCTION__);
> +     error = sam_initialize (100, SAM_RECOVERY_POLICY_RESTART);
> +     if (error != CS_OK) {
> +             fprintf (stderr, "Can't initialize SAM API. Error %d\n", error);
> +             return 1;
> +     }
> +     printf ("%s: register\n", __FUNCTION__);
> +     error = sam_register (&instance_id);
> +     if (error != CS_OK) {
> +             fprintf (stderr, "Can't register. Error %d\n", error);
> +             return 1;
> +     }
> +
> +     if (instance_id == 1) {
> +             printf ("%s iid %u: hc callback register\n", __FUNCTION__,
> +                     instance_id);
> +             error = sam_hc_callback_register (test4_hc_cb);
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't register hc cb. Error %d\n",
> +                             error);
> +                     return 1;
> +             }
> +
> +             printf ("%s iid %u: start\n", __FUNCTION__, instance_id);
> +             error = sam_start ();
> +             if (error != CS_OK) {
> +                     fprintf (stderr, "Can't start hc. Error %d\n", error);
> +                     return 1;
> +             }
> +
> +             sleep (2);
> +
> +             printf ("%s iid %u: Failed. Wasn't killed.\n", __FUNCTION__,
> +                     instance_id);
> +             return 1;
> +     }
> +
> +     if (instance_id == 2) {
> +             return 0;
> +     }
> +
> +     return 1;
> +}
> +
> +/*
> + * Run one test in a forked child and report its result on stderr.
> + *
> + * The verdict is taken from the wait status in the parent. A flag changed
> + * inside the forked child would never be visible here.
> + *
> + * NOTE(review): this assumes that the exit status of the process we wait
> + * for reflects the result of the test, as the test2 check already assumed.
> + * Confirm against the process handling in sam_register.
> + *
> + * Returns 0 if the test passed, 1 if it failed and -1 if fork failed.
> + */
> +static int run_test (const char *name, int (*test_fn) (void))
> +{
> +     pid_t pid;
> +     int status;
> +     int passed;
> +
> +     pid = fork ();
> +
> +     if (pid == -1) {
> +             fprintf (stderr, "Can't fork\n");
> +             return -1;
> +     }
> +
> +     if (pid == 0) {
> +             exit (test_fn ());
> +     }
> +
> +     if (waitpid (pid, &status, 0) == -1) {
> +             perror ("waitpid");
> +             passed = 0;
> +     } else {
> +             /*
> +              * WEXITSTATUS is meaningful only for a normal exit
> +              */
> +             passed = (WIFEXITED (status) && WEXITSTATUS (status) == 0);
> +     }
> +
> +     fprintf (stderr, "%s %s\n", name, (passed ? "passed" : "failed"));
> +
> +     return (passed ? 0 : 1);
> +}
> +
> +/*
> + * Runs test1 to test4 one after another, each in its own forked child.
> + *
> + * Returns 0 if all tests passed, 1 if any test failed or fork failed.
> + */
> +int main(int argc, char *argv[])
> +{
> +     static const struct {
> +             const char *name;
> +             int (*fn) (void);
> +     } tests[] = {
> +             { "test1", test1 },
> +             { "test2", test2 },
> +             { "test3", test3 },
> +             { "test4", test4 }
> +     };
> +     size_t i;
> +     int res;
> +     int all_passed = 1;
> +
> +     (void)argc;
> +     (void)argv;
> +
> +     for (i = 0; i < sizeof (tests) / sizeof (tests[0]); i++) {
> +             res = run_test (tests[i].name, tests[i].fn);
> +
> +             if (res == -1) {
> +                     return 1;
> +             }
> +
> +             if (res != 0) {
> +                     all_passed = 0;
> +             }
> +     }
> +
> +     if (all_passed)
> +             fprintf (stderr, "All tests passed\n");
> +
> +     return (all_passed ? 0 : 1);
> +}

_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to