On Mon, Feb 8, 2010 at 4:11 PM, Jan Friesse <[email protected]> wrote:
> Attached is fixed version of fixed fix, and it looks like this one
> really fix all problems.
>
> Andrew, can you please run your tests with this patch (patch is against
> today trunk)?

ACK.

If there are any remaining issues, my tests can't find them.
I say good for merge.

>
> Regards,
>  Honza
>
> diff --git a/trunk/exec/coroipcs.c b/trunk/exec/coroipcs.c
> index 0476202..efa0db8 100644
> --- a/trunk/exec/coroipcs.c
> +++ b/trunk/exec/coroipcs.c
> @@ -98,6 +98,8 @@ static struct coroipcs_init_state_v2 *api = NULL;
>
>  DECLARE_LIST_INIT (conn_info_list_head);
>
> +DECLARE_LIST_INIT (conn_info_exit_list_head);
> +
>  struct outq_item {
>        void *msg;
>        size_t mlen;
> @@ -464,6 +466,7 @@ static inline int conn_info_destroy (struct conn_info 
> *conn_info)
>
>        list_del (&conn_info->list);
>        list_init (&conn_info->list);
> +       list_add (&conn_info->list, &conn_info_exit_list_head);
>
>        if (conn_info->state == CONN_STATE_THREAD_REQUEST_EXIT) {
>                res = pthread_join (conn_info->thread, &retval);
> @@ -1081,6 +1084,50 @@ void coroipcs_ipc_exit (void)
>        }
>  }
>
> +int coroipcs_ipc_service_exit (unsigned int service)
> +{
> +       struct list_head *list, *list_next;
> +       struct conn_info *conn_info;
> +
> +       for (list = conn_info_list_head.next; list != &conn_info_list_head;
> +               list = list_next) {
> +
> +               list_next = list->next;
> +
> +               conn_info = list_entry (list, struct conn_info, list);
> +
> +               if (conn_info->service != service ||
> +                   (conn_info->state != CONN_STATE_THREAD_ACTIVE && 
> conn_info->state != CONN_STATE_THREAD_REQUEST_EXIT)) {
> +                       continue;
> +               }
> +
> +               ipc_disconnect (conn_info);
> +               api->poll_dispatch_destroy (conn_info->fd, NULL);
> +               while (conn_info_destroy (conn_info) != -1)
> +                       ;
> +
> +               /*
> +                * We will return to prevent token loss. Schedwrk will call 
> us again.
> +                */
> +               return (-1);
> +       }
> +
> +       /*
> +        * No conn info left in active list. We will traverse thru exit list. 
> If there is any
> +        * conn_info->service == service, we will wait to proper end -> 
> return -1
> +        */
> +
> +       for (list = conn_info_exit_list_head.next; list != 
> &conn_info_exit_list_head; list = list->next) {
> +               conn_info = list_entry (list, struct conn_info, list);
> +
> +               if (conn_info->service == service) {
> +                       return (-1);
> +               }
> +       }
> +
> +       return (0);
> +}
> +
>  /*
>  * Get the conn info private data
>  */
> diff --git a/trunk/exec/main.c b/trunk/exec/main.c
> index beedc2c..34ceb97 100644
> --- a/trunk/exec/main.c
> +++ b/trunk/exec/main.c
> @@ -155,7 +155,6 @@ void corosync_state_dump (void)
>  static void unlink_all_completed (void)
>  {
>        poll_stop (corosync_poll_handle);
> -       coroipcs_ipc_exit ();
>        totempg_finalize ();
>
>        corosync_exit_error (AIS_DONE_EXIT);
> @@ -855,7 +854,7 @@ static int corosync_security_valid (int euid, int egid)
>
>  static int corosync_service_available (unsigned int service)
>  {
> -       return (ais_service[service] != NULL);
> +       return (ais_service[service] != NULL && 
> !ais_service_exiting[service]);
>  }
>
>  struct sending_allowed_private_data_struct {
> @@ -961,6 +960,12 @@ static void corosync_poll_dispatch_modify (
>                corosync_poll_handler_dispatch);
>  }
>
> +static void corosync_poll_dispatch_destroy (
> +    int fd,
> +    void *context)
> +{
> +       poll_dispatch_delete (corosync_poll_handle, fd);
> +}
>
>  static hdb_handle_t corosync_stats_create_connection (const char* name,
>                        const pid_t pid, const int fd)
> @@ -1098,6 +1103,7 @@ static struct coroipcs_init_state_v2 ipc_init_state_v2 
> = {
>        .poll_accept_add                = corosync_poll_accept_add,
>        .poll_dispatch_add              = corosync_poll_dispatch_add,
>        .poll_dispatch_modify           = corosync_poll_dispatch_modify,
> +       .poll_dispatch_destroy          = corosync_poll_dispatch_destroy,
>        .init_fn_get                    = corosync_init_fn_get,
>        .exit_fn_get                    = corosync_exit_fn_get,
>        .handler_fn_get                 = corosync_handler_fn_get,
> diff --git a/trunk/exec/service.c b/trunk/exec/service.c
> index d99e7bf..6c549fd 100644
> --- a/trunk/exec/service.c
> +++ b/trunk/exec/service.c
> @@ -55,6 +55,8 @@
>  #include <corosync/engine/coroapi.h>
>  #include "service.h"
>
> +#include <corosync/coroipcs.h>
> +
>  LOGSYS_DECLARE_SUBSYS ("SERV");
>
>  struct default_service {
> @@ -89,18 +91,29 @@ static struct default_service default_services[] = {
>        }
>  };
>
> +/*
> + * service exit and unlink schedwrk handler data structure
> + */
> +struct seus_handler_data {
> +       hdb_handle_t service_handle;
> +       int service_engine;
> +       struct corosync_api_v1 *api;
> +};
> +
>  struct corosync_service_engine *ais_service[SERVICE_HANDLER_MAXIMUM_COUNT];
>
>  hdb_handle_t service_stats_handle[SERVICE_HANDLER_MAXIMUM_COUNT][64];
>
> +int ais_service_exiting[SERVICE_HANDLER_MAXIMUM_COUNT];
> +
>  static hdb_handle_t object_internal_configuration_handle;
>
>  static hdb_handle_t object_stats_services_handle;
>
>  static void (*service_unlink_all_complete) (void) = NULL;
>
> -static hdb_handle_t unlink_all_handle;
> -
> +static hdb_handle_t swrk_service_exit_handle;
> +static hdb_handle_t swrk_service_unlink_handle;
>
>  static unsigned int default_services_requested (struct corosync_api_v1 
> *corosync_api)
>  {
> @@ -272,7 +285,8 @@ corosync_service_unlink_priority (
>        struct corosync_api_v1 *corosync_api,
>        int lowest_priority,
>        int *current_priority,
> -       int *current_service_engine)
> +       int *current_service_engine,
> +       hdb_handle_t *current_service_handle)
>  {
>        unsigned short *service_id;
>        hdb_handle_t object_service_handle;
> @@ -319,11 +333,6 @@ corosync_service_unlink_priority (
>                                                        return (-1);
>                                                }
>                                        }
> -                                       log_printf(LOGSYS_LEVEL_NOTICE,
> -                                               "Service engine unloaded: 
> %s\n",
> -                                                  
> ais_service[*current_service_engine]->name);
> -
> -                                       ais_service[*current_service_engine] 
> = NULL;
>
>                                        res = corosync_api->object_key_get (
>                                                object_service_handle,
> @@ -331,17 +340,26 @@ corosync_service_unlink_priority (
>                                                (void *)&found_service_handle,
>                                                NULL);
>
> -                                       lcr_ifact_release 
> (*found_service_handle);
> +                                       *current_service_handle = 
> *found_service_handle;
> +
> +                                       
> ais_service_exiting[*current_service_engine] = 1;
>
> -                                       corosync_api->object_destroy 
> (object_service_handle);
> -                                       break;
> +                                       corosync_api->object_find_destroy 
> (object_find_handle);
> +
> +                                       /*
> +                                        * Call should call this function 
> again
> +                                        */
> +                                       return (1);
>                                }
>                        }
>
>                        corosync_api->object_find_destroy (object_find_handle);
>                }
>        }
> -       return 0;
> +       /*
> +        * We finish unlink of all services -> no need to call this function 
> again
> +        */
> +       return (0);
>  }
>
>  static unsigned int service_unlink_and_exit (
> @@ -538,12 +556,46 @@ unsigned int corosync_service_defaults_link_and_init 
> (struct corosync_api_v1 *co
>        return (0);
>  }
>
> -static int unlink_all_schedwrk_handler (const void *data) {
> +/*
> + * Declaration of exit_schedwrk_handler, because of cycle
> + * (service_exit_schedwrk_handler calls service_unlink_schedwrk_handler, and 
> vice-versa)
> + */
> +static int service_exit_schedwrk_handler (const void *data);
> +
> +static int service_unlink_schedwrk_handler (const void *data) {
> +       struct seus_handler_data *cb_data = (struct seus_handler_data *)data;
> +       struct corosync_api_v1 *api = (struct corosync_api_v1 *)cb_data->api;
> +
> +       /*
> +        * Exit all ipc connections dependent on this service
> +        */
> +       if (coroipcs_ipc_service_exit (cb_data->service_engine) == -1)
> +               return -1;
> +
> +       log_printf(LOGSYS_LEVEL_NOTICE,
> +               "Service engine unloaded: %s\n",
> +               ais_service[cb_data->service_engine]->name);
> +
> +       ais_service[cb_data->service_engine] = NULL;
> +
> +       lcr_ifact_release (cb_data->service_handle);
> +
> +       api->schedwrk_create (
> +               &swrk_service_exit_handle,
> +               &service_exit_schedwrk_handler,
> +               data);
> +
> +       return 0;
> +}
> +
> +static int service_exit_schedwrk_handler (const void *data) {
>        int res;
>        static int current_priority = 0;
>        static int current_service_engine = 0;
>        static int called = 0;
> -       struct corosync_api_v1 *api = (struct corosync_api_v1 *)data;
> +       struct seus_handler_data *cb_data = (struct seus_handler_data *)data;
> +       struct corosync_api_v1 *api = (struct corosync_api_v1 *)cb_data->api;
> +       hdb_handle_t service_handle;
>
>        if (called == 0) {
>                log_printf(LOGSYS_LEVEL_NOTICE,
> @@ -556,10 +608,25 @@ static int unlink_all_schedwrk_handler (const void 
> *data) {
>                api,
>                0,
>                &current_priority,
> -               &current_service_engine);
> +               &current_service_engine,
> +               &service_handle);
>        if (res == 0) {
>                service_unlink_all_complete();
> +               return (res);
> +       }
> +
> +       if (res == 1) {
> +               cb_data->service_engine = current_service_engine;
> +               cb_data->service_handle = service_handle;
> +
> +               api->schedwrk_create_nolock (
> +                       &swrk_service_unlink_handle,
> +                       &service_unlink_schedwrk_handler,
> +                       data);
> +
> +               return (0);
>        }
> +
>        return (res);
>  }
>
> @@ -568,6 +635,7 @@ void corosync_service_unlink_all (
>        void (*unlink_all_complete) (void))
>  {
>        static int called = 0;
> +       static struct seus_handler_data cb_data;
>
>        assert (api);
>
> @@ -579,11 +647,13 @@ void corosync_service_unlink_all (
>        if (called == 0) {
>                called = 1;
>        }
> -
> +
> +       cb_data.api = api;
> +
>        api->schedwrk_create (
> -               &unlink_all_handle,
> -               &unlink_all_schedwrk_handler,
> -               api);
> +               &swrk_service_exit_handle,
> +               &service_exit_schedwrk_handler,
> +               &cb_data);
>  }
>
>  struct service_unlink_and_exit_data {
> diff --git a/trunk/exec/service.h b/trunk/exec/service.h
> index efb3e44..2cc3399 100644
> --- a/trunk/exec/service.h
> +++ b/trunk/exec/service.h
> @@ -58,8 +58,8 @@ extern unsigned int corosync_service_unlink_and_exit (
>  * Unlink and exit all corosync services
>  */
>  extern void corosync_service_unlink_all (
> -        struct corosync_api_v1 *api,
> -        void (*unlink_all_complete) (void));
> +       struct corosync_api_v1 *api,
> +       void (*unlink_all_complete) (void));
>
>  /*
>  * Load all of the default services
> @@ -69,6 +69,8 @@ extern unsigned int corosync_service_defaults_link_and_init 
> (
>
>  extern struct corosync_service_engine *ais_service[];
>
> +extern int ais_service_exiting[];
> +
>  extern hdb_handle_t service_stats_handle[SERVICE_HANDLER_MAXIMUM_COUNT][64];
>
>  #endif /* SERVICE_H_DEFINED */
> diff --git a/trunk/include/corosync/coroipcs.h 
> b/trunk/include/corosync/coroipcs.h
> index 3838af8..f08f0e5 100644
> --- a/trunk/include/corosync/coroipcs.h
> +++ b/trunk/include/corosync/coroipcs.h
> @@ -152,6 +152,8 @@ extern void coroipcs_refcount_dec (void *conn);
>
>  extern void coroipcs_ipc_exit (void);
>
> +extern int coroipcs_ipc_service_exit (unsigned int service);
> +
>  extern int coroipcs_handler_accept (
>        int fd,
>        int revent,
>
>
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to