On Tue, Nov 09, 2010 at 11:18:43AM -0700, Steven Dake wrote:
> How well tested is this patch?  Changing around shutdown and signals
> - spent months getting this to work properly, don't want
> regressions.
Not extensively, but you can see my changes to the test service engine.
(I return -1 four times before returning 0).

It would help having some logical test assertions.

Angus
> 
> Regards
> -steve
> 
> On 11/08/2010 03:58 PM, Angus Salkeld wrote:
> >Signed-off-by: Angus Salkeld<[email protected]>
> >---
> >  cts/agents/syncv2.c |   13 +++++++
> >  exec/main.c         |   96 
> > ++++++++++++---------------------------------------
> >  exec/service.c      |   79 ++++++++++++++++++++++++------------------
> >  3 files changed, 80 insertions(+), 108 deletions(-)
> >
> >diff --git a/cts/agents/syncv2.c b/cts/agents/syncv2.c
> >index 354fab4..a98c539 100644
> >--- a/cts/agents/syncv2.c
> >+++ b/cts/agents/syncv2.c
> >@@ -91,6 +91,8 @@ static void tst_sv2_sync_activate (void);
> >
> >  static void tst_sv2_sync_abort (void);
> >
> >+static int tst_sv2_exec_exit_fn (void);
> >+
> >  struct corosync_service_engine tst_sv2_service_engine = {
> >     .name                   = "corosync test synv2 service",
> >     .id                     = TST_SV2_SERVICE,
> >@@ -105,6 +107,7 @@ struct corosync_service_engine tst_sv2_service_engine = {
> >     .exec_engine_count      = 0,
> >     .confchg_fn             = tst_sv2_confchg_fn,
> >     .exec_init_fn           = tst_sv2_exec_init_fn,
> >+    .exec_exit_fn           = tst_sv2_exec_exit_fn,
> >     .exec_dump_fn           = NULL,
> >     .sync_mode              = CS_SYNC_V2,
> >     .sync_init              = tst_sv2_sync_init_v2,
> >@@ -181,6 +184,16 @@ static int tst_sv2_exec_init_fn (
> >     return 0;
> >  }
> >
> >+static int32_t exit_count = 0;
> >+static int tst_sv2_exec_exit_fn (void)
> >+{
> >+    exit_count++;
> >+    log_printf (LOGSYS_LEVEL_INFO, "exit_count:%d", exit_count);
> >+    if (exit_count<  4) {
> >+            return -1;
> >+    }
> >+}
> >+
> >  static void tst_sv2_confchg_fn (
> >     enum totem_configuration_type configuration_type,
> >     const unsigned int *member_list, size_t member_list_entries,
> >diff --git a/exec/main.c b/exec/main.c
> >index 2c1a97d..c768055 100644
> >--- a/exec/main.c
> >+++ b/exec/main.c
> >@@ -131,10 +131,6 @@ static hdb_handle_t object_memb_handle;
> >
> >  static corosync_timer_handle_t corosync_stats_timer_handle;
> >
> >-static pthread_t corosync_exit_thread;
> >-
> >-static sem_t corosync_exit_sem;
> >-
> >  static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid";
> >
> >  static void serialize_unlock (void);
> >@@ -178,61 +174,29 @@ static void unlink_all_completed (void)
> >
> >  void corosync_shutdown_request (void)
> >  {
> >-    static int called = 0;
> >-    if (called) {
> >-            return;
> >-    }
> >-    if (called == 0) {
> >-            called = 1;
> >-    }
> >-
> >-    sem_post (&corosync_exit_sem);
> >-}
> >-
> >-static void *corosync_exit_thread_handler (void *arg)
> >-{
> >-    sem_wait (&corosync_exit_sem);
> >-
> >     corosync_service_unlink_all (api, unlink_all_completed);
> >-
> >-    return arg;
> >  }
> >
> >-static void sigusr2_handler (int num)
> >+static int32_t sig_diag_handler (int num, void *data)
> >  {
> >     corosync_state_dump ();
> >     logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata");
> >+    return 0;
> >  }
> >
> >-static void sigterm_handler (int num)
> >-{
> >-    corosync_shutdown_request ();
> >-}
> >-
> >-static void sigquit_handler (int num)
> >-{
> >-    corosync_shutdown_request ();
> >-}
> >-
> >-static void sigintr_handler (int num)
> >+static int32_t sig_exit_handler (int num, void *data)
> >  {
> >-    corosync_shutdown_request ();
> >-}
> >-
> >-static void sigsegv_handler (int num)
> >-{
> >-    (void)signal (SIGSEGV, SIG_DFL);
> >-    logsys_atexit();
> >-    logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata");
> >-    raise (SIGSEGV);
> >+    corosync_service_unlink_all (api, unlink_all_completed);
> >+    return 0;
> >  }
> >
> >-static void sigabrt_handler (int num)
> >+static int32_t sig_failure_handler (int num, void* data)
> >  {
> >-    (void)signal (SIGABRT, SIG_DFL);
> >+    (void)signal (num, SIG_DFL);
> >     logsys_atexit();
> >     logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata");
> >-    raise (SIGABRT);
> >+    raise (num);
> >+    return 0;
> >  }
> >
> >  #define LOCALHOST_IP inet_addr("127.0.0.1")
> >@@ -1239,13 +1203,20 @@ int main (int argc, char **argv, char **envp)
> >     log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): 
> > started and ready to provide service.\n", VERSION);
> >     log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" 
> > PACKAGE_FEATURES "\n");
> >
> >+    corosync_poll_handle = qb_loop_create ();
> >
> >-    (void)signal (SIGINT, sigintr_handler);
> >-    (void)signal (SIGUSR2, sigusr2_handler);
> >-    (void)signal (SIGSEGV, sigsegv_handler);
> >-    (void)signal (SIGABRT, sigabrt_handler);
> >-    (void)signal (SIGQUIT, sigquit_handler);
> >-    (void)signal (SIGTERM, sigterm_handler);
> >+    qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
> >+            SIGSEGV, NULL, sig_failure_handler, NULL);
> >+    qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
> >+            SIGABRT, NULL, sig_failure_handler, NULL);
> >+    qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW,
> >+            SIGUSR2, NULL, sig_diag_handler, NULL);
> >+    qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
> >+            SIGINT, NULL, sig_exit_handler, NULL);
> >+    qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
> >+            SIGQUIT, NULL, sig_exit_handler, NULL);
> >+    qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH,
> >+            SIGTERM, NULL, sig_exit_handler, NULL);
> >  #if MSG_NOSIGNAL != 0
> >     (void)signal (SIGPIPE, SIG_IGN);
> >  #endif
> >@@ -1419,29 +1390,6 @@ int main (int argc, char **argv, char **envp)
> >     corosync_poll_handle = qb_loop_create ();
> >
> >     /*
> >-     * Sleep for a while to let other nodes in the cluster
> >-     * understand that this node has been away (if it was
> >-     * an corosync restart).
> >-     */
> >-
> >-// TODO what is this hack for?      usleep(totem_config.token_timeout * 
> >2000);
> >-
> >-    /*
> >-     * Create semaphore and start "exit" thread
> >-     */
> >-    res = sem_init (&corosync_exit_sem, 0, 0);
> >-    if (res != 0) {
> >-            log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't 
> >create exit thread.\n");
> >-            corosync_exit_error (AIS_DONE_FATAL_ERR);
> >-    }
> >-
> >-    res = pthread_create (&corosync_exit_thread, NULL, 
> >corosync_exit_thread_handler, NULL);
> >-    if (res != 0) {
> >-            log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't 
> >create exit thread.\n");
> >-            corosync_exit_error (AIS_DONE_FATAL_ERR);
> >-    }
> >-
> >-    /*
> >      * if totempg_initialize doesn't have root priveleges, it cannot
> >      * bind to a specific interface.  This only matters if
> >      * there is more then one interface in a system, so
> >diff --git a/exec/service.c b/exec/service.c
> >index 908527c..3b31bad 100644
> >--- a/exec/service.c
> >+++ b/exec/service.c
> >@@ -55,6 +55,7 @@
> >  #include "service.h"
> >
> >  #include<qb/qbipcs.h>
> >+#include<qb/qbloop.h>
> >
> >  LOGSYS_DECLARE_SUBSYS ("SERV");
> >
> >@@ -123,9 +124,6 @@ static hdb_handle_t object_stats_services_handle;
> >
> >  static void (*service_unlink_all_complete) (void) = NULL;
> >
> >-static hdb_handle_t swrk_service_exit_handle;
> >-static hdb_handle_t swrk_service_unlink_handle;
> >-
> >  static unsigned int default_services_requested (struct corosync_api_v1 
> > *corosync_api)
> >  {
> >     hdb_handle_t object_service_handle;
> >@@ -572,17 +570,17 @@ unsigned int corosync_service_defaults_link_and_init 
> >(struct corosync_api_v1 *co
> >   * Declaration of exit_schedwrk_handler, because of cycle
> >   * (service_exit_schedwrk_handler calls service_unlink_schedwrk_handler, 
> > and vice-versa)
> >   */
> >-static int service_exit_schedwrk_handler (const void *data);
> >+static void service_exit_schedwrk_handler (void *data);
> >
> >-static int service_unlink_schedwrk_handler (const void *data) {
> >+static void service_unlink_schedwrk_handler (void *data) {
> >     struct seus_handler_data *cb_data = (struct seus_handler_data *)data;
> >-    struct corosync_api_v1 *api = (struct corosync_api_v1 *)cb_data->api;
> >
> >     /*
> >      * Exit all ipc connections dependent on this service
> >      */
> >-    if (cs_ipcs_service_destroy (cb_data->service_engine) == -1)
> >-            return -1;
> >+    if (cs_ipcs_service_destroy (cb_data->service_engine) == -1) {
> >+            goto redo_this_function;
> >+    }
> >
> >     log_printf(LOGSYS_LEVEL_NOTICE,
> >             "Service engine unloaded: %s\n",
> >@@ -592,15 +590,22 @@ static int service_unlink_schedwrk_handler (const void 
> >*data) {
> >
> >     lcr_ifact_release (cb_data->service_handle);
> >
> >-    api->schedwrk_create (
> >-            &swrk_service_exit_handle,
> >-            &service_exit_schedwrk_handler,
> >-            data);
> >+    qb_loop_job_add(corosync_poll_handle_get(),
> >+            QB_LOOP_LOW,
> >+            data,
> >+            service_exit_schedwrk_handler);
> >+
> >+    return;
> >+
> >+ redo_this_function:
> >+    qb_loop_job_add(corosync_poll_handle_get(),
> >+            QB_LOOP_LOW,
> >+            data,
> >+            service_unlink_schedwrk_handler);
> >
> >-    return 0;
> >  }
> >
> >-static int service_exit_schedwrk_handler (const void *data) {
> >+static void service_exit_schedwrk_handler (void *data) {
> >     int res;
> >     static int current_priority = 0;
> >     static int current_service_engine = 0;
> >@@ -624,24 +629,26 @@ static int service_exit_schedwrk_handler (const void 
> >*data) {
> >             &service_handle);
> >     if (res == 0) {
> >             service_unlink_all_complete();
> >-            return (res);
> >+            return;
> >     }
> >
> >     if (res == 1) {
> >             cb_data->service_engine = current_service_engine;
> >             cb_data->service_handle = service_handle;
> >
> >-            api->schedwrk_create_nolock (
> >-                    &swrk_service_unlink_handle,
> >-                    &service_unlink_schedwrk_handler,
> >-                    data);
> >-
> >-            return (0);
> >+            qb_loop_job_add(corosync_poll_handle_get(),
> >+                    QB_LOOP_LOW,
> >+                    data,
> >+                    service_unlink_schedwrk_handler);
> >+            return;
> >     }
> >
> >-    return (res);
> >+    qb_loop_job_add(corosync_poll_handle_get(),
> >+            QB_LOOP_LOW,
> >+            data,
> >+            service_exit_schedwrk_handler);
> >  }
> >-            
> >+
> >  void corosync_service_unlink_all (
> >     struct corosync_api_v1 *api,
> >     void (*unlink_all_complete) (void))
> >@@ -662,10 +669,10 @@ void corosync_service_unlink_all (
> >
> >     cb_data.api = api;
> >
> >-    api->schedwrk_create (
> >-            &swrk_service_exit_handle,
> >-            &service_exit_schedwrk_handler,
> >-            &cb_data);
> >+    qb_loop_job_add(corosync_poll_handle_get(),
> >+            QB_LOOP_LOW,
> >+            &cb_data,
> >+            service_exit_schedwrk_handler);
> >  }
> >
> >  struct service_unlink_and_exit_data {
> >@@ -675,7 +682,7 @@ struct service_unlink_and_exit_data {
> >     unsigned int ver;
> >  };
> >
> >-static int service_unlink_and_exit_schedwrk_handler (void *data)
> >+static void service_unlink_and_exit_schedwrk_handler (void *data)
> >  {
> >     struct service_unlink_and_exit_data *service_unlink_and_exit_data =
> >             data;
> >@@ -688,8 +695,12 @@ static int service_unlink_and_exit_schedwrk_handler 
> >(void *data)
> >
> >     if (res == 0) {
> >             free (service_unlink_and_exit_data);
> >+    } else {
> >+            qb_loop_job_add(corosync_poll_handle_get(),
> >+                    QB_LOOP_LOW,
> >+                    data,
> >+                    service_unlink_and_exit_schedwrk_handler);
> >     }
> >-    return (res);
> >  }
> >
> >  typedef int (*schedwrk_cast) (const void *);
> >@@ -706,10 +717,10 @@ unsigned int corosync_service_unlink_and_exit (
> >     service_unlink_and_exit_data->api = api;
> >     service_unlink_and_exit_data->name = strdup (service_name);
> >     service_unlink_and_exit_data->ver = service_ver;
> >-    
> >-    api->schedwrk_create (
> >-            &service_unlink_and_exit_data->handle,
> >-            (schedwrk_cast)service_unlink_and_exit_schedwrk_handler,
> >-            service_unlink_and_exit_data);
> >+
> >+    qb_loop_job_add(corosync_poll_handle_get(),
> >+            QB_LOOP_LOW,
> >+            service_unlink_and_exit_data,
> >+            service_unlink_and_exit_schedwrk_handler);
> >     return (0);
> >  }
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais

Reply via email to