On Tue, Nov 09, 2010 at 11:18:43AM -0700, Steven Dake wrote: > How well tested is this patch? Changing around shutdown and signals > - spent months getting this to work properly, don't want > regressions. Not extensively, but you can see my changes to the test service engine. (I return -1 four times before returning 0).
It would help having some logical test assertions. Angus > > Regards > -steve > > On 11/08/2010 03:58 PM, Angus Salkeld wrote: > >Signed-off-by: Angus Salkeld<[email protected]> > >--- > > cts/agents/syncv2.c | 13 +++++++ > > exec/main.c | 96 > > ++++++++++++--------------------------------------- > > exec/service.c | 79 ++++++++++++++++++++++++------------------ > > 3 files changed, 80 insertions(+), 108 deletions(-) > > > >diff --git a/cts/agents/syncv2.c b/cts/agents/syncv2.c > >index 354fab4..a98c539 100644 > >--- a/cts/agents/syncv2.c > >+++ b/cts/agents/syncv2.c > >@@ -91,6 +91,8 @@ static void tst_sv2_sync_activate (void); > > > > static void tst_sv2_sync_abort (void); > > > >+static int tst_sv2_exec_exit_fn (void); > >+ > > struct corosync_service_engine tst_sv2_service_engine = { > > .name = "corosync test synv2 service", > > .id = TST_SV2_SERVICE, > >@@ -105,6 +107,7 @@ struct corosync_service_engine tst_sv2_service_engine = { > > .exec_engine_count = 0, > > .confchg_fn = tst_sv2_confchg_fn, > > .exec_init_fn = tst_sv2_exec_init_fn, > >+ .exec_exit_fn = tst_sv2_exec_exit_fn, > > .exec_dump_fn = NULL, > > .sync_mode = CS_SYNC_V2, > > .sync_init = tst_sv2_sync_init_v2, > >@@ -181,6 +184,16 @@ static int tst_sv2_exec_init_fn ( > > return 0; > > } > > > >+static int32_t exit_count = 0; > >+static int tst_sv2_exec_exit_fn (void) > >+{ > >+ exit_count++; > >+ log_printf (LOGSYS_LEVEL_INFO, "exit_count:%d", exit_count); > >+ if (exit_count< 4) { > >+ return -1; > >+ } > >+} > >+ > > static void tst_sv2_confchg_fn ( > > enum totem_configuration_type configuration_type, > > const unsigned int *member_list, size_t member_list_entries, > >diff --git a/exec/main.c b/exec/main.c > >index 2c1a97d..c768055 100644 > >--- a/exec/main.c > >+++ b/exec/main.c > >@@ -131,10 +131,6 @@ static hdb_handle_t object_memb_handle; > > > > static corosync_timer_handle_t corosync_stats_timer_handle; > > > >-static pthread_t corosync_exit_thread; > >- > >-static sem_t corosync_exit_sem; > >- > > static const char *corosync_lock_file = LOCALSTATEDIR"/run/corosync.pid"; > > > > static void serialize_unlock (void); > >@@ -178,61 +174,29 @@ static void unlink_all_completed (void) > > > > void corosync_shutdown_request (void) > > { > >- static int called = 0; > >- if (called) { > >- return; > >- } > >- if (called == 0) { > >- called = 1; > >- } > >- > >- sem_post (&corosync_exit_sem); > >-} > >- > >-static void *corosync_exit_thread_handler (void *arg) > >-{ > >- sem_wait (&corosync_exit_sem); > >- > > corosync_service_unlink_all (api, unlink_all_completed); > >- > >- return arg; > > } > > > >-static void sigusr2_handler (int num) > >+static int32_t sig_diag_handler (int num, void *data) > > { > > corosync_state_dump (); > > logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); > >+ return 0; > > } > > > >-static void sigterm_handler (int num) > >-{ > >- corosync_shutdown_request (); > >-} > >- > >-static void sigquit_handler (int num) > >-{ > >- corosync_shutdown_request (); > >-} > >- > >-static void sigintr_handler (int num) > >+static int32_t sig_exit_handler (int num, void *data) > > { > >- corosync_shutdown_request (); > >-} > >- > >-static void sigsegv_handler (int num) > >-{ > >- (void)signal (SIGSEGV, SIG_DFL); > >- logsys_atexit(); > >- logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); > >- raise (SIGSEGV); > >+ corosync_service_unlink_all (api, unlink_all_completed); > >+ return 0; > > } > > > >-static void sigabrt_handler (int num) > >+static int32_t sig_failure_handler (int num, void* data) > > { > >- (void)signal (SIGABRT, SIG_DFL); > >+ (void)signal (num, SIG_DFL); > > logsys_atexit(); > > logsys_log_rec_store (LOCALSTATEDIR "/lib/corosync/fdata"); > >- raise (SIGABRT); > >+ raise (num); > >+ return 0; > > } > > > > #define LOCALHOST_IP inet_addr("127.0.0.1") > >@@ -1239,13 +1203,20 @@ int main (int argc, char **argv, char **envp) > > log_printf (LOGSYS_LEVEL_NOTICE, "Corosync Cluster Engine ('%s'): > > started and ready to provide service.\n", VERSION); > > log_printf (LOGSYS_LEVEL_INFO, "Corosync built-in features:" > > PACKAGE_FEATURES "\n"); > > > >+ corosync_poll_handle = qb_loop_create (); > > > >- (void)signal (SIGINT, sigintr_handler); > >- (void)signal (SIGUSR2, sigusr2_handler); > >- (void)signal (SIGSEGV, sigsegv_handler); > >- (void)signal (SIGABRT, sigabrt_handler); > >- (void)signal (SIGQUIT, sigquit_handler); > >- (void)signal (SIGTERM, sigterm_handler); > >+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, > >+ SIGSEGV, NULL, sig_failure_handler, NULL); > >+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, > >+ SIGABRT, NULL, sig_failure_handler, NULL); > >+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_LOW, > >+ SIGUSR2, NULL, sig_diag_handler, NULL); > >+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, > >+ SIGINT, NULL, sig_exit_handler, NULL); > >+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, > >+ SIGQUIT, NULL, sig_exit_handler, NULL); > >+ qb_loop_signal_add(corosync_poll_handle, QB_LOOP_HIGH, > >+ SIGTERM, NULL, sig_exit_handler, NULL); > > #if MSG_NOSIGNAL != 0 > > (void)signal (SIGPIPE, SIG_IGN); > > #endif > >@@ -1419,29 +1390,6 @@ int main (int argc, char **argv, char **envp) > > corosync_poll_handle = qb_loop_create (); > > > > /* > >- * Sleep for a while to let other nodes in the cluster > >- * understand that this node has been away (if it was > >- * an corosync restart). > >- */ > >- > >-// TODO what is this hack for? usleep(totem_config.token_timeout * > >2000); > >- > >- /* > >- * Create semaphore and start "exit" thread > >- */ > >- res = sem_init (&corosync_exit_sem, 0, 0); > >- if (res != 0) { > >- log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't > >create exit thread.\n"); > >- corosync_exit_error (AIS_DONE_FATAL_ERR); > >- } > >- > >- res = pthread_create (&corosync_exit_thread, NULL, > >corosync_exit_thread_handler, NULL); > >- if (res != 0) { > >- log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't > >create exit thread.\n"); > >- corosync_exit_error (AIS_DONE_FATAL_ERR); > >- } > >- > >- /* > > * if totempg_initialize doesn't have root priveleges, it cannot > > * bind to a specific interface. This only matters if > > * there is more then one interface in a system, so > >diff --git a/exec/service.c b/exec/service.c > >index 908527c..3b31bad 100644 > >--- a/exec/service.c > >+++ b/exec/service.c > >@@ -55,6 +55,7 @@ > > #include "service.h" > > > > #include<qb/qbipcs.h> > >+#include<qb/qbloop.h> > > > > LOGSYS_DECLARE_SUBSYS ("SERV"); > > > >@@ -123,9 +124,6 @@ static hdb_handle_t object_stats_services_handle; > > > > static void (*service_unlink_all_complete) (void) = NULL; > > > >-static hdb_handle_t swrk_service_exit_handle; > >-static hdb_handle_t swrk_service_unlink_handle; > >- > > static unsigned int default_services_requested (struct corosync_api_v1 > > *corosync_api) > > { > > hdb_handle_t object_service_handle; > >@@ -572,17 +570,17 @@ unsigned int corosync_service_defaults_link_and_init > >(struct corosync_api_v1 *co > > * Declaration of exit_schedwrk_handler, because of cycle > > * (service_exit_schedwrk_handler calls service_unlink_schedwrk_handler, > > and vice-versa) > > */ > >-static int service_exit_schedwrk_handler (const void *data); > >+static void service_exit_schedwrk_handler (void *data); > > > >-static int service_unlink_schedwrk_handler (const void *data) { > >+static void service_unlink_schedwrk_handler (void *data) { > > struct seus_handler_data *cb_data = (struct seus_handler_data *)data; > >- struct corosync_api_v1 *api = (struct corosync_api_v1 *)cb_data->api; > > > > /* > > * Exit all ipc connections dependent on this service > > */ > >- if (cs_ipcs_service_destroy (cb_data->service_engine) == -1) > >- return -1; > >+ if (cs_ipcs_service_destroy (cb_data->service_engine) == -1) { > >+ goto redo_this_function; > >+ } > > > > log_printf(LOGSYS_LEVEL_NOTICE, > > "Service engine unloaded: %s\n", > >@@ -592,15 +590,22 @@ static int service_unlink_schedwrk_handler (const void > >*data) { > > > > lcr_ifact_release (cb_data->service_handle); > > > >- api->schedwrk_create ( > >- &swrk_service_exit_handle, > >- &service_exit_schedwrk_handler, > >- data); > >+ qb_loop_job_add(corosync_poll_handle_get(), > >+ QB_LOOP_LOW, > >+ data, > >+ service_exit_schedwrk_handler); > >+ > >+ return; > >+ > >+ redo_this_function: > >+ qb_loop_job_add(corosync_poll_handle_get(), > >+ QB_LOOP_LOW, > >+ data, > >+ service_unlink_schedwrk_handler); > > > >- return 0; > > } > > > >-static int service_exit_schedwrk_handler (const void *data) { > >+static void service_exit_schedwrk_handler (void *data) { > > int res; > > static int current_priority = 0; > > static int current_service_engine = 0; > >@@ -624,24 +629,26 @@ static int service_exit_schedwrk_handler (const void > >*data) { > > &service_handle); > > if (res == 0) { > > service_unlink_all_complete(); > >- return (res); > >+ return; > > } > > > > if (res == 1) { > > cb_data->service_engine = current_service_engine; > > cb_data->service_handle = service_handle; > > > >- api->schedwrk_create_nolock ( > >- &swrk_service_unlink_handle, > >- &service_unlink_schedwrk_handler, > >- data); > >- > >- return (0); > >+ qb_loop_job_add(corosync_poll_handle_get(), > >+ QB_LOOP_LOW, > >+ data, > >+ service_unlink_schedwrk_handler); > >+ return; > > } > > > >- return (res); > >+ qb_loop_job_add(corosync_poll_handle_get(), > >+ QB_LOOP_LOW, > >+ data, > >+ service_exit_schedwrk_handler); > > } > >- > >+ > > void corosync_service_unlink_all ( > > struct corosync_api_v1 *api, > > void (*unlink_all_complete) (void)) > >@@ -662,10 +669,10 @@ void corosync_service_unlink_all ( > > > > cb_data.api = api; > > > >- api->schedwrk_create ( > >- &swrk_service_exit_handle, > >- &service_exit_schedwrk_handler, > >- &cb_data); > >+ qb_loop_job_add(corosync_poll_handle_get(), > >+ QB_LOOP_LOW, > >+ &cb_data, > >+ service_exit_schedwrk_handler); > > } > > > > struct service_unlink_and_exit_data { > >@@ -675,7 +682,7 @@ struct service_unlink_and_exit_data { > > unsigned int ver; > > }; > > > >-static int service_unlink_and_exit_schedwrk_handler (void *data) > >+static void service_unlink_and_exit_schedwrk_handler (void *data) > > { > > struct service_unlink_and_exit_data *service_unlink_and_exit_data = > > data; > >@@ -688,8 +695,12 @@ static int service_unlink_and_exit_schedwrk_handler > >(void *data) > > > > if (res == 0) { > > free (service_unlink_and_exit_data); > >+ } else { > >+ qb_loop_job_add(corosync_poll_handle_get(), > >+ QB_LOOP_LOW, > >+ data, > >+ service_unlink_and_exit_schedwrk_handler); > > } > >- return (res); > > } > > > > typedef int (*schedwrk_cast) (const void *); > >@@ -706,10 +717,10 @@ unsigned int corosync_service_unlink_and_exit ( > > service_unlink_and_exit_data->api = api; > > service_unlink_and_exit_data->name = strdup (service_name); > > service_unlink_and_exit_data->ver = service_ver; > >- > >- api->schedwrk_create ( > >- &service_unlink_and_exit_data->handle, > >- (schedwrk_cast)service_unlink_and_exit_schedwrk_handler, > >- service_unlink_and_exit_data); > >+ > >+ qb_loop_job_add(corosync_poll_handle_get(), > >+ QB_LOOP_LOW, > >+ service_unlink_and_exit_data, > >+ service_unlink_and_exit_schedwrk_handler); > > return (0); > > } _______________________________________________ Openais mailing list [email protected] https://lists.linux-foundation.org/mailman/listinfo/openais
