From: Liu Yuan <[email protected]> We should wait for every worker's signal handler to be running, as the old code did; otherwise we will get random crashes when a signal handler is called after patching.
Signed-off-by: Liu Yuan <[email protected]> --- sheep/request.c | 2 +- sheep/trace/trace.c | 24 ++++++++++++++++++------ sheep/work.c | 4 ++-- sheep/work.h | 2 +- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/sheep/request.c b/sheep/request.c index 7c2dec8..bcf705a 100644 --- a/sheep/request.c +++ b/sheep/request.c @@ -446,7 +446,7 @@ int exec_local_req(struct sd_req *rq, void *data) ret = eventfd_read(req->wait_efd, &value); if (ret < 0) - eprintf("event fd read error %m"); + eprintf("%m\n"); close(req->wait_efd); ret = req->rp.result; diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c index 79868b7..4b59b09 100644 --- a/sheep/trace/trace.c +++ b/sheep/trace/trace.c @@ -39,7 +39,9 @@ static int trace_efd; static int nr_short_thread; static int trace_in_patch; -pthread_mutex_t suspend_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t suspend_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER; +static int suspend_count; static struct strbuf *buffer; static int nr_cpu; @@ -54,11 +56,10 @@ union instruction { static notrace void suspend(int num) { - dprintf("going to suspend\n"); pthread_mutex_lock(&suspend_lock); - /* Now I am suspended and sleep on suspend_lock */ + suspend_count--; + pthread_cond_wait(&suspend_cond, &suspend_lock); pthread_mutex_unlock(&suspend_lock); - dprintf("going to resume\n"); } static inline int trace_hash(unsigned long ip) @@ -180,18 +181,29 @@ notrace int register_trace_function(trace_func_t func) static notrace void suspend_worker_threads(void) { struct worker_info *wi; + suspend_count = total_ordered_workers; /* Hold the lock, then all other worker can sleep on it */ - pthread_mutex_lock(&suspend_lock); list_for_each_entry(wi, &worker_info_list, worker_info_siblings) { - if (wi->worker_thread && + if (wi->ordered && pthread_kill(wi->worker_thread, SIGUSR2) != 0) dprintf("%m\n"); } + +wait_for_worker_suspend: + pthread_mutex_lock(&suspend_lock); + if 
(suspend_count > 0) { + pthread_mutex_unlock(&suspend_lock); + pthread_yield(); + goto wait_for_worker_suspend; + } + pthread_mutex_unlock(&suspend_lock); } static notrace void resume_worker_threads(void) { + pthread_mutex_lock(&suspend_lock); + pthread_cond_broadcast(&suspend_cond); pthread_mutex_unlock(&suspend_lock); } diff --git a/sheep/work.c b/sheep/work.c index 80096e3..d96026d 100644 --- a/sheep/work.c +++ b/sheep/work.c @@ -35,7 +35,7 @@ #include "trace/trace.h" static int efd; -int total_nr_workers; +int total_ordered_workers; LIST_HEAD(worker_info_list); enum wq_state { @@ -246,11 +246,11 @@ struct work_queue *init_work_queue(const char *name, bool ordered) } pthread_mutex_unlock(&wi->startup_lock); + total_ordered_workers++; } list_add(&wi->worker_info_siblings, &worker_info_list); - total_nr_workers++; return &wi->q; destroy_threads: diff --git a/sheep/work.h b/sheep/work.h index bcb8383..4d45dd6 100644 --- a/sheep/work.h +++ b/sheep/work.h @@ -42,7 +42,7 @@ struct worker_info { }; extern struct list_head worker_info_list; -extern int total_nr_workers; +extern int total_ordered_workers; /* if 'ordered' is true, the work queue are processes in order. */ struct work_queue *init_work_queue(const char *name, bool ordered); -- 1.7.12.84.gefa6462 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
