From: Liu Yuan <[email protected]>

We should wait until every worker's signal handler is running, as the old code
did, or we will get random crashes when a signal handler is called after
patching.

Signed-off-by: Liu Yuan <[email protected]>
---
 sheep/request.c     |  2 +-
 sheep/trace/trace.c | 24 ++++++++++++++++++------
 sheep/work.c        |  4 ++--
 sheep/work.h        |  2 +-
 4 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/sheep/request.c b/sheep/request.c
index 7c2dec8..bcf705a 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -446,7 +446,7 @@ int exec_local_req(struct sd_req *rq, void *data)
 
        ret = eventfd_read(req->wait_efd, &value);
        if (ret < 0)
-               eprintf("event fd read error %m");
+               eprintf("%m\n");
 
        close(req->wait_efd);
        ret = req->rp.result;
diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
index 79868b7..4b59b09 100644
--- a/sheep/trace/trace.c
+++ b/sheep/trace/trace.c
@@ -39,7 +39,9 @@ static int trace_efd;
 static int nr_short_thread;
 static int trace_in_patch;
 
-pthread_mutex_t suspend_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t suspend_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t suspend_cond = PTHREAD_COND_INITIALIZER;
+static int suspend_count;
 
 static struct strbuf *buffer;
 static int nr_cpu;
@@ -54,11 +56,10 @@ union instruction {
 
 static notrace void suspend(int num)
 {
-       dprintf("going to suspend\n");
        pthread_mutex_lock(&suspend_lock);
-       /* Now I am suspended and sleep on suspend_lock */
+       suspend_count--;
+       pthread_cond_wait(&suspend_cond, &suspend_lock);
        pthread_mutex_unlock(&suspend_lock);
-       dprintf("going to resume\n");
 }
 
 static inline int trace_hash(unsigned long ip)
@@ -180,18 +181,29 @@ notrace int register_trace_function(trace_func_t func)
 static notrace void suspend_worker_threads(void)
 {
        struct worker_info *wi;
+       suspend_count = total_ordered_workers;
 
        /* Hold the lock, then all other worker can sleep on it */
-       pthread_mutex_lock(&suspend_lock);
        list_for_each_entry(wi, &worker_info_list, worker_info_siblings) {
-               if (wi->worker_thread &&
+               if (wi->ordered &&
                    pthread_kill(wi->worker_thread, SIGUSR2) != 0)
                        dprintf("%m\n");
        }
+
+wait_for_worker_suspend:
+       pthread_mutex_lock(&suspend_lock);
+       if (suspend_count > 0) {
+               pthread_mutex_unlock(&suspend_lock);
+               pthread_yield();
+               goto wait_for_worker_suspend;
+       }
+       pthread_mutex_unlock(&suspend_lock);
 }
 
 static notrace void resume_worker_threads(void)
 {
+       pthread_mutex_lock(&suspend_lock);
+       pthread_cond_broadcast(&suspend_cond);
        pthread_mutex_unlock(&suspend_lock);
 }
 
diff --git a/sheep/work.c b/sheep/work.c
index 80096e3..d96026d 100644
--- a/sheep/work.c
+++ b/sheep/work.c
@@ -35,7 +35,7 @@
 #include "trace/trace.h"
 
 static int efd;
-int total_nr_workers;
+int total_ordered_workers;
 LIST_HEAD(worker_info_list);
 
 enum wq_state {
@@ -246,11 +246,11 @@ struct work_queue *init_work_queue(const char *name, bool ordered)
                }
 
                pthread_mutex_unlock(&wi->startup_lock);
+               total_ordered_workers++;
        }
 
        list_add(&wi->worker_info_siblings, &worker_info_list);
 
-       total_nr_workers++;
        return &wi->q;
 destroy_threads:
 
diff --git a/sheep/work.h b/sheep/work.h
index bcb8383..4d45dd6 100644
--- a/sheep/work.h
+++ b/sheep/work.h
@@ -42,7 +42,7 @@ struct worker_info {
 };
 
 extern struct list_head worker_info_list;
-extern int total_nr_workers;
+extern int total_ordered_workers;
 
 /* if 'ordered' is true, the work queue are processes in order. */
 struct work_queue *init_work_queue(const char *name, bool ordered);
-- 
1.7.12.84.gefa6462

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to