From: Liu Yuan <[email protected]>

This is motivated by a crash log:
...
Sep 01 21:53:50 [block] suspend(64) going to resume
Sep 01 21:53:50 [block] exec_local_req(449) event fd read error Interrupted system call
Sep 01 21:53:50 [block] do_process_work(1245) failed: 12, 0 , 1, 8
Sep 01 21:53:50 [main] cluster_op_done(283) LOCK_VDI (0xc7a790)
Sep 01 21:53:50 [gway 4] do_process_work(1238) 83, c7c850, 32580
...

The log shows that eventfd_read() got EINTR without proper handling, which then
messed up the subsequent processing.

- retry the read in exec_local_req() (worker thread)
- return early in enable_tracer()/disable_tracer() (main thread) and expect
  epoll to wake us up again

Signed-off-by: Liu Yuan <[email protected]>
---
 sheep/request.c     | 10 +++++++---
 sheep/trace/trace.c | 16 ++++++++++++----
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/sheep/request.c b/sheep/request.c
index bcf705a..3f9b870 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -426,7 +426,8 @@ static struct request *alloc_local_request(void *data, int data_length)
 /*
  * Exec the request locally and synchronously.
  *
- * This function takes advantage of gateway's retry mechanism.
+ * This function takes advantage of gateway's retry mechanism and can be only
+ * called from worker thread.
  */
 int exec_local_req(struct sd_req *rq, void *data)
 {
@@ -444,9 +445,13 @@ int exec_local_req(struct sd_req *rq, void *data)
 
        eventfd_write(sys->req_efd, value);
 
+again:
+       /* In error case (for e.g, EINTR) just retry read */
        ret = eventfd_read(req->wait_efd, &value);
-       if (ret < 0)
+       if (ret < 0) {
                eprintf("%m\n");
+               goto again;
+       }
 
        close(req->wait_efd);
        ret = req->rp.result;
@@ -858,7 +863,6 @@ int create_listen_port(int port, void *data)
        return create_listen_ports(port, create_listen_port_fn, data);
 }
 
-
 static void req_handler(int listen_fd, int events, void *data)
 {
        eventfd_t value;
diff --git a/sheep/trace/trace.c b/sheep/trace/trace.c
index 4b59b09..f27ea39 100644
--- a/sheep/trace/trace.c
+++ b/sheep/trace/trace.c
@@ -242,8 +242,14 @@ static notrace void enable_tracer(int fd, int events, void *data)
        int ret;
 
        ret = eventfd_read(trace_efd, &value);
-       if (ret < 0)
-               eprintf("%m");
+       /*
+        * In error case we can't retry read in main thread, simply return and
+        * expected to be waken up by epoll again.
+        */
+       if (ret < 0) {
+               eprintf("%m\n");
+               return;
+       }
 
        if (short_thread_running())
                return;
@@ -262,8 +268,10 @@ static notrace void disable_tracer(int fd, int events, void *data)
        int ret;
 
        ret = eventfd_read(fd, &value);
-       if (ret < 0)
-               eprintf("%m");
+       if (ret < 0) {
+               eprintf("%m\n");
+               return;
+       }
 
        if (short_thread_running())
                return;
-- 
1.7.12.84.gefa6462

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to