From: Liu Yuan <[email protected]>

This deadlock can be reproduced by 026.

We should always service CREATE_AND_WRITE requests instead of queueing them
on wait queues while in recovery. Recovery can finish without any objects
in the list (rw->count == 0 in some special cases), in which case no one
calls resume_wait_recovery_requests() or any other flusher on rw_list or
obj_list, so a queued request would never be resumed.

Signed-off-by: Liu Yuan <[email protected]>
---
 sheep/request.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sheep/request.c b/sheep/request.c
index 5981e14..fd210d3 100644
--- a/sheep/request.c
+++ b/sheep/request.c
@@ -141,6 +141,12 @@ static int check_request_epoch(struct request *req)
 
 static bool request_in_recovery(struct request *req)
 {
+
+       /* CREATE requests are always serviced, never queued */
+       if (req->rq.opcode == SD_OP_CREATE_AND_WRITE_PEER ||
+           req->rq.opcode == SD_OP_CREATE_AND_WRITE_OBJ)
+               return false;
+
        /*
         * Request from recovery should go down the Farm even if
         * oid_in_recovery() returns true because we should also try snap
@@ -152,10 +158,12 @@ static bool request_in_recovery(struct request *req)
                 * Put request on wait queues of local node
                 */
                if (is_recovery_init()) {
+                       dprintf("%"PRIx64" on rw_queue\n", req->local_oid);
                        req->rp.result = SD_RES_OBJ_RECOVERING;
                        list_add_tail(&req->request_list,
                                      &sys->wait_rw_queue);
                } else {
+                       dprintf("%"PRIx64" on obj_queue\n", req->local_oid);
                        list_add_tail(&req->request_list,
                                      &sys->wait_obj_queue);
                }
@@ -328,7 +336,7 @@ static void queue_request(struct request *req)
                goto done;
        }
 
-       dprintf("%s\n", op_name(req->op));
+       dprintf("%s, %d\n", op_name(req->op), sys->status);
 
        switch (sys->status) {
        case SD_STATUS_KILLED:
-- 
1.7.12.84.gefa6462

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to