From: Liu Yuan <[email protected]>

This hang is caused by a cluster request (adding a new vdi):

1) The cluster request blocks the cluster and waits for its worker to finish.
2) A confchg happens, but it is queued after this cluster request.
3) cluster_request_fn() issues a write request, which always fails because of
   one node failure, and retries forever.
4) cluster_request_done() is never called, so we can't unblock the event list.

This can be reproduced reliably with the following script:
================

for i in `seq 0 7`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done
sleep 1
collie/collie cluster format  -c 3
echo create new vdis
(
for i in `seq 0 40`;do
collie/collie vdi create test$i 4M
done
) &

echo kill nodes
sleep 1
for i in 1 2 3 4 5; do pkill -f "sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p 700$i";sleep 1;done;

for i in `seq 1 5`; do sheep/sheep -d /home/tailai.ly/sheepdog/store/$i -z $i -p $((7000+$i));done

echo wait for object recovery to finish
for ((;;)); do
        if [ "$(pgrep collie)" ]; then
                sleep 1
        else
                break
        fi
done
=================

The fix adds confchg events to the head of the event list, so that they are
processed ahead of a blocked cluster request.

Signed-off-by: Liu Yuan <[email protected]>
---
 include/list.h           |   27 +++++++++++++++++++++++++++
 sheep/cluster/corosync.c |   39 +++++++++++++++++++++++++++++++++++----
 2 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/include/list.h b/include/list.h
index 30ee3c4..c84469d 100644
--- a/include/list.h
+++ b/include/list.h
@@ -54,6 +54,33 @@ static inline int list_empty(const struct list_head *head)
             &pos->member != (head);                                    \
             pos = n, n = list_entry(n->member.next, typeof(*n), member))
 
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:       the type * to use as a loop cursor.
+ * @head:      the head for your list.
+ * @member:    the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member)                 \
+       for (pos = list_entry((head)->prev, typeof(*pos), member);      \
+            &pos->member != (head);    \
+            pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * @pos:       the type * to use as a loop cursor.
+ * @n:         another type * to use as temporary storage
+ * @head:      the head for your list.
+ * @member:    the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member)         \
+       for (pos = list_entry((head)->prev, typeof(*pos), member),      \
+               n = list_entry(pos->member.prev, typeof(*pos), member); \
+            &pos->member != (head);                                    \
+            pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
 static inline void __list_add(struct list_head *new,
                              struct list_head *prev,
                              struct list_head *next)
diff --git a/sheep/cluster/corosync.c b/sheep/cluster/corosync.c
index 330cb71..55a19fe 100644
--- a/sheep/cluster/corosync.c
+++ b/sheep/cluster/corosync.c
@@ -198,8 +198,8 @@ retry:
        return 0;
 }
 
-static struct corosync_event *find_event(enum corosync_event_type type,
-               struct cpg_node *sender)
+static inline struct corosync_event *find_event(enum corosync_event_type type,
+                                               struct cpg_node *sender)
 {
        struct corosync_event *cevent;
 
@@ -212,6 +212,36 @@ static struct corosync_event *find_event(enum corosync_event_type type,
        return NULL;
 }
 
+static inline struct corosync_event *
+lookup_event_reverse(enum corosync_event_type type)
+{
+       struct corosync_event *cevent;
+
+       list_for_each_entry_reverse(cevent, &corosync_event_list, list) {
+               if (cevent->type == type)
+                       return cevent;
+       }
+
+       return NULL;
+}
+
+/*
+ * Add confchg to the event list
+ *
+ * We should add confchg event to head of the event list in order to process
+ * it ASAP and we keep relative order of confchg events.
+ */
+static inline void add_confchg_to_event_list(enum corosync_event_type type,
+                                            struct corosync_event *cevent)
+{
+       struct corosync_event *entry = lookup_event_reverse(type);
+
+       if (entry)
+               list_add_tail(&cevent->list, &entry->list);
+       else
+               list_add(&cevent->list, &corosync_event_list);
+}
+
 static int is_master(struct cpg_node *node)
 {
        int i;
@@ -561,7 +591,7 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
                cevent->type = COROSYNC_EVENT_TYPE_LEAVE;
                cevent->sender = left_sheep[i];
 
-               list_add_tail(&cevent->list, &corosync_event_list);
+               add_confchg_to_event_list(COROSYNC_EVENT_TYPE_LEAVE, cevent);
        }
 
        /* dispatch join_handler */
@@ -572,7 +602,8 @@ static void cdrv_cpg_confchg(cpg_handle_t handle,
 
                cevent->type = COROSYNC_EVENT_TYPE_JOIN_REQUEST;
                cevent->sender = joined_sheep[i];
-               list_add_tail(&cevent->list, &corosync_event_list);
+               add_confchg_to_event_list(COROSYNC_EVENT_TYPE_JOIN_REQUEST,
+                                         cevent);
        }
 
        if (!join_finished) {
-- 
1.7.10.2

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to