Hi list:

  In my environment (2 sheep only), sheep always panics during recovery when a 
node that had left rejoins the cluster.

It turns out to be intended behavior in get_nth_node:

=========================================
        if (idx == base) {
                panic("bug"); /* not found */
=========================================

  While I agree this is correct in most scenarios, it does seem too 
intrusive while recovering in my trivial test.
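To be specific, find_tgt_node calls get_nth_node, which may fail to find a node
when searching for the previous target node under certain conditions.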

  I don't have a lot of faith in my own workaround either. Let me know what you 
think ;)

  Thanks.

diff --git a/include/sheep.h b/include/sheep.h
index e435b63..77516a8 100644
--- a/include/sheep.h
+++ b/include/sheep.h
@@ -180,7 +180,7 @@ static inline int same_zone(struct sd_vnode *e, int n1, int 
n2)
 
 /* traverse the virtual node list and return the n'th one */
 static inline int get_nth_node(struct sd_vnode *entries,
-                              int nr_entries, int base, int n)
+                              int nr_entries, int base, int n, int hard)
 {
        int nodes[SD_MAX_REDUNDANCY];
        int nr = 0, idx = base, i;
@@ -190,7 +190,12 @@ static inline int get_nth_node(struct sd_vnode *entries,
 next:
                idx = (idx + 1) % nr_entries;
                if (idx == base) {
-                       panic("bug"); /* not found */
+                       if (hard == 1)
+                               panic("bug"); /* not found */
+                       else
+                               //FIXME: we know it may fail with find_tgt_node
+                               //when search for previous target node in 
certain conditions
+                               return -1;
                }
                for (i = 0; i < nr; i++) {
                        if (same_node(entries, idx, nodes[i]))
@@ -216,7 +221,7 @@ static inline int hval_to_sheep(struct sd_vnode *entries,
                if (id > e->id && id <= n->id)
                        break;
        }
-       return get_nth_node(entries, nr_entries, (i + 1) % nr_entries, idx);
+       return get_nth_node(entries, nr_entries, (i + 1) % nr_entries, idx, 1);
 }
 
 static inline int obj_to_sheep(struct sd_vnode *entries,
diff --git a/sheep/store.c b/sheep/store.c
index 256feae..3076fca 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -1028,7 +1028,7 @@ static int contains_node(struct sd_vnode *key,
        int i;
 
        for (i = 0; i < copies; i++) {
-               int idx = get_nth_node(entry, nr, base_idx, i);
+               int idx = get_nth_node(entry, nr, base_idx, i, 1);
                if (memcmp(key->addr, entry[idx].addr, sizeof(key->addr)) == 0
                    && key->port == entry[idx].port)
                        return idx;
@@ -1107,29 +1107,35 @@ static int find_tgt_node(struct sd_vnode *old_entry,
                         int copy_idx)
 {
        int i, j, idx;
+       int e = get_nth_node(cur_entry, cur_nr, cur_idx, copy_idx, 0);
 
        dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32", 
%"PRIu32", %"PRIu32"\n",
                old_idx, old_nr, old_copies, cur_idx, cur_nr, cur_copies, 
copy_idx);
 
        /* If the same node is in the previous target nodes, return its index */
-       idx = contains_node(cur_entry + get_nth_node(cur_entry, cur_nr, 
cur_idx, copy_idx),
-                           old_entry, old_nr, old_idx, old_copies);
-       if (idx >= 0) {
-               dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", idx, 
copy_idx, cur_idx, cur_nr);
-               return idx;
+       if (e != -1) {
+               idx = contains_node(cur_entry + e,
+                               old_entry, old_nr, old_idx, old_copies);
+               if (idx >= 0) {
+                       dprintf("%"PRIu32", %"PRIu32", %"PRIu32", %"PRIu32"\n", 
idx, copy_idx, cur_idx, cur_nr);
+                       return idx;
+               }
        }
 
+       dprintf("it's not the same node as previous target\n");
+
        for (i = 0, j = 0; ; i++, j++) {
                if (i < copy_idx) {
                        /* Skip if the node can recover from its local */
-                       idx = contains_node(cur_entry + get_nth_node(cur_entry, 
cur_nr, cur_idx, i),
+                       idx = contains_node(cur_entry + get_nth_node(cur_entry, 
cur_nr, cur_idx, i, 1),
                                            old_entry, old_nr, old_idx, 
old_copies);
                        if (idx >= 0)
                                continue;
 
+                       dprintf("it cannot recover from its local\n");
                        /* Find the next target which needs to recover from 
remote */
                        while (j < old_copies &&
-                              contains_node(old_entry + 
get_nth_node(old_entry, old_nr, old_idx, j),
+                              contains_node(old_entry + 
get_nth_node(old_entry, old_nr, old_idx, j, 1),
                                             cur_entry, cur_nr, cur_idx, 
cur_copies) >= 0)
                                j++;
                }
@@ -1145,9 +1151,9 @@ static int find_tgt_node(struct sd_vnode *old_entry,
                if (i == copy_idx) {
                        /* Found the target node correspoinding to copy_idx */
                        dprintf("%"PRIu32", %"PRIu32", %"PRIu32"\n",
-                               get_nth_node(old_entry, old_nr, old_idx, j),
+                               get_nth_node(old_entry, old_nr, old_idx, j, 1),
                                copy_idx, (cur_idx + i) % cur_nr);
-                       return get_nth_node(old_entry, old_nr, old_idx, j);
+                       return get_nth_node(old_entry, old_nr, old_idx, j, 1);
                }
 
        }
-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to