Re: [Ocfs2-devel] [PATCH] ocfs2/dlm: delay the migration when the lockres is in recovery
Any comment on this? wengang. On 10-05-25 15:59, Wengang Wang wrote: We shouldn't migrate a lockres in recovery state. Otherwise, it has the following problem: 1) Recovery happened as recovery master on a node(node A) which is in umount migrating all lockres' it owned(master is node A) to other nodes, say a node B. 2) So node A wants to take over all the lockres' that are mastered by the crashed node C. 3) Receiving request_locks request from node A, node B sends mig_lockres requests(for recovery) to node A for all lockres' that were mastered by the crashed node C. It can also send the request for a lockres(lockres A) which is not in node A's hashtable. 4) Receiving the mig_lockres request for lockres A from node B, a new lockres object lockres A', with INRECOVERING flag set, is created and inserted to hash table. 5) The recovery for lockres A' is going on on node A, it finally mastered the lockres A'. And now, RECOVERING flag is not cleared from lockres A' nor from lockres A on node B. 6) The migration for lockres A' goes since now node A mastered lockres A' already. The mig_lockres request(for migration) is sent to node B. 7) Node B responded with -EFAULT because now lockres A is still in recovery state. 8) Node A hits BUG() on the -EFAULT. Fix: The recovery state is cleared on node A(recovery master) after it's cleared on node B. We wait until the in-recovery state is cleared from node A and then migrate it to node B. 
Signed-off-by: Wengang Wang wen.gang.w...@oracle.com --- fs/ocfs2/dlm/dlmmaster.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9289b43..de9c128 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2371,6 +2371,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, goto leave; } + if (unlikely(res-state DLM_LOCK_RES_RECOVERING)) + goto leave; + ret = 0; queue = res-granted; for (i = 0; i 3; i++) { -- 1.6.6.1 ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel
Re: [Ocfs2-devel] [PATCH] ocfs2/dlm: correct the refmap on recovery master
Any comment? On 10-06-11 00:25, Wengang Wang wrote: If the dlm recovery goes on the non-master node where purging work is going on, There could be unexpected reference left on some lockres' on recovery master. That is because we migrated the lockres' to recovery master but didn't send deref requests to it accordingly(was sending to the dead original master or to the UNKNOWN). Fix: For the lockres which is in progress of dropping reference, we don't migrate it to recovery master and unhash the lockres in the purge work. For those not in progress of the dropping, delay the purge work until recovery finished so that it can send deref request to the correct master(recovery master) later. Signed-off-by: Wengang Wang wen.gang.w...@oracle.com --- fs/ocfs2/dlm/dlmrecovery.c | 17 +++-- fs/ocfs2/dlm/dlmthread.c | 36 ++-- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f8b75ce..43530ce 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, struct list_head *queue; struct dlm_lock *lock, *next; + assert_spin_locked(dlm-spinlock); + assert_spin_locked(res-spinlock); res-state |= DLM_LOCK_RES_RECOVERING; if (!list_empty(res-recovering)) { mlog(0, @@ -2336,9 +2338,20 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) /* the wake_up for this will happen when the * RECOVERING flag is dropped later */ - res-state = ~DLM_LOCK_RES_DROPPING_REF; + if (res-state DLM_LOCK_RES_DROPPING_REF) { + /* + * don't migrate a lockres which is in + * progress of dropping ref + */ + mlog(ML_NOTICE, %.*s ignored for + migration\n, res-lockname.len, + res-lockname.name); + res-state = + ~DLM_LOCK_RES_DROPPING_REF; + } else + dlm_move_lockres_to_recovery_list(dlm, + res); - dlm_move_lockres_to_recovery_list(dlm, res); } else if (res-owner == dlm-node_num) { dlm_free_dead_locks(dlm, res, 
dead_node); __dlm_lockres_calc_usage(dlm, res); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca..0771420 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -92,17 +92,23 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res) * truly ready to be freed. */ int __dlm_lockres_unused(struct dlm_lock_resource *res) { - if (!__dlm_lockres_has_locks(res) - (list_empty(res-dirty) !(res-state DLM_LOCK_RES_DIRTY))) { - /* try not to scan the bitmap unless the first two - * conditions are already true */ - int bit = find_next_bit(res-refmap, O2NM_MAX_NODES, 0); - if (bit = O2NM_MAX_NODES) { - /* since the bit for dlm-node_num is not - * set, inflight_locks better be zero */ - BUG_ON(res-inflight_locks != 0); - return 1; - } + int bit; + + if (__dlm_lockres_has_locks(res)) + return 0; + + if (!list_empty(res-dirty) || res-state DLM_LOCK_RES_DIRTY) + return 0; + + if (res-state DLM_LOCK_RES_RECOVERING) + return 0; + + bit = find_next_bit(res-refmap, O2NM_MAX_NODES, 0); + if (bit = O2NM_MAX_NODES) { + /* since the bit for dlm-node_num is not + * set, inflight_locks better be zero */ + BUG_ON(res-inflight_locks != 0); + return 1; } return 0; } @@ -158,6 +164,8 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, int master; int ret = 0; + assert_spin_locked(dlm-spinlock); + spin_lock(res-spinlock); if (!__dlm_lockres_unused(res)) { mlog(0, %s:%.*s: tried to purge but not unused\n, @@ -216,13 +224,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, master = %d\n, res-lockname.len, res-lockname.name, res, master); list_del_init(res-purge); - spin_unlock(res-spinlock); + /* not the last ref */
[Ocfs2-devel] [PATCH 1/1] ocfs2 fix o2dlm dlm run purgelist (rev 3)
This patch fixes two problems in dlm_run_purgelist 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge the same lockres instead of trying the next lockres. 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres. spinlock is reacquired but in this window lockres can get reused. This leads to BUG. This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the lockres spinlock protecting it from getting reused. Signed-off-by: Srinivas Eeda srinivas.e...@oracle.com Acked-by: Sunil Mushran sunil.mush...@oracle.com --- fs/ocfs2/dlm/dlmthread.c | 80 +++-- 1 files changed, 34 insertions(+), 46 deletions(-) diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 11a6d1f..960dc8d 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, spin_unlock(dlm-spinlock); } -static int dlm_purge_lockres(struct dlm_ctxt *dlm, +static void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { int master; int ret = 0; - spin_lock(res-spinlock); - if (!__dlm_lockres_unused(res)) { - mlog(0, %s:%.*s: tried to purge but not unused\n, -dlm-name, res-lockname.len, res-lockname.name); - __dlm_print_one_lock_resource(res); - spin_unlock(res-spinlock); - BUG(); - } - - if (res-state DLM_LOCK_RES_MIGRATING) { - mlog(0, %s:%.*s: Delay dropref as this lockres is -being remastered\n, dlm-name, res-lockname.len, -res-lockname.name); - /* Re-add the lockres to the end of the purge list */ - if (!list_empty(res-purge)) { - list_del_init(res-purge); - list_add_tail(res-purge, dlm-purge_list); - } - spin_unlock(res-spinlock); - return 0; - } + assert_spin_locked(dlm-spinlock); + assert_spin_locked(res-spinlock); master = (res-owner == dlm-node_num); - if (!master) - res-state |= 
DLM_LOCK_RES_DROPPING_REF; - spin_unlock(res-spinlock); mlog(0, purging lockres %.*s, master = %d\n, res-lockname.len, res-lockname.name, master); if (!master) { + res-state |= DLM_LOCK_RES_DROPPING_REF; /* drop spinlock... retake below */ + spin_unlock(res-spinlock); spin_unlock(dlm-spinlock); spin_lock(res-spinlock); @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, mlog(0, %s:%.*s: dlm_deref_lockres returned %d\n, dlm-name, res-lockname.len, res-lockname.name, ret); spin_lock(dlm-spinlock); + spin_lock(res-spinlock); } - spin_lock(res-spinlock); if (!list_empty(res-purge)) { mlog(0, removing lockres %.*s:%p from purgelist, master = %d\n, res-lockname.len, res-lockname.name, res, master); list_del_init(res-purge); - spin_unlock(res-spinlock); dlm_lockres_put(res); dlm-purge_count--; - } else - spin_unlock(res-spinlock); + } + + if (!__dlm_lockres_unused(res)) { + mlog(ML_ERROR, found lockres %s:%.*s: in use after deref\n, +dlm-name, res-lockname.len, res-lockname.name); + __dlm_print_one_lock_resource(res); + BUG(); + } __dlm_unhash_lockres(res); /* lockres is not in the hash now. drop the flag and wake up * any processes waiting in dlm_get_lock_resource. */ if (!master) { - spin_lock(res-spinlock); res-state = ~DLM_LOCK_RES_DROPPING_REF; spin_unlock(res-spinlock); wake_up(res-wq); - } - return 0; + } else + spin_unlock(res-spinlock); } static void dlm_run_purge_list(struct dlm_ctxt *dlm, @@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, lockres = list_entry(dlm-purge_list.next, struct dlm_lock_resource, purge); - /* Status of the lockres *might* change so double -* check. If the lockres is unused, holding the dlm -* spinlock will prevent people from getting and more -* refs on it -- there's no need to keep the lockres -* spinlock. */ spin_lock(lockres-spinlock); -
Re: [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2
A better (and simpler) fix would be to not dlm_grab() in dlm_init_lockres(). Then we can remove the corresponding dlm_put() in dlm_lockres_release(). That grab is not required because we don't call dlm_unregister_domain() based on refcount. On 07/19/2010 03:11 AM, Wengang Wang wrote: Any comment? On 10-06-21 21:43, Wengang Wang wrote: When we need to take both dlm_domain_lock and dlm-spinlock, we should take them in order of: dlm_domain_lock then dlm-spinlock. There is pathes disobey this order. That is calling dlm_lockres_put() with dlm-spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at the ref and dlm_put() locks on dlm_domain_lock. Fix: In dlm_run_purge_list, if it could the last ref on the lockres, unlock dlm-spinlock before calling dlm_lockres_put() and lock it back after dlm_lockres_put(). Signed-off-by: Wengang Wangwen.gang.w...@oracle.com --- fs/ocfs2/dlm/dlmthread.c | 27 +++ 1 files changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca..b1bd624 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -152,6 +152,7 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, spin_unlock(dlm-spinlock); } +/* returns 1 if the lockres is removed from purge list, 0 otherwise */ static int dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { @@ -217,7 +218,9 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, res, master); list_del_init(res-purge); spin_unlock(res-spinlock); +/* not the last ref, dlm_run_purge_list() holds another */ dlm_lockres_put(res); +ret = 1; dlm-purge_count--; } else spin_unlock(res-spinlock); @@ -232,13 +235,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, spin_unlock(res-spinlock); wake_up(res-wq); } -return 0; +return ret; } static void dlm_run_purge_list(struct dlm_ctxt *dlm, int purge_now) { -unsigned int run_max, unused; +unsigned int run_max, unused, removed; unsigned long purge_jiffies; struct 
dlm_lock_resource *lockres; @@ -280,13 +283,21 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, /* This may drop and reacquire the dlm spinlock if it * has to do migration. */ -if (dlm_purge_lockres(dlm, lockres)) -BUG(); - +removed = dlm_purge_lockres(dlm, lockres); +/* If the lockres is removed from purge list, this could be + * the last ref. Unlock dlm-spinlock to avoid deadlock + * --dlm_lockres_put() does spin_lock on dlm_domain_lock on the + * last ref while there, in other path, could be lock requests + * in normal order: dlm_domain_lock then dlm-spinlock. + */ +if (removed) +spin_unlock(dlm-spinlock); dlm_lockres_put(lockres); - -/* Avoid adding any scheduling latencies */ -cond_resched_lock(dlm-spinlock); +if (removed) +spin_lock(dlm-spinlock); +else +/* Avoid adding any scheduling latencies */ +cond_resched_lock(dlm-spinlock); } spin_unlock(dlm-spinlock); -- 1.6.6.1 ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel
Re: [Ocfs2-devel] [PATCH] ocfs2/dlm: correct the refmap on recovery master
Srini has a patch that handles a race concerning DLM_LOCK_RES_DROPPING_REF. You may want to look at this afresh with that patch. My problem with this patch is that the description is not clear. That is because we migrated the lockres' to recovery master but didn't send deref requests to it accordingly(was sending to the dead original master or to the UNKNOWN). Do you have the message sequencing that would lead to this situation? If we migrate the lockres to the reco master, the reco master will send an assert that will make us change the master. I think your problem is the one race we have concerning reco/migration. If so, this fix is not enough. Sunil On 07/19/2010 03:09 AM, Wengang Wang wrote: Any comment? On 10-06-11 00:25, Wengang Wang wrote: If the dlm recovery goes on the non-master node where purging work is going on, there could be an unexpected reference left on some lockres' on the recovery master. That is because we migrated the lockres' to recovery master but didn't send deref requests to it accordingly(was sending to the dead original master or to the UNKNOWN). Fix: For the lockres which is in progress of dropping reference, we don't migrate it to recovery master and unhash the lockres in the purge work. For those not in progress of the dropping, delay the purge work until recovery finishes so that it can send deref request to the correct master(recovery master) later. 
Signed-off-by: Wengang Wangwen.gang.w...@oracle.com --- fs/ocfs2/dlm/dlmrecovery.c | 17 +++-- fs/ocfs2/dlm/dlmthread.c | 36 ++-- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f8b75ce..43530ce 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, struct list_head *queue; struct dlm_lock *lock, *next; +assert_spin_locked(dlm-spinlock); +assert_spin_locked(res-spinlock); res-state |= DLM_LOCK_RES_RECOVERING; if (!list_empty(res-recovering)) { mlog(0, @@ -2336,9 +2338,20 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) /* the wake_up for this will happen when the * RECOVERING flag is dropped later */ -res-state= ~DLM_LOCK_RES_DROPPING_REF; +if (res-state DLM_LOCK_RES_DROPPING_REF) { +/* + * don't migrate a lockres which is in + * progress of dropping ref + */ +mlog(ML_NOTICE, %.*s ignored for + migration\n, res-lockname.len, + res-lockname.name); +res-state= +~DLM_LOCK_RES_DROPPING_REF; +} else +dlm_move_lockres_to_recovery_list(dlm, + res); -dlm_move_lockres_to_recovery_list(dlm, res); } else if (res-owner == dlm-node_num) { dlm_free_dead_locks(dlm, res, dead_node); __dlm_lockres_calc_usage(dlm, res); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca..0771420 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -92,17 +92,23 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res) * truly ready to be freed. 
*/ int __dlm_lockres_unused(struct dlm_lock_resource *res) { -if (!__dlm_lockres_has_locks(res) -(list_empty(res-dirty) !(res-state DLM_LOCK_RES_DIRTY))) { -/* try not to scan the bitmap unless the first two - * conditions are already true */ -int bit = find_next_bit(res-refmap, O2NM_MAX_NODES, 0); -if (bit= O2NM_MAX_NODES) { -/* since the bit for dlm-node_num is not - * set, inflight_locks better be zero */ -BUG_ON(res-inflight_locks != 0); -return 1; -} +int bit; + +if (__dlm_lockres_has_locks(res)) +return 0; + +if (!list_empty(res-dirty) || res-state DLM_LOCK_RES_DIRTY) +return 0; + +if (res-state DLM_LOCK_RES_RECOVERING) +return 0; + +bit = find_next_bit(res-refmap, O2NM_MAX_NODES, 0); +if (bit= O2NM_MAX_NODES) { +/* since the bit for dlm-node_num is not + * set, inflight_locks better be zero */ +BUG_ON(res-inflight_locks
Re: [Ocfs2-devel] [PATCH] ocfs2/dlm: fix a dead lock
Just a reminder. wengang. On 10-07-16 23:13, Wengang Wang wrote: When we have to take both dlm-master_lock and lockres-spinlock, take them in order lockres-spinlock and then dlm-master_lock. The patch fixes a violation of the rule. We can simply move taking dlm-master_lock to where we have dropped res-spinlock since when we access res-state and free mle memory we don't need master_lock's protection. Signed-off-by: Wengang Wang wen.gang.w...@oracle.com --- fs/ocfs2/dlm/dlmmaster.c |5 ++--- 1 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 94b97fc..6d098b8 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -3050,8 +3050,6 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, /* check for pre-existing lock */ spin_lock(dlm-spinlock); res = __dlm_lookup_lockres(dlm, name, namelen, hash); - spin_lock(dlm-master_lock); - if (res) { spin_lock(res-spinlock); if (res-state DLM_LOCK_RES_RECOVERING) { @@ -3069,14 +3067,15 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(res-spinlock); } + spin_lock(dlm-master_lock); /* ignore status. only nonzero status would BUG. */ ret = dlm_add_migration_mle(dlm, res, mle, oldmle, name, namelen, migrate-new_master, migrate-master); -unlock: spin_unlock(dlm-master_lock); +unlock: spin_unlock(dlm-spinlock); if (oldmle) { -- 1.6.6.1 ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel
Re: [Ocfs2-devel] [PATCH] ocfs2/dlm: correct the refmap on recovery master
On 10-07-19 16:52, Sunil Mushran wrote: Srini has a patch that handles a race concerning DLM_LOCK_RES_DROPPING_REF. You may want to look at this afresh with that patch. My problem with this patch is that the description is not clear. That is because we migrated the lockres' to recovery master but didn't send deref requests to it accordingly(was sending to the dead original master or to the UNKNOWN). Do you have the message sequencing that would lead to this situation? If we migrate the lockres to the reco master, the reco master will send an assert that will make us change the master. So first, the problem is not about the changing owner. It is that the bit(in refmap) on behalf of the node in question is not cleared on the new master(recovery master). So the new master will fail at purging the lockres due to the incorrect bit in refmap. Second, I have no messages at hand for the situation. But I think it is simple enough. 1) node A has no interest in lockres A any longer, so it is purging it. 2) the owner of lockres A is node B, so node A is sending de-ref message to node B. 3) at this time, node B crashed. node C becomes the recovery master. it recovers lockres A(because the master is the dead node B). 4) node A migrated lockres A to node C with a refbit there. 5) node A failed to send de-ref message to node B because it crashed. The failure is ignored. No other action is done for lockres A any more. So node A means to drop the ref on the master. But in such a situation, node C keeps the ref on behalf of node A unexpectedly. Node C finally fails at purging lockres A and hangs on umount. I think your problem is the one race we have concerning reco/migration. If so, this fix is not enough. It's a problem of purging + recovery. No pure migration for umount here. So what's your concern? regards, wengang. Sunil On 07/19/2010 03:09 AM, Wengang Wang wrote: Any comment? 
On 10-06-11 00:25, Wengang Wang wrote: If the dlm recovery goes on the non-master node where purging work is going on, There could be unexpected reference left on some lockres' on recovery master. That is because we migrated the lockres' to recovery master but didn't send deref requests to it accordingly(was sending to the dead original master or to the UNKNOWN). Fix: For the lockres which is in progress of dropping reference, we don't migrate it to recovery master and unhash the lockres in the purge work. For those not in progress of the dropping, delay the purge work until recovery finished so that it can send deref request to the correct master(recovery master) later. Signed-off-by: Wengang Wangwen.gang.w...@oracle.com --- fs/ocfs2/dlm/dlmrecovery.c | 17 +++-- fs/ocfs2/dlm/dlmthread.c | 36 ++-- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f8b75ce..43530ce 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, struct list_head *queue; struct dlm_lock *lock, *next; + assert_spin_locked(dlm-spinlock); + assert_spin_locked(res-spinlock); res-state |= DLM_LOCK_RES_RECOVERING; if (!list_empty(res-recovering)) { mlog(0, @@ -2336,9 +2338,20 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) /* the wake_up for this will happen when the * RECOVERING flag is dropped later */ - res-state= ~DLM_LOCK_RES_DROPPING_REF; + if (res-state DLM_LOCK_RES_DROPPING_REF) { + /* +* don't migrate a lockres which is in +* progress of dropping ref +*/ + mlog(ML_NOTICE, %.*s ignored for +migration\n, res-lockname.len, +res-lockname.name); + res-state= + ~DLM_LOCK_RES_DROPPING_REF; + } else + dlm_move_lockres_to_recovery_list(dlm, + res); - dlm_move_lockres_to_recovery_list(dlm, res); } else if (res-owner == dlm-node_num) { dlm_free_dead_locks(dlm, res, dead_node); 
__dlm_lockres_calc_usage(dlm, res); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca..0771420 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -92,17 +92,23 @@ int
Re: [Ocfs2-devel] [PATCH] ocfs2/dlm: remove potential deadlock -V2
Very cool fix! I will post the patch after test. regards, wengang. On 10-07-19 16:14, Sunil Mushran wrote: A better (and simpler) fix would be to not dlm_grab() in dlm_init_lockres(). Then we can remove the corresponding dlm_put() in dlm_lockres_release(). That grab is not required because we don't call dlm_unregister_domain() based on refcount. On 07/19/2010 03:11 AM, Wengang Wang wrote: Any comment? On 10-06-21 21:43, Wengang Wang wrote: When we need to take both dlm_domain_lock and dlm-spinlock, we should take them in order of: dlm_domain_lock then dlm-spinlock. There is pathes disobey this order. That is calling dlm_lockres_put() with dlm-spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at the ref and dlm_put() locks on dlm_domain_lock. Fix: In dlm_run_purge_list, if it could the last ref on the lockres, unlock dlm-spinlock before calling dlm_lockres_put() and lock it back after dlm_lockres_put(). Signed-off-by: Wengang Wangwen.gang.w...@oracle.com --- fs/ocfs2/dlm/dlmthread.c | 27 +++ 1 files changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca..b1bd624 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -152,6 +152,7 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, spin_unlock(dlm-spinlock); } +/* returns 1 if the lockres is removed from purge list, 0 otherwise */ static int dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { @@ -217,7 +218,9 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, res, master); list_del_init(res-purge); spin_unlock(res-spinlock); + /* not the last ref, dlm_run_purge_list() holds another */ dlm_lockres_put(res); + ret = 1; dlm-purge_count--; } else spin_unlock(res-spinlock); @@ -232,13 +235,13 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, spin_unlock(res-spinlock); wake_up(res-wq); } - return 0; + return ret; } static void dlm_run_purge_list(struct dlm_ctxt *dlm, int purge_now) { - 
unsigned int run_max, unused; + unsigned int run_max, unused, removed; unsigned long purge_jiffies; struct dlm_lock_resource *lockres; @@ -280,13 +283,21 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, /* This may drop and reacquire the dlm spinlock if it * has to do migration. */ - if (dlm_purge_lockres(dlm, lockres)) - BUG(); - + removed = dlm_purge_lockres(dlm, lockres); + /* If the lockres is removed from purge list, this could be +* the last ref. Unlock dlm-spinlock to avoid deadlock +* --dlm_lockres_put() does spin_lock on dlm_domain_lock on the +* last ref while there, in other path, could be lock requests +* in normal order: dlm_domain_lock then dlm-spinlock. +*/ + if (removed) + spin_unlock(dlm-spinlock); dlm_lockres_put(lockres); - - /* Avoid adding any scheduling latencies */ - cond_resched_lock(dlm-spinlock); + if (removed) + spin_lock(dlm-spinlock); + else + /* Avoid adding any scheduling latencies */ + cond_resched_lock(dlm-spinlock); } spin_unlock(dlm-spinlock); -- 1.6.6.1 ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel ___ Ocfs2-devel mailing list Ocfs2-devel@oss.oracle.com http://oss.oracle.com/mailman/listinfo/ocfs2-devel