[Ocfs2-devel] [PATCH 5/6] ocfs2: Avoid blocking in ocfs2_mark_lockres_freeing() in downconvert thread

2014-02-26 Thread Jan Kara
If we are dropping the last inode reference from the downconvert thread,
we will end up calling ocfs2_mark_lockres_freeing(), which can block if
the lock we are freeing is queued, thus creating an A-A deadlock.
Luckily, since we are the downconvert thread, we can immediately dequeue
the lock and thus avoid waiting in this case.
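
For readers skimming the thread, here is the new early-dequeue path in
condensed form, lifted from the dlmglue.c hunk below (the surrounding
function setup and the pre-existing wait loop are elided;
ocfs2_process_blocked_lock() is the existing static helper the
downconvert thread already uses, forward-declared by this patch):

    /* Condensed sketch of the check added to ocfs2_mark_lockres_freeing():
     * if the caller *is* the downconvert thread and the lockres is merely
     * queued (not currently being processed), pull it off the blocked list
     * right away instead of waiting for OCFS2_LOCK_QUEUED to clear -- we
     * are the only thread that would ever clear that flag, so waiting here
     * would self-deadlock. */
    if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
            spin_unlock_irqrestore(&lockres->l_lock, flags);

            spin_lock_irqsave(&osb->dc_task_lock, flags2);
            list_del_init(&lockres->l_blocked_list);
            osb->blocked_lock_count--;
            spin_unlock_irqrestore(&osb->dc_task_lock, flags2);

            /* No lock type that can reach this path defines ->post_unlock()
             * today; warn if that ever changes. */
            WARN_ON_ONCE(lockres->l_ops->post_unlock);
            /* OCFS2_LOCK_FREEING is already set, so this mostly just
             * completes the downconvert-thread bookkeeping for the lockres */
            ocfs2_process_blocked_lock(osb, lockres);
            return;
    }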

Reviewed-by: Mark Fasheh mfas...@suse.de
Reviewed-by: Srinivas Eeda srinivas.e...@oracle.com
Signed-off-by: Jan Kara j...@suse.cz
---
 fs/ocfs2/dlmglue.c | 44 +---
 fs/ocfs2/dlmglue.h |  3 ++-
 fs/ocfs2/inode.c   |  7 ---
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 19986959d149..6bd690b5a061 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3144,22 +3144,60 @@ out:
return 0;
 }
 
+static void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
+  struct ocfs2_lock_res *lockres);
+
 /* Mark the lockres as being dropped. It will no longer be
  * queued if blocking, but we still may have to wait on it
  * being dequeued from the downconvert thread before we can consider
  * it safe to drop.
  *
  * You can *not* attempt to call cluster_lock on this lockres anymore. */
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+   struct ocfs2_lock_res *lockres)
 {
int status;
struct ocfs2_mask_waiter mw;
-   unsigned long flags;
+   unsigned long flags, flags2;
 
ocfs2_init_mask_waiter(&mw);

spin_lock_irqsave(&lockres->l_lock, flags);
lockres->l_flags |= OCFS2_LOCK_FREEING;
+   if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
+   /*
+* We know the downconvert is queued but not in progress
+* because we are the downconvert thread and processing
+* different lock. So we can just remove the lock from the
+* queue. This is not only an optimization but also a way
+* to avoid the following deadlock:
+*   ocfs2_dentry_post_unlock()
+*     ocfs2_dentry_lock_put()
+*       ocfs2_drop_dentry_lock()
+*         iput()
+*           ocfs2_evict_inode()
+*             ocfs2_clear_inode()
+*               ocfs2_mark_lockres_freeing()
+*                 ... blocks waiting for OCFS2_LOCK_QUEUED
+*                 since we are the downconvert thread which
+*                 should clear the flag.
+*/
+   spin_unlock_irqrestore(&lockres->l_lock, flags);
+   spin_lock_irqsave(&osb->dc_task_lock, flags2);
+   list_del_init(&lockres->l_blocked_list);
+   osb->blocked_lock_count--;
+   spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
+   /*
+* Warn if we recurse into another post_unlock call.  Strictly
+* speaking it isn't a problem but we need to be careful if
+* that happens (stack overflow, deadlocks, ...) so warn if
+* ocfs2 grows a path for which this can happen.
+*/
+   WARN_ON_ONCE(lockres->l_ops->post_unlock);
+   /* Since the lock is freeing we don't do much in the fn below */
+   ocfs2_process_blocked_lock(osb, lockres);
+   return;
+   }
while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -3180,7 +3218,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
 {
int ret;
 
-   ocfs2_mark_lockres_freeing(lockres);
+   ocfs2_mark_lockres_freeing(osb, lockres);
ret = ocfs2_drop_lock(osb, lockres);
if (ret)
mlog_errno(ret);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 1d596d8c4a4a..d293a22c32c5 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
 void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
 
 
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+   struct ocfs2_lock_res *lockres);
 void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
   struct ocfs2_lock_res *lockres);
 
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3b0d722de35e..9661f8db21dc 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1053,6 +1053,7 @@ static void ocfs2_clear_inode(struct inode *inode)
 {
int status;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+   struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

Re: [Ocfs2-devel] [PATCH 5/6] ocfs2: Avoid blocking in ocfs2_mark_lockres_freeing() in downconvert thread

2014-02-25 Thread Mark Fasheh
On Fri, Feb 21, 2014 at 10:45:03AM +0100, Jan Kara wrote:
 If we are dropping the last inode reference from the downconvert thread,
 we will end up calling ocfs2_mark_lockres_freeing(), which can block if
 the lock we are freeing is queued, thus creating an A-A deadlock.
 Luckily, since we are the downconvert thread, we can immediately dequeue
 the lock and thus avoid waiting in this case.
 
 Signed-off-by: Jan Kara j...@suse.cz

Latest version of this looks good, thanks Jan!

Reviewed-by: Mark Fasheh mfas...@suse.de
--Mark

--
Mark Fasheh

___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
https://oss.oracle.com/mailman/listinfo/ocfs2-devel


Re: [Ocfs2-devel] [PATCH 5/6] ocfs2: Avoid blocking in ocfs2_mark_lockres_freeing() in downconvert thread

2014-02-21 Thread Jan Kara
On Thu 20-02-14 21:21:14, Srinivas Eeda wrote:
 I like the idea of dc_task handling queued basts in
 ocfs2_mark_lockres_freeing.
 
 I am wondering if we should call lockres->l_ops->post_unlock(osb,
 lockres) ? Would there be another node waiting for a bast response ?
  Ah, right. Somehow I missed that. I'll send fixed version in a moment.

Honza
 
 On 02/20/2014 07:18 AM, Jan Kara wrote:
 If we are dropping the last inode reference from the downconvert thread,
 we will end up calling ocfs2_mark_lockres_freeing(), which can block if
 the lock we are freeing is queued, thus creating an A-A deadlock.
 Luckily, since we are the downconvert thread, we can immediately dequeue
 the lock and thus avoid waiting in this case.
 
 Signed-off-by: Jan Kara j...@suse.cz
 ---
   fs/ocfs2/dlmglue.c | 33 +++--
   fs/ocfs2/dlmglue.h |  3 ++-
   fs/ocfs2/inode.c   |  7 ---
   3 files changed, 37 insertions(+), 6 deletions(-)
 
 diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
 index 19986959d149..b7580157ef01 100644
 --- a/fs/ocfs2/dlmglue.c
 +++ b/fs/ocfs2/dlmglue.c
 @@ -3150,7 +3150,8 @@ out:
* it safe to drop.
*
* You can *not* attempt to call cluster_lock on this lockres anymore. */
 -void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
 +void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
 +struct ocfs2_lock_res *lockres)
   {
  int status;
  struct ocfs2_mask_waiter mw;
 @@ -3160,6 +3161,33 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
  spin_lock_irqsave(&lockres->l_lock, flags);
  lockres->l_flags |= OCFS2_LOCK_FREEING;
 +if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
 +unsigned long flags;
 +
 +/*
 + * We know the downconvert is queued but not in progress
 + * because we are the downconvert thread and processing
 + * different lock. So we can just remove the lock from the
 + * queue. This is not only an optimization but also a way
 + * to avoid the following deadlock:
 + *   ocfs2_dentry_post_unlock()
 + * ocfs2_dentry_lock_put()
 + *   ocfs2_drop_dentry_lock()
 + * iput()
 + *   ocfs2_evict_inode()
 + * ocfs2_clear_inode()
 + *   ocfs2_mark_lockres_freeing()
 + * ... blocks waiting for OCFS2_LOCK_QUEUED
 + * since we are the downconvert thread which
 + * should clear the flag.
 + */
 +spin_lock_irqsave(&osb->dc_task_lock, flags);
 +list_del_init(&lockres->l_blocked_list);
 +osb->blocked_lock_count--;
 +spin_unlock_irqrestore(&osb->dc_task_lock, flags);
 +lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
 +goto out_unlock;
 +}
  while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
  lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
  spin_unlock_irqrestore(&lockres->l_lock, flags);
 @@ -3172,6 +3200,7 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
  spin_lock_irqsave(&lockres->l_lock, flags);
  }
 +out_unlock:
  spin_unlock_irqrestore(&lockres->l_lock, flags);
   }
 @@ -3180,7 +3209,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
   {
  int ret;
 -ocfs2_mark_lockres_freeing(lockres);
 +ocfs2_mark_lockres_freeing(osb, lockres);
  ret = ocfs2_drop_lock(osb, lockres);
  if (ret)
  mlog_errno(ret);
 diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
 index 1d596d8c4a4a..d293a22c32c5 100644
 --- a/fs/ocfs2/dlmglue.h
 +++ b/fs/ocfs2/dlmglue.h
 @@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
   void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
 -void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
 +void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
 +struct ocfs2_lock_res *lockres);
   void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
 struct ocfs2_lock_res *lockres);
 diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
 index 3b0d722de35e..9661f8db21dc 100644
 --- a/fs/ocfs2/inode.c
 +++ b/fs/ocfs2/inode.c
 @@ -1053,6 +1053,7 @@ static void ocfs2_clear_inode(struct inode *inode)
   {
  int status;
  struct ocfs2_inode_info *oi = OCFS2_I(inode);
 +struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
  clear_inode(inode);
  trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
 @@ -1069,9 +1070,9 @@ static void ocfs2_clear_inode(struct inode *inode)
  /* Do these before all the other work so that we don't bounce
   * the downconvert thread while waiting to destroy the locks. */

Re: [Ocfs2-devel] [PATCH 5/6] ocfs2: Avoid blocking in ocfs2_mark_lockres_freeing() in downconvert thread

2014-02-21 Thread Jan Kara
On Fri 21-02-14 10:14:15, Jan Kara wrote:
 On Thu 20-02-14 21:21:14, Srinivas Eeda wrote:
  I like the idea of dc_task handling queued basts in
  ocfs2_mark_lockres_freeing.
  
  I am wondering if we should call lockres->l_ops->post_unlock(osb,
  lockres) ? Would there be another node waiting for a bast response ?
   Ah, right. Somehow I missed that. I'll send fixed version in a moment.
  Actually it doesn't matter in practice because the locks for which we can
hit this path don't have ->post_unlock() method. But still I agree we
should be consistent.

Honza
  
  On 02/20/2014 07:18 AM, Jan Kara wrote:
  If we are dropping the last inode reference from the downconvert thread,
  we will end up calling ocfs2_mark_lockres_freeing(), which can block if
  the lock we are freeing is queued, thus creating an A-A deadlock.
  Luckily, since we are the downconvert thread, we can immediately dequeue
  the lock and thus avoid waiting in this case.
  
  Signed-off-by: Jan Kara j...@suse.cz
  ---
fs/ocfs2/dlmglue.c | 33 +++--
fs/ocfs2/dlmglue.h |  3 ++-
fs/ocfs2/inode.c   |  7 ---
3 files changed, 37 insertions(+), 6 deletions(-)
  
  diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
  index 19986959d149..b7580157ef01 100644
  --- a/fs/ocfs2/dlmglue.c
  +++ b/fs/ocfs2/dlmglue.c
  @@ -3150,7 +3150,8 @@ out:
 * it safe to drop.
 *
 * You can *not* attempt to call cluster_lock on this lockres anymore. */
  -void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
  +void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
  +  struct ocfs2_lock_res *lockres)
{
 int status;
 struct ocfs2_mask_waiter mw;
  @@ -3160,6 +3161,33 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
 spin_lock_irqsave(&lockres->l_lock, flags);
 lockres->l_flags |= OCFS2_LOCK_FREEING;
  +  if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
  +  unsigned long flags;
  +
  +  /*
  +   * We know the downconvert is queued but not in progress
  +   * because we are the downconvert thread and processing
  +   * different lock. So we can just remove the lock from the
  +   * queue. This is not only an optimization but also a way
  +   * to avoid the following deadlock:
  +   *   ocfs2_dentry_post_unlock()
  +   * ocfs2_dentry_lock_put()
  +   *   ocfs2_drop_dentry_lock()
  +   * iput()
  +   *   ocfs2_evict_inode()
  +   * ocfs2_clear_inode()
  +   *   ocfs2_mark_lockres_freeing()
  +   * ... blocks waiting for OCFS2_LOCK_QUEUED
  +   * since we are the downconvert thread which
  +   * should clear the flag.
  +   */
  +  spin_lock_irqsave(&osb->dc_task_lock, flags);
  +  list_del_init(&lockres->l_blocked_list);
  +  osb->blocked_lock_count--;
  +  spin_unlock_irqrestore(&osb->dc_task_lock, flags);
  +  lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
  +  goto out_unlock;
  +  }
 while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
 spin_unlock_irqrestore(&lockres->l_lock, flags);
  @@ -3172,6 +3200,7 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
 spin_lock_irqsave(&lockres->l_lock, flags);
 }
  +out_unlock:
 spin_unlock_irqrestore(&lockres->l_lock, flags);
}
  @@ -3180,7 +3209,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
{
 int ret;
  -  ocfs2_mark_lockres_freeing(lockres);
  +  ocfs2_mark_lockres_freeing(osb, lockres);
 ret = ocfs2_drop_lock(osb, lockres);
 if (ret)
 mlog_errno(ret);
  diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
  index 1d596d8c4a4a..d293a22c32c5 100644
  --- a/fs/ocfs2/dlmglue.h
  +++ b/fs/ocfs2/dlmglue.h
  @@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
  -void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
  +void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
  +  struct ocfs2_lock_res *lockres);
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
  diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
  index 3b0d722de35e..9661f8db21dc 100644
  --- a/fs/ocfs2/inode.c
  +++ b/fs/ocfs2/inode.c
  @@ -1053,6 +1053,7 @@ static void ocfs2_clear_inode(struct inode *inode)
{
 int status;
 struct ocfs2_inode_info *oi = OCFS2_I(inode);
  +  struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 clear_inode(inode);
 trace_ocfs2_clear_inode((unsigned long 

[Ocfs2-devel] [PATCH 5/6] ocfs2: Avoid blocking in ocfs2_mark_lockres_freeing() in downconvert thread

2014-02-21 Thread Jan Kara
If we are dropping the last inode reference from the downconvert thread,
we will end up calling ocfs2_mark_lockres_freeing(), which can block if
the lock we are freeing is queued, thus creating an A-A deadlock.
Luckily, since we are the downconvert thread, we can immediately dequeue
the lock and thus avoid waiting in this case.

Signed-off-by: Jan Kara j...@suse.cz
---
 fs/ocfs2/dlmglue.c | 42 +++---
 fs/ocfs2/dlmglue.h |  3 ++-
 fs/ocfs2/inode.c   |  7 ---
 3 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 19986959d149..43d5e65b1e09 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3150,16 +3150,51 @@ out:
  * it safe to drop.
  *
  * You can *not* attempt to call cluster_lock on this lockres anymore. */
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+   struct ocfs2_lock_res *lockres)
 {
int status;
struct ocfs2_mask_waiter mw;
-   unsigned long flags;
+   unsigned long flags, flags2;
 
ocfs2_init_mask_waiter(&mw);

spin_lock_irqsave(&lockres->l_lock, flags);
lockres->l_flags |= OCFS2_LOCK_FREEING;
+   if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
+   /*
+* We know the downconvert is queued but not in progress
+* because we are the downconvert thread and processing
+* different lock. So we can just remove the lock from the
+* queue. This is not only an optimization but also a way
+* to avoid the following deadlock:
+*   ocfs2_dentry_post_unlock()
+* ocfs2_dentry_lock_put()
+*   ocfs2_drop_dentry_lock()
+* iput()
+*   ocfs2_evict_inode()
+* ocfs2_clear_inode()
+*   ocfs2_mark_lockres_freeing()
+* ... blocks waiting for OCFS2_LOCK_QUEUED
+* since we are the downconvert thread which
+* should clear the flag.
+*/
+   spin_unlock_irqrestore(&lockres->l_lock, flags);
+   spin_lock_irqsave(&osb->dc_task_lock, flags2);
+   list_del_init(&lockres->l_blocked_list);
+   osb->blocked_lock_count--;
+   spin_unlock_irqrestore(&osb->dc_task_lock, flags2);
+   /*
+* Warn if we recurse into another post_unlock call.  Strictly
+* speaking it isn't a problem but we need to be careful if
+* that happens (stack overflow, deadlocks, ...) so warn if
+* ocfs2 grows a path for which this can happen.
+*/
+   WARN_ON_ONCE(lockres->l_ops->post_unlock);
+   /* Since the lock is freeing we don't do much in the fn below */
+   ocfs2_process_blocked_lock(osb, lockres);
+   return;
+   }
while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -3172,6 +3207,7 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
 
spin_lock_irqsave(&lockres->l_lock, flags);
}
+out_unlock:
spin_unlock_irqrestore(&lockres->l_lock, flags);
 }
 
@@ -3180,7 +3216,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
 {
int ret;
 
-   ocfs2_mark_lockres_freeing(lockres);
+   ocfs2_mark_lockres_freeing(osb, lockres);
ret = ocfs2_drop_lock(osb, lockres);
if (ret)
mlog_errno(ret);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 1d596d8c4a4a..d293a22c32c5 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
 void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
 
 
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+   struct ocfs2_lock_res *lockres);
 void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
   struct ocfs2_lock_res *lockres);
 
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3b0d722de35e..9661f8db21dc 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1053,6 +1053,7 @@ static void ocfs2_clear_inode(struct inode *inode)
 {
int status;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+   struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
clear_inode(inode);
trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
@@ -1069,9 +1070,9 @@ static void ocfs2_clear_inode(struct inode *inode)
 
/* Do these 

[Ocfs2-devel] [PATCH 5/6] ocfs2: Avoid blocking in ocfs2_mark_lockres_freeing() in downconvert thread

2014-02-20 Thread Jan Kara
If we are dropping the last inode reference from the downconvert thread,
we will end up calling ocfs2_mark_lockres_freeing(), which can block if
the lock we are freeing is queued, thus creating an A-A deadlock.
Luckily, since we are the downconvert thread, we can immediately dequeue
the lock and thus avoid waiting in this case.

Signed-off-by: Jan Kara j...@suse.cz
---
 fs/ocfs2/dlmglue.c | 33 +++--
 fs/ocfs2/dlmglue.h |  3 ++-
 fs/ocfs2/inode.c   |  7 ---
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 19986959d149..b7580157ef01 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3150,7 +3150,8 @@ out:
  * it safe to drop.
  *
  * You can *not* attempt to call cluster_lock on this lockres anymore. */
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+   struct ocfs2_lock_res *lockres)
 {
int status;
struct ocfs2_mask_waiter mw;
@@ -3160,6 +3161,33 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
 
spin_lock_irqsave(&lockres->l_lock, flags);
lockres->l_flags |= OCFS2_LOCK_FREEING;
+   if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
+   unsigned long flags;
+
+   /*
+* We know the downconvert is queued but not in progress
+* because we are the downconvert thread and processing
+* different lock. So we can just remove the lock from the
+* queue. This is not only an optimization but also a way
+* to avoid the following deadlock:
+*   ocfs2_dentry_post_unlock()
+* ocfs2_dentry_lock_put()
+*   ocfs2_drop_dentry_lock()
+* iput()
+*   ocfs2_evict_inode()
+* ocfs2_clear_inode()
+*   ocfs2_mark_lockres_freeing()
+* ... blocks waiting for OCFS2_LOCK_QUEUED
+* since we are the downconvert thread which
+* should clear the flag.
+*/
+   spin_lock_irqsave(&osb->dc_task_lock, flags);
+   list_del_init(&lockres->l_blocked_list);
+   osb->blocked_lock_count--;
+   spin_unlock_irqrestore(&osb->dc_task_lock, flags);
+   lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
+   goto out_unlock;
+   }
while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -3172,6 +3200,7 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
 
spin_lock_irqsave(&lockres->l_lock, flags);
}
+out_unlock:
spin_unlock_irqrestore(&lockres->l_lock, flags);
 }
 
@@ -3180,7 +3209,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
 {
int ret;
 
-   ocfs2_mark_lockres_freeing(lockres);
+   ocfs2_mark_lockres_freeing(osb, lockres);
ret = ocfs2_drop_lock(osb, lockres);
if (ret)
mlog_errno(ret);
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 1d596d8c4a4a..d293a22c32c5 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
 void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
 
 
-void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
+void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
+   struct ocfs2_lock_res *lockres);
 void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
   struct ocfs2_lock_res *lockres);
 
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3b0d722de35e..9661f8db21dc 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1053,6 +1053,7 @@ static void ocfs2_clear_inode(struct inode *inode)
 {
int status;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+   struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
clear_inode(inode);
trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
@@ -1069,9 +1070,9 @@ static void ocfs2_clear_inode(struct inode *inode)
 
/* Do these before all the other work so that we don't bounce
 * the downconvert thread while waiting to destroy the locks. */
-   ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
-   ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
-   ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
+   ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres);
+   ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres);
+   ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres);
 

Re: [Ocfs2-devel] [PATCH 5/6] ocfs2: Avoid blocking in ocfs2_mark_lockres_freeing() in downconvert thread

2014-02-20 Thread Srinivas Eeda
I like the idea of dc_task handling queued basts in 
ocfs2_mark_lockres_freeing.

I am wondering if we should call lockres->l_ops->post_unlock(osb, 
lockres) ? Would there be another node waiting for a bast response ?

On 02/20/2014 07:18 AM, Jan Kara wrote:
 If we are dropping the last inode reference from the downconvert thread,
 we will end up calling ocfs2_mark_lockres_freeing(), which can block if
 the lock we are freeing is queued, thus creating an A-A deadlock.
 Luckily, since we are the downconvert thread, we can immediately dequeue
 the lock and thus avoid waiting in this case.

 Signed-off-by: Jan Kara j...@suse.cz
 ---
   fs/ocfs2/dlmglue.c | 33 +++--
   fs/ocfs2/dlmglue.h |  3 ++-
   fs/ocfs2/inode.c   |  7 ---
   3 files changed, 37 insertions(+), 6 deletions(-)

 diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
 index 19986959d149..b7580157ef01 100644
 --- a/fs/ocfs2/dlmglue.c
 +++ b/fs/ocfs2/dlmglue.c
 @@ -3150,7 +3150,8 @@ out:
* it safe to drop.
*
* You can *not* attempt to call cluster_lock on this lockres anymore. */
 -void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
 +void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
 + struct ocfs2_lock_res *lockres)
   {
   int status;
   struct ocfs2_mask_waiter mw;
 @@ -3160,6 +3161,33 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
   
   spin_lock_irqsave(&lockres->l_lock, flags);
   lockres->l_flags |= OCFS2_LOCK_FREEING;
 + if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) {
 + unsigned long flags;
 +
 + /*
 +  * We know the downconvert is queued but not in progress
 +  * because we are the downconvert thread and processing
 +  * different lock. So we can just remove the lock from the
 +  * queue. This is not only an optimization but also a way
 +  * to avoid the following deadlock:
 +  *   ocfs2_dentry_post_unlock()
 +  * ocfs2_dentry_lock_put()
 +  *   ocfs2_drop_dentry_lock()
 +  * iput()
 +  *   ocfs2_evict_inode()
 +  * ocfs2_clear_inode()
 +  *   ocfs2_mark_lockres_freeing()
 +  * ... blocks waiting for OCFS2_LOCK_QUEUED
 +  * since we are the downconvert thread which
 +  * should clear the flag.
 +  */
 + spin_lock_irqsave(&osb->dc_task_lock, flags);
 + list_del_init(&lockres->l_blocked_list);
 + osb->blocked_lock_count--;
 + spin_unlock_irqrestore(&osb->dc_task_lock, flags);
 + lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
 + goto out_unlock;
 + }
   while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
   lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
   spin_unlock_irqrestore(&lockres->l_lock, flags);
 @@ -3172,6 +3200,7 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
   
   spin_lock_irqsave(&lockres->l_lock, flags);
   }
 +out_unlock:
   spin_unlock_irqrestore(&lockres->l_lock, flags);
   }
   
 @@ -3180,7 +3209,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
   {
   int ret;
   
 - ocfs2_mark_lockres_freeing(lockres);
 + ocfs2_mark_lockres_freeing(osb, lockres);
   ret = ocfs2_drop_lock(osb, lockres);
   if (ret)
   mlog_errno(ret);
 diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
 index 1d596d8c4a4a..d293a22c32c5 100644
 --- a/fs/ocfs2/dlmglue.h
 +++ b/fs/ocfs2/dlmglue.h
 @@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
   void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
   
   
 -void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
 +void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb,
 + struct ocfs2_lock_res *lockres);
   void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
  struct ocfs2_lock_res *lockres);
   
 diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
 index 3b0d722de35e..9661f8db21dc 100644
 --- a/fs/ocfs2/inode.c
 +++ b/fs/ocfs2/inode.c
 @@ -1053,6 +1053,7 @@ static void ocfs2_clear_inode(struct inode *inode)
   {
   int status;
   struct ocfs2_inode_info *oi = OCFS2_I(inode);
 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   
   clear_inode(inode);
   trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
 @@ -1069,9 +1070,9 @@ static void ocfs2_clear_inode(struct inode *inode)
   
   /* Do these before all the other work so that we don't bounce
* the downconvert thread while waiting to destroy the locks. */
 - ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
 -