Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-15 Thread Eric Ren

On 01/16/2017 11:13 AM, Junxiao Bi wrote:

On 01/16/2017 11:06 AM, Eric Ren wrote:

Hi Junxiao,

On 01/16/2017 10:46 AM, Junxiao Bi wrote:

If had_lock==true, it is a bug? I think we should BUG_ON for it, that
can help us catch bug at the first time.

Good idea! But I'm not sure if "ocfs2_setattr" is always the first one
who takes the cluster lock.
It's harder for me to name all the possible paths;-/

The BUG_ON() can help catch the path where ocfs2_setattr is not the
first one.

Yes, I understand. But, the problem is that the vfs entries calling
order is out of our control.
I don't want to place an assertion where I'm not 100% sure it's
absolutely right;-)

If it is not the first one, is it another recursive locking bug? In this
case, if you don't like BUG_ON(), you can dump the call trace and print
some warning message.


Yes! I like this idea, will add it in next version, thanks!

Eric



Thanks,
Junxiao.

Thanks,
Eric


Thanks,
Junxiao.


+if (had_lock)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
goto bail_unlock_rw;
}
-inode_locked = 1;
+if (!had_lock) {
+ocfs2_add_holder(lockres, &oh);
+inode_locked = 1;
+}
  if (size_change) {
status = inode_newsize_ok(inode, attr->ia_size);
@@ -1260,7 +1270,8 @@ int ocfs2_setattr(struct dentry *dentry, struct
iattr *attr)
bail_commit:
ocfs2_commit_trans(osb, handle);
bail_unlock:
-if (status) {
+if (status && inode_locked) {
+ocfs2_remove_holder(lockres, &oh);
ocfs2_inode_unlock(inode, 1);
inode_locked = 0;
}
@@ -1278,8 +1289,10 @@ int ocfs2_setattr(struct dentry *dentry,
struct iattr *attr)
if (status < 0)
mlog_errno(status);
}
-if (inode_locked)
+if (inode_locked) {
+ocfs2_remove_holder(lockres, &oh);
ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
return status;
@@ -1321,20 +1334,31 @@ int ocfs2_getattr(struct vfsmount *mnt,
int ocfs2_permission(struct inode *inode, int mask)
{
int ret;
+int has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
  if (mask & MAY_NOT_BLOCK)
return -ECHILD;
-ret = ocfs2_inode_lock(inode, NULL, 0);
-if (ret) {
-if (ret != -ENOENT)
-mlog_errno(ret);
-goto out;
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);

The same thing as ocfs2_setattr.

OK. I will think over your suggestions!

Thanks,
Eric


Thanks,
Junxiao.

+if (!has_locked) {
+ret = ocfs2_inode_lock(inode, NULL, 0);
+if (ret) {
+if (ret != -ENOENT)
+mlog_errno(ret);
+goto out;
+}
+ocfs2_add_holder(lockres, &oh);
}
  ret = generic_permission(inode, mask);
-ocfs2_inode_unlock(inode, 0);
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 0);
+}
out:
return ret;
}







Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-15 Thread Junxiao Bi
On 01/16/2017 11:06 AM, Eric Ren wrote:
> Hi Junxiao,
> 
> On 01/16/2017 10:46 AM, Junxiao Bi wrote:
>>>> If had_lock==true, it is a bug? I think we should BUG_ON for it, that
>>>> can help us catch bug at the first time.
>>> Good idea! But I'm not sure if "ocfs2_setattr" is always the first one
>>> who takes the cluster lock.
>>> It's harder for me to name all the possible paths;-/
>> The BUG_ON() can help catch the path where ocfs2_setattr is not the
>> first one.
> Yes, I understand. But, the problem is that the vfs entries calling
> order is out of our control.
> I don't want to place an assertion where I'm not 100% sure it's
> absolutely right;-)
If it is not the first one, is it another recursive locking bug? In this
case, if you don't like BUG_ON(), you can dump the call trace and print
some warning message.

Thanks,
Junxiao.
> 
> Thanks,
> Eric
> 
>>
>> Thanks,
>> Junxiao.
>>

> +if (had_lock)
> +arg_flags = OCFS2_META_LOCK_GETBH;
> +status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
>if (status < 0) {
>if (status != -ENOENT)
>mlog_errno(status);
>goto bail_unlock_rw;
>}
> -inode_locked = 1;
> +if (!had_lock) {
> +ocfs2_add_holder(lockres, &oh);
> +inode_locked = 1;
> +}
>  if (size_change) {
>status = inode_newsize_ok(inode, attr->ia_size);
> @@ -1260,7 +1270,8 @@ int ocfs2_setattr(struct dentry *dentry, struct
> iattr *attr)
>bail_commit:
>ocfs2_commit_trans(osb, handle);
>bail_unlock:
> -if (status) {
> +if (status && inode_locked) {
> +ocfs2_remove_holder(lockres, &oh);
>ocfs2_inode_unlock(inode, 1);
>inode_locked = 0;
>}
> @@ -1278,8 +1289,10 @@ int ocfs2_setattr(struct dentry *dentry,
> struct iattr *attr)
>if (status < 0)
>mlog_errno(status);
>}
> -if (inode_locked)
> +if (inode_locked) {
> +ocfs2_remove_holder(lockres, &oh);
>ocfs2_inode_unlock(inode, 1);
> +}
>  brelse(bh);
>return status;
> @@ -1321,20 +1334,31 @@ int ocfs2_getattr(struct vfsmount *mnt,
>int ocfs2_permission(struct inode *inode, int mask)
>{
>int ret;
> +int has_locked;
> +struct ocfs2_holder oh;
> +struct ocfs2_lock_res *lockres;
>  if (mask & MAY_NOT_BLOCK)
>return -ECHILD;
>-ret = ocfs2_inode_lock(inode, NULL, 0);
> -if (ret) {
> -if (ret != -ENOENT)
> -mlog_errno(ret);
> -goto out;
> +lockres = &OCFS2_I(inode)->ip_inode_lockres;
> +has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
>>>> The same thing as ocfs2_setattr.
>>> OK. I will think over your suggestions!
>>>
>>> Thanks,
>>> Eric
>>>
>>>> Thanks,
>>>> Junxiao.
> +if (!has_locked) {
> +ret = ocfs2_inode_lock(inode, NULL, 0);
> +if (ret) {
> +if (ret != -ENOENT)
> +mlog_errno(ret);
> +goto out;
> +}
> +ocfs2_add_holder(lockres, &oh);
>}
>  ret = generic_permission(inode, mask);
>-ocfs2_inode_unlock(inode, 0);
> +if (!has_locked) {
> +ocfs2_remove_holder(lockres, &oh);
> +ocfs2_inode_unlock(inode, 0);
> +}
>out:
>return ret;
>}
>
>>
> 



Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-15 Thread Eric Ren

Hi Junxiao,

On 01/16/2017 10:46 AM, Junxiao Bi wrote:

If had_lock==true, it is a bug? I think we should BUG_ON for it, that
can help us catch bug at the first time.

Good idea! But I'm not sure if "ocfs2_setattr" is always the first one
who takes the cluster lock.
It's harder for me to name all the possible paths;-/

The BUG_ON() can help catch the path where ocfs2_setattr is not the
first one.

Yes, I understand. But the problem is that the order in which the vfs entry points are called is
out of our control.
I don't want to place an assertion where I'm not 100% sure it's absolutely 
right;-)

Thanks,
Eric



Thanks,
Junxiao.




+if (had_lock)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
   if (status < 0) {
   if (status != -ENOENT)
   mlog_errno(status);
   goto bail_unlock_rw;
   }
-inode_locked = 1;
+if (!had_lock) {
+ocfs2_add_holder(lockres, &oh);
+inode_locked = 1;
+}
 if (size_change) {
   status = inode_newsize_ok(inode, attr->ia_size);
@@ -1260,7 +1270,8 @@ int ocfs2_setattr(struct dentry *dentry, struct
iattr *attr)
   bail_commit:
   ocfs2_commit_trans(osb, handle);
   bail_unlock:
-if (status) {
+if (status && inode_locked) {
+ocfs2_remove_holder(lockres, &oh);
   ocfs2_inode_unlock(inode, 1);
   inode_locked = 0;
   }
@@ -1278,8 +1289,10 @@ int ocfs2_setattr(struct dentry *dentry,
struct iattr *attr)
   if (status < 0)
   mlog_errno(status);
   }
-if (inode_locked)
+if (inode_locked) {
+ocfs2_remove_holder(lockres, &oh);
   ocfs2_inode_unlock(inode, 1);
+}
 brelse(bh);
   return status;
@@ -1321,20 +1334,31 @@ int ocfs2_getattr(struct vfsmount *mnt,
   int ocfs2_permission(struct inode *inode, int mask)
   {
   int ret;
+int has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
 if (mask & MAY_NOT_BLOCK)
   return -ECHILD;
   -ret = ocfs2_inode_lock(inode, NULL, 0);
-if (ret) {
-if (ret != -ENOENT)
-mlog_errno(ret);
-goto out;
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);

The same thing as ocfs2_setattr.

OK. I will think over your suggestions!

Thanks,
Eric


Thanks,
Junxiao.

+if (!has_locked) {
+ret = ocfs2_inode_lock(inode, NULL, 0);
+if (ret) {
+if (ret != -ENOENT)
+mlog_errno(ret);
+goto out;
+}
+ocfs2_add_holder(lockres, &oh);
   }
 ret = generic_permission(inode, mask);
   -ocfs2_inode_unlock(inode, 0);
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 0);
+}
   out:
   return ret;
   }







Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-15 Thread Junxiao Bi
On 01/13/2017 02:19 PM, Eric Ren wrote:
> Hi!
> 
> On 01/13/2017 12:22 PM, Junxiao Bi wrote:
>> On 01/05/2017 11:31 PM, Eric Ren wrote:
>>> Commit 743b5f1434f5 ("ocfs2: take inode lock in
>>> ocfs2_iop_set/get_acl()")
>>> results in a deadlock, as the author "Tariq Saeed" realized shortly
>>> after the patch was merged. The discussion happened here
>>> (https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html).
>>>
>>>
>>> The reason why taking cluster inode lock at vfs entry points opens up
>>> a self deadlock window, is explained in the previous patch of this
>>> series.
>>>
>>> So far, we have seen two different code paths that have this issue.
>>> 1. do_sys_open
>>>   may_open
>>>inode_permission
>>> ocfs2_permission
>>>  ocfs2_inode_lock() <=== take PR
>>>   generic_permission
>>>get_acl
>>> ocfs2_iop_get_acl
>>>  ocfs2_inode_lock() <=== take PR
>>> 2. fchmod|fchmodat
>>>  chmod_common
>>>   notify_change
>>>ocfs2_setattr <=== take EX
>>> posix_acl_chmod
>>>  get_acl
>>>   ocfs2_iop_get_acl <=== take PR
>>>  ocfs2_iop_set_acl <=== take EX
>>>
>>> Fixes them by adding the tracking logic (in the previous patch) for
>>> these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
>>> ocfs2_setattr().
>>>
>>> Signed-off-by: Eric Ren 
>>> ---
>>>   fs/ocfs2/acl.c  | 39 ++-
>>>   fs/ocfs2/file.c | 44 ++--
>>>   2 files changed, 68 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
>>> index bed1fcb..c539890 100644
>>> --- a/fs/ocfs2/acl.c
>>> +++ b/fs/ocfs2/acl.c
>>> @@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode,
>>> struct posix_acl *acl, int type)
>>>   {
>>>   struct buffer_head *bh = NULL;
>>>   int status = 0;
>>> -
>>> -status = ocfs2_inode_lock(inode, &bh, 1);
>>> +int arg_flags = 0, has_locked;
>>> +struct ocfs2_holder oh;
>>> +struct ocfs2_lock_res *lockres;
>>> +
>>> +lockres = &OCFS2_I(inode)->ip_inode_lockres;
>>> +has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
>>> +if (has_locked)
>>> +arg_flags = OCFS2_META_LOCK_GETBH;
>>> +status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
>>>   if (status < 0) {
>>>   if (status != -ENOENT)
>>>   mlog_errno(status);
>>>   return status;
>>>   }
>>> +if (!has_locked)
>>> +ocfs2_add_holder(lockres, &oh);
>>> +
>>>   status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
>>> -ocfs2_inode_unlock(inode, 1);
>>> +
>>> +if (!has_locked) {
>>> +ocfs2_remove_holder(lockres, &oh);
>>> +ocfs2_inode_unlock(inode, 1);
>>> +}
>>>   brelse(bh);
>>> +
>>>   return status;
>>>   }
>>>   @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct
>>> inode *inode, int type)
>>>   struct buffer_head *di_bh = NULL;
>>>   struct posix_acl *acl;
>>>   int ret;
>>> +int arg_flags = 0, has_locked;
>>> +struct ocfs2_holder oh;
>>> +struct ocfs2_lock_res *lockres;
>>> osb = OCFS2_SB(inode->i_sb);
>>>   if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
>>>   return NULL;
>>> -ret = ocfs2_inode_lock(inode, &di_bh, 0);
>>> +
>>> +lockres = &OCFS2_I(inode)->ip_inode_lockres;
>>> +has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
>>> +if (has_locked)
>>> +arg_flags = OCFS2_META_LOCK_GETBH;
>>> +ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
>>>   if (ret < 0) {
>>>   if (ret != -ENOENT)
>>>   mlog_errno(ret);
>>>   return ERR_PTR(ret);
>>>   }
>>> +if (!has_locked)
>>> +ocfs2_add_holder(lockres, &oh);
>>> acl = ocfs2_get_acl_nolock(inode, type, di_bh);
>>>   -ocfs2_inode_unlock(inode, 0);
>>> +if (!has_locked) {
>>> +ocfs2_remove_holder(lockres, &oh);
>>> +ocfs2_inode_unlock(inode, 0);
>>> +}
>>>   brelse(di_bh);
>>> +
>>>   return acl;
>>>   }
>>>   diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
>>> index c488965..62be75d 100644
>>> --- a/fs/ocfs2/file.c
>>> +++ b/fs/ocfs2/file.c
>>> @@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct
>>> iattr *attr)
>>>   handle_t *handle = NULL;
>>>   struct dquot *transfer_to[MAXQUOTAS] = { };
>>>   int qtype;
>>> +int arg_flags = 0, had_lock;
>>> +struct ocfs2_holder oh;
>>> +struct ocfs2_lock_res *lockres;
>>> trace_ocfs2_setattr(inode, dentry,
>>>   (unsigned long long)OCFS2_I(inode)->ip_blkno,
>>> @@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry,
>>> struct iattr *attr)
>>>   }
>>>   }
>>>   -status = ocfs2_inode_lock(inode, &bh, 1);
>>> +lockres = &OCFS2_I(inode)->ip_inode_lockres;
>>> +had_lock = (ocfs2_is_locked_by_me(lockres) != NULL);
>> I

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-12 Thread Eric Ren

Hi!

On 01/13/2017 12:22 PM, Junxiao Bi wrote:

On 01/05/2017 11:31 PM, Eric Ren wrote:

Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
results in a deadlock, as the author "Tariq Saeed" realized shortly
after the patch was merged. The discussion happened here
(https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html).

The reason why taking cluster inode lock at vfs entry points opens up
a self deadlock window, is explained in the previous patch of this
series.

So far, we have seen two different code paths that have this issue.
1. do_sys_open
  may_open
   inode_permission
ocfs2_permission
 ocfs2_inode_lock() <=== take PR
  generic_permission
   get_acl
ocfs2_iop_get_acl
 ocfs2_inode_lock() <=== take PR
2. fchmod|fchmodat
 chmod_common
  notify_change
   ocfs2_setattr <=== take EX
posix_acl_chmod
 get_acl
  ocfs2_iop_get_acl <=== take PR
 ocfs2_iop_set_acl <=== take EX

Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().

Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct 
posix_acl *acl, int type)
  {
struct buffer_head *bh = NULL;
int status = 0;
-
-   status = ocfs2_inode_lock(inode, &bh, 1);
+   int arg_flags = 0, has_locked;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
+
+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+   if (has_locked)
+   arg_flags = OCFS2_META_LOCK_GETBH;
+   status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
return status;
}
+   if (!has_locked)
+   ocfs2_add_holder(lockres, &oh);
+
status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-   ocfs2_inode_unlock(inode, 1);
+
+   if (!has_locked) {
+   ocfs2_remove_holder(lockres, &oh);
+   ocfs2_inode_unlock(inode, 1);
+   }
brelse(bh);
+
return status;
  }
  
@@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)

struct buffer_head *di_bh = NULL;
struct posix_acl *acl;
int ret;
+   int arg_flags = 0, has_locked;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
  
  	osb = OCFS2_SB(inode->i_sb);

if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return NULL;
-   ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+   if (has_locked)
+   arg_flags = OCFS2_META_LOCK_GETBH;
+   ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
if (ret < 0) {
if (ret != -ENOENT)
mlog_errno(ret);
return ERR_PTR(ret);
}
+   if (!has_locked)
+   ocfs2_add_holder(lockres, &oh);
  
  	acl = ocfs2_get_acl_nolock(inode, type, di_bh);
  
-	ocfs2_inode_unlock(inode, 0);

+   if (!has_locked) {
+   ocfs2_remove_holder(lockres, &oh);
+   ocfs2_inode_unlock(inode, 0);
+   }
brelse(di_bh);
+
return acl;
  }
  
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index c488965..62be75d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
handle_t *handle = NULL;
struct dquot *transfer_to[MAXQUOTAS] = { };
int qtype;
+   int arg_flags = 0, had_lock;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
  
  	trace_ocfs2_setattr(inode, dentry,

(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
}
}
  
-	status = ocfs2_inode_lock(inode, &bh, 1);

+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   had_lock = (ocfs2_is_locked_by_me(lockres) != NULL);

If had_lock==true, it is a bug? I think we should BUG_ON for it, that
can help us catch bug at the first time.


Good idea! But I'm not sure if "ocfs2_setattr" is always the first one who takes the cluster 
lock.

It's harder for me to name all the possible paths;-/





+   if (had_lock)
+   arg_flags = OCFS2_META_LOCK_G

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-12 Thread Junxiao Bi
On 01/05/2017 11:31 PM, Eric Ren wrote:
> Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
> results in a deadlock, as the author "Tariq Saeed" realized shortly
> after the patch was merged. The discussion happened here
> (https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html).
> 
> The reason why taking cluster inode lock at vfs entry points opens up
> a self deadlock window, is explained in the previous patch of this
> series.
> 
> So far, we have seen two different code paths that have this issue.
> 1. do_sys_open
>  may_open
>   inode_permission
>ocfs2_permission
> ocfs2_inode_lock() <=== take PR
>  generic_permission
>   get_acl
>ocfs2_iop_get_acl
> ocfs2_inode_lock() <=== take PR
> 2. fchmod|fchmodat
> chmod_common
>  notify_change
>   ocfs2_setattr <=== take EX
>posix_acl_chmod
> get_acl
>  ocfs2_iop_get_acl <=== take PR
> ocfs2_iop_set_acl <=== take EX
> 
> Fixes them by adding the tracking logic (in the previous patch) for
> these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
> ocfs2_setattr().
> 
> Signed-off-by: Eric Ren 
> ---
>  fs/ocfs2/acl.c  | 39 ++-
>  fs/ocfs2/file.c | 44 ++--
>  2 files changed, 68 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
> index bed1fcb..c539890 100644
> --- a/fs/ocfs2/acl.c
> +++ b/fs/ocfs2/acl.c
> @@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct 
> posix_acl *acl, int type)
>  {
>   struct buffer_head *bh = NULL;
>   int status = 0;
> -
> - status = ocfs2_inode_lock(inode, &bh, 1);
> + int arg_flags = 0, has_locked;
> + struct ocfs2_holder oh;
> + struct ocfs2_lock_res *lockres;
> +
> + lockres = &OCFS2_I(inode)->ip_inode_lockres;
> + has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
> + if (has_locked)
> + arg_flags = OCFS2_META_LOCK_GETBH;
> + status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
>   if (status < 0) {
>   if (status != -ENOENT)
>   mlog_errno(status);
>   return status;
>   }
> + if (!has_locked)
> + ocfs2_add_holder(lockres, &oh);
> +
>   status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
> - ocfs2_inode_unlock(inode, 1);
> +
> + if (!has_locked) {
> + ocfs2_remove_holder(lockres, &oh);
> + ocfs2_inode_unlock(inode, 1);
> + }
>   brelse(bh);
> +
>   return status;
>  }
>  
> @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode 
> *inode, int type)
>   struct buffer_head *di_bh = NULL;
>   struct posix_acl *acl;
>   int ret;
> + int arg_flags = 0, has_locked;
> + struct ocfs2_holder oh;
> + struct ocfs2_lock_res *lockres;
>  
>   osb = OCFS2_SB(inode->i_sb);
>   if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
>   return NULL;
> - ret = ocfs2_inode_lock(inode, &di_bh, 0);
> +
> + lockres = &OCFS2_I(inode)->ip_inode_lockres;
> + has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
> + if (has_locked)
> + arg_flags = OCFS2_META_LOCK_GETBH;
> + ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
>   if (ret < 0) {
>   if (ret != -ENOENT)
>   mlog_errno(ret);
>   return ERR_PTR(ret);
>   }
> + if (!has_locked)
> + ocfs2_add_holder(lockres, &oh);
>  
>   acl = ocfs2_get_acl_nolock(inode, type, di_bh);
>  
> - ocfs2_inode_unlock(inode, 0);
> + if (!has_locked) {
> + ocfs2_remove_holder(lockres, &oh);
> + ocfs2_inode_unlock(inode, 0);
> + }
>   brelse(di_bh);
> +
>   return acl;
>  }
>  
> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
> index c488965..62be75d 100644
> --- a/fs/ocfs2/file.c
> +++ b/fs/ocfs2/file.c
> @@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
> *attr)
>   handle_t *handle = NULL;
>   struct dquot *transfer_to[MAXQUOTAS] = { };
>   int qtype;
> + int arg_flags = 0, had_lock;
> + struct ocfs2_holder oh;
> + struct ocfs2_lock_res *lockres;
>  
>   trace_ocfs2_setattr(inode, dentry,
>   (unsigned long long)OCFS2_I(inode)->ip_blkno,
> @@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
> *attr)
>   }
>   }
>  
> - status = ocfs2_inode_lock(inode, &bh, 1);
> + lockres = &OCFS2_I(inode)->ip_inode_lockres;
> + had_lock = (ocfs2_is_locked_by_me(lockres) != NULL);

If had_lock==true, it is a bug? I think we should BUG_ON for it, that
can help us catch bug at the first time.


> + if (had_lock)
> + arg_flags = OCFS2_META_LOCK_GETBH;
> + status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
>

Re: [Ocfs2-devel] [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-12 Thread Joseph Qi


On 17/1/12 19:24, Eric Ren wrote:

Hi Joseph,

On 01/09/2017 10:13 AM, Eric Ren wrote:
So you are trying to fix it by making phase3 finish without really 
doing
Phase3 can go ahead because this node is already under protection 
of cluster lock.

You said it was blocked...

Oh, sorry, I meant phase3 can go ahead if this patch set is applied;-)

"Another hand, the recursive cluster lock (the second one) will be 
blocked in

__ocfs2_cluster_lock() because of OCFS2_LOCK_BLOCKED."

__ocfs2_cluster_lock, then Process B can continue either.
Let us bear in mind that phase1 and phase3 are in the same context 
and
executed in order. That's why I think there is no need to check if 
locked

by myself in phase1.
Sorry, I still cannot see it. Without keeping track of the first 
cluster lock, how can we

know if
we are under a context that has already been in the protecting of 
cluster lock? How can we

handle
the recursive locking (the second cluster lock) if we don't have this 
information?

If phase1 finds it is already locked by myself, that means the holder
is left by last operation without dec holder. That's why I think 
it is a bug

instead of a recursive lock case.
I think I got your point here. Do you mean that we should just add 
the lock holder at the

first locking position
without checking before that? Unfortunately, it's tricky here to know 
exactly which ocfs2

routine will be the first vfs
entry point, such as ocfs2_get_acl() which can be both the first vfs 
entry point and the

second vfs entry point after
ocfs2_permission(), right?

It will be a coding bug if the problem you concern about happens. I 
think we don't need to

worry about this much because
the code logic here is quite simple;-)

Ping...

Did I clear your doubts by the last email? I really want to get your 
point, if not.


If there's any problem, I will fix them in the next version;-)
Yes, but I still worry that the code bug case will be hidden behind the
recursive lock...

Anyway, It depends on others...

Thanks,
Joseph


Thanks,
Eric



Thanks for your patience!
Eric


D






Re: [Ocfs2-devel] [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-12 Thread Eric Ren

Hi Joseph,

On 01/09/2017 10:13 AM, Eric Ren wrote:

So you are trying to fix it by making phase3 finish without really doing

Phase3 can go ahead because this node is already under protection of cluster 
lock.

You said it was blocked...

Oh, sorry, I meant phase3 can go ahead if this patch set is applied;-)


"Another hand, the recursive cluster lock (the second one) will be blocked in
__ocfs2_cluster_lock() because of OCFS2_LOCK_BLOCKED."

__ocfs2_cluster_lock, then Process B can continue either.
Let us bear in mind that phase1 and phase3 are in the same context and
executed in order. That's why I think there is no need to check if locked
by myself in phase1.

Sorry, I still cannot see it. Without keeping track of the first cluster lock, 
how can we
know if
we are under a context that has already been in the protecting of cluster lock? 
How can we
handle
the recursive locking (the second cluster lock) if we don't have this 
information?

If phase1 finds it is already locked by myself, that means the holder
is left by last operation without dec holder. That's why I think it is a bug
instead of a recursive lock case.

I think I got your point here. Do you mean that we should just add the lock 
holder at the
first locking position
without checking before that? Unfortunately, it's tricky here to know exactly 
which ocfs2
routine will be the first vfs
entry point, such as ocfs2_get_acl() which can be both the first vfs entry 
point and the
second vfs entry point after
ocfs2_permission(), right?

It will be a coding bug if the problem you concern about happens. I think we 
don't need to
worry about this much because
the code logic here is quite simple;-)

Ping...

Did I clear your doubts by the last email? I really want to get your point, if 
not.

If there's any problem, I will fix them in the next version;-)

Thanks,
Eric



Thanks for your patience!
Eric


D




Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-08 Thread Eric Ren

Hi Fengguang,

On 01/06/2017 10:52 PM, kbuild test robot wrote:

Hi Eric,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.10-rc2 next-20170106]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Eric-Ren/fix-deadlock-caused-by-recursive-cluster-locking/20170106-200837
config: ia64-allyesconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 6.2.0
reproduce:
 wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
 chmod +x ~/bin/make.cross
 # save the attached .config to linux build tree
 make.cross ARCH=ia64


I failed to reproduce this issue locally by following the above instructions. After rebasing
my patch set onto the latest mainline (Linux 4.10-rc3), I only see this compiler error message:
"
test:/mnt/build/linux # make.cross ARCH=ia64
make CROSS_COMPILE=/opt/gcc-4.9.0-nolibc/ia64-linux/bin/ia64-linux- --jobs=4 
ARCH=ia64
...
  CALLscripts/checksyscalls.sh
:1184:2: warning: #warning syscall perf_event_open not implemented 
[-Wcpp]
:1238:2: warning: #warning syscall seccomp not implemented [-Wcpp]
:1316:2: warning: #warning syscall pkey_mprotect not implemented [-Wcpp]
:1319:2: warning: #warning syscall pkey_alloc not implemented [-Wcpp]
:1322:2: warning: #warning syscall pkey_free not implemented [-Wcpp]
...
 AS  arch/ia64/kernel/gate.o
arch/ia64/kernel/entry.S: Assembler messages:
arch/ia64/kernel/entry.S:622: Error: Operand 2 of `adds' should be a 14-bit 
integer (-8192-8191)
arch/ia64/kernel/entry.S:729: Error: Operand 2 of `adds' should be a 14-bit 
integer (-8192-8191)
arch/ia64/kernel/entry.S:860: Error: Operand 2 of `adds' should be a 14-bit 
integer (-8192-8191)
make[1]: *** [scripts/Makefile.build:393: arch/ia64/kernel/entry.o] Error 1
make[1]: *** Waiting for unfinished jobs
make: *** [Makefile:988: arch/ia64/kernel] Error 2
make: *** Waiting for unfinished jobs
"

The obvious difference I noticed is that my gcc version is a little newer than kbuild's; not sure if
it's related:

"
test:/mnt/build/linux # gcc -v
gcc version 6.2.1 20160830 [gcc-6-branch revision 239856] (SUSE Linux)
"



All errors (new ones prefixed by >>):

In file included from fs/ocfs2/acl.c:31:0:
fs/ocfs2/acl.c: In function 'ocfs2_iop_set_acl':

fs/ocfs2/dlmglue.h:189:29: error: inlining failed in call to always_inline 
'ocfs2_is_locked_by_me': function body not available

 inline struct ocfs2_holder *ocfs2_is_locked_by_me(struct ocfs2_lock_res 
*lockres);


This error is probably because I should not have added "inline" to the declaration while putting
the function body into the source file.

But, no error or warning occurred when I built and tested locally this way:
"
test:/mnt/build/linux/fs/ocfs2 # make -C /lib/modules/4.9.0-2-vanilla/build 
M=`pwd` modules
"

Anyway, I want to make kbuild silent before resending ;-) Please correct me if I'm
missing something.


Thanks!
Eric

 ^
fs/ocfs2/acl.c:292:16: note: called from here
  has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
^~
In file included from fs/ocfs2/acl.c:31:0:

fs/ocfs2/dlmglue.h:189:29: error: inlining failed in call to always_inline 
'ocfs2_is_locked_by_me': function body not available

 inline struct ocfs2_holder *ocfs2_is_locked_by_me(struct ocfs2_lock_res 
*lockres);
 ^
fs/ocfs2/acl.c:292:16: note: called from here
  has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
^~
In file included from fs/ocfs2/acl.c:31:0:

fs/ocfs2/dlmglue.h:185:13: error: inlining failed in call to always_inline 
'ocfs2_add_holder': function body not available

 inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
 ^~~~
fs/ocfs2/acl.c:302:3: note: called from here
   ocfs2_add_holder(lockres, &oh);
   ^~
In file included from fs/ocfs2/acl.c:31:0:

fs/ocfs2/dlmglue.h:187:13: error: inlining failed in call to always_inline 
'ocfs2_remove_holder': function body not available

 inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
 ^~~
fs/ocfs2/acl.c:307:3: note: called from here
   ocfs2_remove_holder(lockres, &oh);
   ^
--
In file included from arch/ia64/include/uapi/asm/intrinsics.h:21:0,
 from arch/ia64/include/asm/intrinsics.h:10,
 from arch/ia64/include/asm/bitops.h:18,
 from include/linux/bitops.h:36,
 from include/linux/kernel.h:10,
 from include/linux/list.h:8,
 from include/linux/wait.h:6,
 from in

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-08 Thread Eric Ren

Hi,

On 01/09/2017 09:13 AM, Joseph Qi wrote:

...



The issue case you are trying to fix is:
Process A
take inode lock (phase1)
...
<<< race window (phase2, Process B)


The deadlock only happens if process B is on a remote node and request EX lock.

Quote the patch[1/2]'s commit message:

A deadlock will occur if a remote EX request comes in between two of
ocfs2_inode_lock().  Briefly describe how the deadlock is formed:

On one hand, the OCFS2_LOCK_BLOCKED flag of this lockres is set in
BAST (ocfs2_generic_handle_bast) when downconvert is started on behalf of
the remote EX lock request.  On the other hand, the recursive cluster lock (the
second one) will be blocked in __ocfs2_cluster_lock() because of
OCFS2_LOCK_BLOCKED.  But the downconvert never completes. Why? Because
there is no chance for the first cluster lock on this node to be unlocked
- we block ourselves in the code path.
---


...
take inode lock again (phase3)

Deadlock happens because Process B in phase2 and Process A in phase3
are waiting for each other.

It's the local lock's (like i_mutex) responsibility to protect the critical section
from races
among processes on the same node.

I know we are talking a cluster lock issue. And the Process B I described is
downconvert thread.


That's fine!




So you are trying to fix it by making phase3 finish without really doing


Phase3 can go ahead because this node is already under protection of cluster 
lock.

You said it was blocked...


Oh, sorry, I meant phase3 can go ahead if this patch set is applied;-)


"Another hand, the recursive cluster lock (the second one) will be blocked in
__ocfs2_cluster_lock() because of OCFS2_LOCK_BLOCKED."



__ocfs2_cluster_lock, then Process B can continue either.
Let us bear in mind that phase1 and phase3 are in the same context and
executed in order. That's why I think there is no need to check if locked
by myself in phase1.
Sorry, I still cannot see it. Without keeping track of the first cluster lock,
how can we know whether we are in a context that is already protected by the
cluster lock? How can we handle the recursive locking (the second cluster lock)
if we don't have this information?

If phase1 finds it is already locked by myself, that means the holder
is left by last operation without dec holder. That's why I think it is a bug
instead of a recursive lock case.
I think I got your point here. Do you mean that we should just add the lock
holder at the first locking position without checking before that?
Unfortunately, it's tricky to know exactly which ocfs2 routine will be the
first vfs entry point. For example, ocfs2_get_acl() can be either the first
vfs entry point or the second one, after ocfs2_permission(), right?

It would be a coding bug if the problem you are concerned about happened. I
think we don't need to worry about this much because the code logic here is
quite simple;-)

Thanks for your patience!
Eric



Did I answer your question?

Thanks!
Eric



Thanks,
Joseph


Thanks,
Eric



Thanks,
Joseph


Thanks,
Eric


Thanks,
Joseph


Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl 
*acl, int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-ocfs2_inode_unlock(inode, 1);
+
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
+
  return status;
  }
  @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, 
int type)

  struct buffer_head *di_bh = NULL;
  struct posix_acl *acl;
  int ret;
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
osb = OCFS2_SB(inode->i_sb);
  if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
  return NULL;
-ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-08 Thread Joseph Qi


On 17/1/6 19:56, Eric Ren wrote:

Hi!

On 01/06/2017 05:55 PM, Joseph Qi wrote:

On 17/1/6 17:13, Eric Ren wrote:

Hi,



Fixes them by adding the tracking logic (in the previous 
patch) for

these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().
As described cases above, shall we just add the tracking logic 
only for set/get_acl()?


The idea is to detect recursive locking on the running task stack. Take
case 1) for example, if ocfs2_permission() is not changed:

ocfs2_permission() <=== take PR, ocfs2_holder is not added
   ocfs2_iop_get_acl <=== still take PR, because there is no 
lock holder on the tracking list
I mean we have no need to check if locked by me, just do inode 
lock and add holder.

This will make code more clean, IMO.
Oh, sorry, I get your point this time. I think we need to check it if there
is more than one process holding a PR lock on the same resource.  If I don't
understand you correctly, please tell me why you think it's not necessary
to check before getting the lock?

The code logic can only check if it is locked by myself. In the case

Why only...?
described above, ocfs2_permission is the first entry to take inode 
lock.
And even if check succeeds, it is a bug without unlock, but not the 
case

of recursive lock.


By checking succeeds, you mean it's locked by me, right? If so, this 
flag

  "arg_flags = OCFS2_META_LOCK_GETBH"
will be passed down to ocfs2_inode_lock_full(), which gets back 
buffer head of
the disk inode for us if necessary, but doesn't take cluster locking 
again. So, there is

no need to unlock in such case.

I am trying to state my point more clearly...


Thanks a lot!


The issue case you are trying to fix is:
Process A
take inode lock (phase1)
...
<<< race window (phase2, Process B)


The deadlock only happens if process B is on a remote node and request 
EX lock.


Quote the patch[1/2]'s commit message:

A deadlock will occur if a remote EX request comes in between two of
ocfs2_inode_lock().  Briefly describe how the deadlock is formed:

On one hand, the OCFS2_LOCK_BLOCKED flag of this lockres is set in
BAST (ocfs2_generic_handle_bast) when downconvert is started on behalf of
the remote EX lock request.  On the other hand, the recursive cluster lock
(the second one) will be blocked in __ocfs2_cluster_lock() because of
OCFS2_LOCK_BLOCKED.  But the downconvert never completes. Why? Because
there is no chance for the first cluster lock on this node to be unlocked
- we block ourselves in the code path.
---


...
take inode lock again (phase3)

Deadlock happens because Process B in phase2 and Process A in phase3
are waiting for each other.
It's local lock's (like i_mutex) responsibility to protect critical 
section from racing

among processes on the same node.

I know we are talking a cluster lock issue. And the Process B I described is
downconvert thread.



So you are trying to fix it by making phase3 finish without really doing


Phase3 can go ahead because this node is already under protection of 
cluster lock.

You said it was blocked...
"Another hand, the recursive cluster lock (the second one) will be 
blocked in

__ocfs2_cluster_lock() because of OCFS2_LOCK_BLOCKED."



__ocfs2_cluster_lock, then Process B can continue either.
Let us bear in mind that phase1 and phase3 are in the same context and
executed in order. That's why I think there is no need to check if 
locked

by myself in phase1.
If phase1 finds it is already locked by myself, that means the holder
is left by last operation without dec holder. That's why I think it 
is a bug

instead of a recursive lock case.


Did I answer your question?

Thanks!
Eric



Thanks,
Joseph


Thanks,
Eric



Thanks,
Joseph


Thanks,
Eric


Thanks,
Joseph


Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 
++--

  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode 
*inode, struct posix_acl *acl, int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, 
NULL);

-ocfs2_inode_unlock(inode, 1)

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-06 Thread kbuild test robot
Hi Eric,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.10-rc2 next-20170106]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Eric-Ren/fix-deadlock-caused-by-recursive-cluster-locking/20170106-200837
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

   include/linux/compiler.h:253:8: sparse: attribute 'no_sanitize_address': 
unknown attribute
>> fs/ocfs2/dlmglue.h:189:50: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:185:29: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:187:32: sparse: marked inline, but without a definition
>> fs/ocfs2/dlmglue.h:189:50: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:185:29: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:187:32: sparse: marked inline, but without a definition
--
   include/linux/compiler.h:253:8: sparse: attribute 'no_sanitize_address': 
unknown attribute
>> fs/ocfs2/dlmglue.h:189:50: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:185:29: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:187:32: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:187:32: sparse: marked inline, but without a definition
>> fs/ocfs2/dlmglue.h:189:50: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:185:29: sparse: marked inline, but without a definition
   fs/ocfs2/dlmglue.h:187:32: sparse: marked inline, but without a definition

vim +189 fs/ocfs2/dlmglue.h

34d024f8 Mark Fasheh 2007-09-24  173  void ocfs2_wake_downconvert_thread(struct 
ocfs2_super *osb);
ccd979bd Mark Fasheh 2005-12-15  174  
ccd979bd Mark Fasheh 2005-12-15  175  struct ocfs2_dlm_debug 
*ocfs2_new_dlm_debug(void);
ccd979bd Mark Fasheh 2005-12-15  176  void ocfs2_put_dlm_debug(struct 
ocfs2_dlm_debug *dlm_debug);
ccd979bd Mark Fasheh 2005-12-15  177  
63e0c48a Joel Becker 2008-01-30  178  /* To set the locking protocol on module 
initialization */
63e0c48a Joel Becker 2008-01-30  179  void ocfs2_set_locking_protocol(void);
9fb5ed3a Eric Ren2017-01-05  180  
9fb5ed3a Eric Ren2017-01-05  181  /*
9fb5ed3a Eric Ren2017-01-05  182   * Keep a list of processes who have 
interest in a lockres.
9fb5ed3a Eric Ren2017-01-05  183   * Note: this is now only uesed for check 
recursive cluster lock.
9fb5ed3a Eric Ren2017-01-05  184   */
9fb5ed3a Eric Ren2017-01-05  185  inline void ocfs2_add_holder(struct 
ocfs2_lock_res *lockres,
9fb5ed3a Eric Ren2017-01-05  186 struct 
ocfs2_holder *oh);
9fb5ed3a Eric Ren2017-01-05  187  inline void ocfs2_remove_holder(struct 
ocfs2_lock_res *lockres,
9fb5ed3a Eric Ren2017-01-05  188 struct 
ocfs2_holder *oh);
9fb5ed3a Eric Ren2017-01-05 @189  inline struct ocfs2_holder 
*ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres);
9fb5ed3a Eric Ren2017-01-05  190  
ccd979bd Mark Fasheh 2005-12-15  191  #endif/* DLMGLUE_H */

:: The code at line 189 was first introduced by commit
:: 9fb5ed3abab2100ae8d99cee9b25fb92e3154224 ocfs2/dlmglue: prepare tracking 
logic to avoid recursive cluster lock

:: TO: Eric Ren 
:: CC: 0day robot 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-06 Thread kbuild test robot
Hi Eric,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.10-rc2 next-20170106]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Eric-Ren/fix-deadlock-caused-by-recursive-cluster-locking/20170106-200837
config: ia64-allyesconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 6.2.0
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=ia64 

All errors (new ones prefixed by >>):

   In file included from fs/ocfs2/acl.c:31:0:
   fs/ocfs2/acl.c: In function 'ocfs2_iop_set_acl':
>> fs/ocfs2/dlmglue.h:189:29: error: inlining failed in call to always_inline 
>> 'ocfs2_is_locked_by_me': function body not available
inline struct ocfs2_holder *ocfs2_is_locked_by_me(struct ocfs2_lock_res 
*lockres);
^
   fs/ocfs2/acl.c:292:16: note: called from here
 has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
   ^~
   In file included from fs/ocfs2/acl.c:31:0:
>> fs/ocfs2/dlmglue.h:189:29: error: inlining failed in call to always_inline 
>> 'ocfs2_is_locked_by_me': function body not available
inline struct ocfs2_holder *ocfs2_is_locked_by_me(struct ocfs2_lock_res 
*lockres);
^
   fs/ocfs2/acl.c:292:16: note: called from here
 has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
   ^~
   In file included from fs/ocfs2/acl.c:31:0:
>> fs/ocfs2/dlmglue.h:185:13: error: inlining failed in call to always_inline 
>> 'ocfs2_add_holder': function body not available
inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
^~~~
   fs/ocfs2/acl.c:302:3: note: called from here
  ocfs2_add_holder(lockres, &oh);
  ^~
   In file included from fs/ocfs2/acl.c:31:0:
>> fs/ocfs2/dlmglue.h:187:13: error: inlining failed in call to always_inline 
>> 'ocfs2_remove_holder': function body not available
inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
^~~
   fs/ocfs2/acl.c:307:3: note: called from here
  ocfs2_remove_holder(lockres, &oh);
  ^
--
   In file included from arch/ia64/include/uapi/asm/intrinsics.h:21:0,
from arch/ia64/include/asm/intrinsics.h:10,
from arch/ia64/include/asm/bitops.h:18,
from include/linux/bitops.h:36,
from include/linux/kernel.h:10,
from include/linux/list.h:8,
from include/linux/wait.h:6,
from include/linux/fs.h:5,
from fs/ocfs2/file.c:27:
   fs/ocfs2/file.c: In function 'ocfs2_file_write_iter':
   arch/ia64/include/uapi/asm/cmpxchg.h:56:2: warning: value computed is not 
used [-Wunused-value]
((__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr
~^~~~
   fs/ocfs2/file.c:2334:3: note: in expansion of macro 'xchg'
  xchg(&iocb->ki_complete, saved_ki_complete);
  ^~~~
   In file included from fs/ocfs2/file.c:49:0:
   fs/ocfs2/file.c: In function 'ocfs2_permission':
>> fs/ocfs2/dlmglue.h:189:29: error: inlining failed in call to always_inline 
>> 'ocfs2_is_locked_by_me': function body not available
inline struct ocfs2_holder *ocfs2_is_locked_by_me(struct ocfs2_lock_res 
*lockres);
^
   fs/ocfs2/file.c:1345:16: note: called from here
 has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
   ^~
   In file included from fs/ocfs2/file.c:49:0:
>> fs/ocfs2/dlmglue.h:189:29: error: inlining failed in call to always_inline 
>> 'ocfs2_is_locked_by_me': function body not available
inline struct ocfs2_holder *ocfs2_is_locked_by_me(struct ocfs2_lock_res 
*lockres);
^
   fs/ocfs2/file.c:1345:16: note: called from here
 has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
   ^~
   In file included from fs/ocfs2/file.c:49:0:
>> fs/ocfs2/dlmglue.h:185:13: error: inlining failed in call to always_inline 
>> 'ocfs2_add_holder': function body not available
inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
^~~~
   fs/ocfs2/file.c:1353:3: note: called from here
  ocfs2_add_holder(lockres, &oh);
  ^~
   In file included from fs/ocfs2/file.c:49:0:
>> fs/ocfs2/dlmglue.h:187:13: error: inlining failed in call to always_inl

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-06 Thread Eric Ren

Hi!

On 01/06/2017 05:55 PM, Joseph Qi wrote:

On 17/1/6 17:13, Eric Ren wrote:

Hi,



Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().

As described cases above, shall we just add the tracking logic only for 
set/get_acl()?


The idea is to detect recursive locking on the running task stack. Take
case 1) for example, if ocfs2_permission() is not changed:

ocfs2_permission() <=== take PR, ocfs2_holder is not added
   ocfs2_iop_get_acl <=== still take PR, because there is no lock holder on the 
tracking list

I mean we have no need to check if locked by me, just do inode lock and add 
holder.
This will make code more clean, IMO.
Oh, sorry, I get your point this time. I think we need to check it if there
is more than one process holding a PR lock on the same resource.  If I don't
understand you correctly, please tell me why you think it's not necessary
to check before getting the lock?

The code logic can only check if it is locked by myself. In the case

Why only...?

described above, ocfs2_permission is the first entry to take inode lock.
And even if check succeeds, it is a bug without unlock, but not the case
of recursive lock.


By checking succeeds, you mean it's locked by me, right? If so, this flag
  "arg_flags = OCFS2_META_LOCK_GETBH"
will be passed down to ocfs2_inode_lock_full(), which gets back buffer head of
the disk inode for us if necessary, but doesn't take cluster locking again. So, 
there is
no need to unlock in such case.

I am trying to state my point more clearly...


Thanks a lot!


The issue case you are trying to fix is:
Process A
take inode lock (phase1)
...
<<< race window (phase2, Process B)


The deadlock only happens if process B is on a remote node and request EX lock.

Quote the patch[1/2]'s commit message:

A deadlock will occur if a remote EX request comes in between two of
ocfs2_inode_lock().  Briefly describe how the deadlock is formed:

On one hand, the OCFS2_LOCK_BLOCKED flag of this lockres is set in
BAST (ocfs2_generic_handle_bast) when downconvert is started on behalf of
the remote EX lock request.  On the other hand, the recursive cluster lock
(the second one) will be blocked in __ocfs2_cluster_lock() because of
OCFS2_LOCK_BLOCKED.  But the downconvert never completes. Why? Because
there is no chance for the first cluster lock on this node to be unlocked
- we block ourselves in the code path.
---


...
take inode lock again (phase3)

Deadlock happens because Process B in phase2 and Process A in phase3
are waiting for each other.

It's local lock's (like i_mutex) responsibility to protect critical section 
from racing
among processes on the same node.


So you are trying to fix it by making phase3 finish without really doing


Phase3 can go ahead because this node is already under protection of cluster 
lock.


__ocfs2_cluster_lock, then Process B can continue either.
Let us bear in mind that phase1 and phase3 are in the same context and
executed in order. That's why I think there is no need to check if locked
by myself in phase1.
If phase1 finds it is already locked by myself, that means the holder
is left by last operation without dec holder. That's why I think it is a bug
instead of a recursive lock case.


Did I answer your question?

Thanks!
Eric



Thanks,
Joseph


Thanks,
Eric



Thanks,
Joseph


Thanks,
Eric


Thanks,
Joseph


Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl 
*acl, int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-ocfs2_inode_unlock(inode, 1);
+
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
+
  return status;
  }
  @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, 
int type)

  struct buffer_head *di_bh = NULL;
  struct posix_a

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-06 Thread Joseph Qi

On 17/1/6 17:13, Eric Ren wrote:

Hi,



Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().
As described cases above, shall we just add the tracking logic 
only for set/get_acl()?


The idea is to detect recursive locking on the running task stack. Take
case 1) for example, if ocfs2_permission() is not changed:

ocfs2_permission() <=== take PR, ocfs2_holder is not added
   ocfs2_iop_get_acl <=== still take PR, because there is no lock 
holder on the tracking list
I mean we have no need to check if locked by me, just do inode lock 
and add holder.

This will make code more clean, IMO.
Oh, sorry, I get your point this time. I think we need to check it if there
is more than one process holding a PR lock on the same resource.  If I don't
understand you correctly, please tell me why you think it's not necessary
to check before getting the lock?

The code logic can only check if it is locked by myself. In the case

Why only...?

described above, ocfs2_permission is the first entry to take inode lock.
And even if check succeeds, it is a bug without unlock, but not the case
of recursive lock.


By checking succeeds, you mean it's locked by me, right? If so, this flag
  "arg_flags = OCFS2_META_LOCK_GETBH"
will be passed down to ocfs2_inode_lock_full(), which gets back buffer 
head of
the disk inode for us if necessary, but doesn't take cluster locking 
again. So, there is

no need to unlock in such case.

I am trying to state my point more clearly...
The issue case you are trying to fix is:
Process A
take inode lock (phase1)
...
<<< race window (phase2, Process B)
...
take inode lock again (phase3)

Deadlock happens because Process B in phase2 and Process A in phase3
are waiting for each other.
So you are trying to fix it by making phase3 finish without really doing
__ocfs2_cluster_lock, then Process B can continue either.
Let us bear in mind that phase1 and phase3 are in the same context and
executed in order. That's why I think there is no need to check if locked
by myself in phase1.
If phase1 finds it is already locked by myself, that means the holder
is left by last operation without dec holder. That's why I think it is a bug
instead of a recursive lock case.

Thanks,
Joseph


Thanks,
Eric



Thanks,
Joseph


Thanks,
Eric


Thanks,
Joseph


Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, 
struct posix_acl *acl, int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, 
NULL);

-ocfs2_inode_unlock(inode, 1);
+
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
+
  return status;
  }
  @@ -303,21 +318,35 @@ struct posix_acl 
*ocfs2_iop_get_acl(struct inode *inode, int type)

  struct buffer_head *di_bh = NULL;
  struct posix_acl *acl;
  int ret;
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
osb = OCFS2_SB(inode->i_sb);
  if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
  return NULL;
-ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
  if (ret < 0) {
  if (ret != -ENOENT)
  mlog_errno(ret);
  return ERR_PTR(ret);
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
  -ocfs2_inode_unlock(inode, 0);
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 0);
+}
  brelse(di_bh);
+
  return acl;
  }
  diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c488965..62be75d 100644
--- a/fs

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-06 Thread Eric Ren

Hi,



Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().

As described cases above, shall we just add the tracking logic only for 
set/get_acl()?


The idea is to detect recursive locking on the running task stack. Take
case 1) for example, if ocfs2_permission() is not changed:

ocfs2_permission() <=== take PR, ocfs2_holder is not added
   ocfs2_iop_get_acl <=== still take PR, because there is no lock holder on the 
tracking list

I mean we have no need to check if locked by me, just do inode lock and add 
holder.
This will make code more clean, IMO.
Oh, sorry, I get your point this time. I think we need to check it if there
is more than one process holding a PR lock on the same resource.  If I don't
understand you correctly, please tell me why you think it's not necessary
to check before getting the lock?

The code logic can only check if it is locked by myself. In the case

Why only...?

described above, ocfs2_permission is the first entry to take inode lock.
And even if check succeeds, it is a bug without unlock, but not the case
of recursive lock.


By checking succeeds, you mean it's locked by me, right? If so, this flag
  "arg_flags = OCFS2_META_LOCK_GETBH"
will be passed down to ocfs2_inode_lock_full(), which gets back buffer head of
the disk inode for us if necessary, but doesn't take cluster locking again. So, 
there is
no need to unlock in such case.

Thanks,
Eric



Thanks,
Joseph


Thanks,
Eric


Thanks,
Joseph


Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl 
*acl, int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-ocfs2_inode_unlock(inode, 1);
+
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
+
  return status;
  }
  @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int 
type)

  struct buffer_head *di_bh = NULL;
  struct posix_acl *acl;
  int ret;
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
osb = OCFS2_SB(inode->i_sb);
  if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
  return NULL;
-ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
  if (ret < 0) {
  if (ret != -ENOENT)
  mlog_errno(ret);
  return ERR_PTR(ret);
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
  -ocfs2_inode_unlock(inode, 0);
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 0);
+}
  brelse(di_bh);
+
  return acl;
  }
  diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c488965..62be75d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
  handle_t *handle = NULL;
  struct dquot *transfer_to[MAXQUOTAS] = { };
  int qtype;
+int arg_flags = 0, had_lock;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
trace_ocfs2_setattr(inode, dentry,
  (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
  }
  }
  -status = ocfs2_inode_lock(inode, &bh, 1);
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+had_lock = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (had_lock)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (stat

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-06 Thread Joseph Qi



On 17/1/6 16:21, Eric Ren wrote:

On 01/06/2017 03:14 PM, Joseph Qi wrote:



On 17/1/6 14:56, Eric Ren wrote:

On 01/06/2017 02:09 PM, Joseph Qi wrote:

Hi Eric,


On 17/1/5 23:31, Eric Ren wrote:
Commit 743b5f1434f5 ("ocfs2: take inode lock in 
ocfs2_iop_set/get_acl()")

results in a deadlock, as the author "Tariq Saeed" realized shortly
after the patch was merged. The discussion happened here
(https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html). 



The reason why taking cluster inode lock at vfs entry points opens up
a self deadlock window, is explained in the previous patch of this
series.

So far, we have seen two different code paths that have this issue.
1. do_sys_open
  may_open
   inode_permission
ocfs2_permission
 ocfs2_inode_lock() <=== take PR
  generic_permission
   get_acl
ocfs2_iop_get_acl
 ocfs2_inode_lock() <=== take PR
2. fchmod|fchmodat
 chmod_common
  notify_change
   ocfs2_setattr <=== take EX
posix_acl_chmod
 get_acl
  ocfs2_iop_get_acl <=== take PR
 ocfs2_iop_set_acl <=== take EX

Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().
As described cases above, shall we just add the tracking logic only 
for set/get_acl()?


The idea is to detect recursive locking on the running task stack. Take
case 1) for example, if ocfs2_permission() is not changed:

ocfs2_permission() <=== take PR, ocfs2_holder is not added
   ocfs2_iop_get_acl <=== still take PR, because there is no lock 
holder on the tracking list
I mean we have no need to check if locked by me, just do inode lock 
and add holder.

This will make code more clean, IMO.
Oh, sorry, I get your point this time. I think we need to check it if there
is more than one process holding a PR lock on the same resource.  If I don't
understand you correctly, please tell me why you think it's not necessary
to check before getting the lock?

The code logic can only check if it is locked by myself. In the case
described above, ocfs2_permission is the first entry to take inode lock.
And even if check succeeds, it is a bug without unlock, but not the case
of recursive lock.

Thanks,
Joseph


Thanks,
Eric


Thanks,
Joseph


Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, 
struct posix_acl *acl, int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-ocfs2_inode_unlock(inode, 1);
+
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
+
  return status;
  }
  @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct 
inode *inode, int type)

  struct buffer_head *di_bh = NULL;
  struct posix_acl *acl;
  int ret;
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
osb = OCFS2_SB(inode->i_sb);
  if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
  return NULL;
-ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
  if (ret < 0) {
  if (ret != -ENOENT)
  mlog_errno(ret);
  return ERR_PTR(ret);
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
  -ocfs2_inode_unlock(inode, 0);
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 0);
+}
  brelse(di_bh);
+
  return acl;
  }
  diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c488965..62be75d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +113

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-06 Thread Eric Ren

On 01/06/2017 03:14 PM, Joseph Qi wrote:



On 17/1/6 14:56, Eric Ren wrote:

On 01/06/2017 02:09 PM, Joseph Qi wrote:

Hi Eric,


On 17/1/5 23:31, Eric Ren wrote:

Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
results in a deadlock, as the author "Tariq Saeed" realized shortly
after the patch was merged. The discussion happened here
(https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html).

The reason why taking cluster inode lock at vfs entry points opens up
a self deadlock window, is explained in the previous patch of this
series.

So far, we have seen two different code paths that have this issue.
1. do_sys_open
  may_open
   inode_permission
ocfs2_permission
 ocfs2_inode_lock() <=== take PR
  generic_permission
   get_acl
ocfs2_iop_get_acl
 ocfs2_inode_lock() <=== take PR
2. fchmod|fchmodat
 chmod_common
  notify_change
   ocfs2_setattr <=== take EX
posix_acl_chmod
 get_acl
  ocfs2_iop_get_acl <=== take PR
 ocfs2_iop_set_acl <=== take EX

Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().

As in the cases described above, shall we add the tracking logic only for 
set/get_acl()?


The idea is to detect recursive locking on the running task stack. Take case 1) for 
example if ocfs2_permission()

is not changed:

ocfs2_permission() <=== take PR, ocfs2_holder is not added
   ocfs2_iop_get_acl <=== still take PR, because there is no lock holder on the tracking 
list

I mean we don't need to check whether it is locked by me; just take the inode lock and add 
the holder.
This will make the code cleaner, IMO.
Oh, sorry, I get your point this time. I think we need to check it if there is more than 
one process that holds
a PR lock on the same resource.  If I don't understand you correctly, please tell me why you 
think it's not necessary

to check before getting lock?

Thanks,
Eric


Thanks,
Joseph


Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, 
int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-ocfs2_inode_unlock(inode, 1);
+
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
+
  return status;
  }
  @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode 
*inode, int type)
  struct buffer_head *di_bh = NULL;
  struct posix_acl *acl;
  int ret;
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
osb = OCFS2_SB(inode->i_sb);
  if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
  return NULL;
-ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
  if (ret < 0) {
  if (ret != -ENOENT)
  mlog_errno(ret);
  return ERR_PTR(ret);
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
  -ocfs2_inode_unlock(inode, 0);
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 0);
+}
  brelse(di_bh);
+
  return acl;
  }
  diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c488965..62be75d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
  handle_t *handle = NULL;
  struct dquot *transfer_to[MAXQUOTAS] = { };
  int qtype;
+int arg_flags = 0, had_lock;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
trace_ocfs2_setat

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-05 Thread Joseph Qi



On 17/1/6 14:56, Eric Ren wrote:

On 01/06/2017 02:09 PM, Joseph Qi wrote:

Hi Eric,


On 17/1/5 23:31, Eric Ren wrote:
Commit 743b5f1434f5 ("ocfs2: take inode lock in 
ocfs2_iop_set/get_acl()")

results in a deadlock, as the author "Tariq Saeed" realized shortly
after the patch was merged. The discussion happened here
(https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html). 



The reason why taking cluster inode lock at vfs entry points opens up
a self deadlock window, is explained in the previous patch of this
series.

So far, we have seen two different code paths that have this issue.
1. do_sys_open
  may_open
   inode_permission
ocfs2_permission
 ocfs2_inode_lock() <=== take PR
  generic_permission
   get_acl
ocfs2_iop_get_acl
 ocfs2_inode_lock() <=== take PR
2. fchmod|fchmodat
 chmod_common
  notify_change
   ocfs2_setattr <=== take EX
posix_acl_chmod
 get_acl
  ocfs2_iop_get_acl <=== take PR
 ocfs2_iop_set_acl <=== take EX

Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().
As in the cases described above, shall we add the tracking logic only 
for set/get_acl()?


The idea is to detect recursive locking on the running task stack. 
Take case 1) for example if ocfs2_permission()

is not changed:

ocfs2_permission() <=== take PR, ocfs2_holder is not added
   ocfs2_iop_get_acl <=== still take PR, because there is no lock 
holder on the tracking list
I mean we don't need to check whether it is locked by me; just take the inode lock and 
add the holder.

This will make the code cleaner, IMO.

Thanks,
Joseph


Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, 
struct posix_acl *acl, int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-ocfs2_inode_unlock(inode, 1);
+
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
+
  return status;
  }
  @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct 
inode *inode, int type)

  struct buffer_head *di_bh = NULL;
  struct posix_acl *acl;
  int ret;
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
osb = OCFS2_SB(inode->i_sb);
  if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
  return NULL;
-ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
  if (ret < 0) {
  if (ret != -ENOENT)
  mlog_errno(ret);
  return ERR_PTR(ret);
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
  -ocfs2_inode_unlock(inode, 0);
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 0);
+}
  brelse(di_bh);
+
  return acl;
  }
  diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c488965..62be75d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, 
struct iattr *attr)

  handle_t *handle = NULL;
  struct dquot *transfer_to[MAXQUOTAS] = { };
  int qtype;
+int arg_flags = 0, had_lock;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
trace_ocfs2_setattr(inode, dentry,
  (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, 
struct iattr *attr)

  }
  }
  -status = ocfs2_inode_lock(inode, &bh, 1);
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+had_lock = (ocfs2_is_lo

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-05 Thread Eric Ren

On 01/06/2017 02:09 PM, Joseph Qi wrote:

Hi Eric,


On 17/1/5 23:31, Eric Ren wrote:

Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
results in a deadlock, as the author "Tariq Saeed" realized shortly
after the patch was merged. The discussion happened here
(https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html).

The reason why taking cluster inode lock at vfs entry points opens up
a self deadlock window, is explained in the previous patch of this
series.

So far, we have seen two different code paths that have this issue.
1. do_sys_open
  may_open
   inode_permission
ocfs2_permission
 ocfs2_inode_lock() <=== take PR
  generic_permission
   get_acl
ocfs2_iop_get_acl
 ocfs2_inode_lock() <=== take PR
2. fchmod|fchmodat
 chmod_common
  notify_change
   ocfs2_setattr <=== take EX
posix_acl_chmod
 get_acl
  ocfs2_iop_get_acl <=== take PR
 ocfs2_iop_set_acl <=== take EX

Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().

As in the cases described above, shall we add the tracking logic only for 
set/get_acl()?


The idea is to detect recursive locking on the running task stack. Take case 1) for example 
if ocfs2_permission()

is not changed:

ocfs2_permission() <=== take PR, ocfs2_holder is not added
   ocfs2_iop_get_acl <=== still take PR, because there is no lock holder on the 
tracking list

Thanks for your review;-)
Eric



Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, 
int type)

  {
  struct buffer_head *bh = NULL;
  int status = 0;
-
-status = ocfs2_inode_lock(inode, &bh, 1);
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  if (status != -ENOENT)
  mlog_errno(status);
  return status;
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
+
  status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-ocfs2_inode_unlock(inode, 1);
+
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 1);
+}
  brelse(bh);
+
  return status;
  }
  @@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode 
*inode, int type)
  struct buffer_head *di_bh = NULL;
  struct posix_acl *acl;
  int ret;
+int arg_flags = 0, has_locked;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
osb = OCFS2_SB(inode->i_sb);
  if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
  return NULL;
-ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (has_locked)
+arg_flags = OCFS2_META_LOCK_GETBH;
+ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
  if (ret < 0) {
  if (ret != -ENOENT)
  mlog_errno(ret);
  return ERR_PTR(ret);
  }
+if (!has_locked)
+ocfs2_add_holder(lockres, &oh);
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
  -ocfs2_inode_unlock(inode, 0);
+if (!has_locked) {
+ocfs2_remove_holder(lockres, &oh);
+ocfs2_inode_unlock(inode, 0);
+}
  brelse(di_bh);
+
  return acl;
  }
  diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c488965..62be75d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
  handle_t *handle = NULL;
  struct dquot *transfer_to[MAXQUOTAS] = { };
  int qtype;
+int arg_flags = 0, had_lock;
+struct ocfs2_holder oh;
+struct ocfs2_lock_res *lockres;
trace_ocfs2_setattr(inode, dentry,
  (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
  }
  }
  -status = ocfs2_inode_lock(inode, &bh, 1);
+lockres = &OCFS2_I(inode)->ip_inode_lockres;
+had_lock = (ocfs2_is_locked_by_me(lockres) != NULL);
+if (had_lock)
+arg_flags = OCFS2_META_LOCK_GETBH;
+status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
  if (status < 0) {
  

Re: [PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-05 Thread Joseph Qi

Hi Eric,


On 17/1/5 23:31, Eric Ren wrote:

Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
results in a deadlock, as the author "Tariq Saeed" realized shortly
after the patch was merged. The discussion happened here
(https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html).

The reason why taking cluster inode lock at vfs entry points opens up
a self deadlock window, is explained in the previous patch of this
series.

So far, we have seen two different code paths that have this issue.
1. do_sys_open
  may_open
   inode_permission
ocfs2_permission
 ocfs2_inode_lock() <=== take PR
  generic_permission
   get_acl
ocfs2_iop_get_acl
 ocfs2_inode_lock() <=== take PR
2. fchmod|fchmodat
 chmod_common
  notify_change
   ocfs2_setattr <=== take EX
posix_acl_chmod
 get_acl
  ocfs2_iop_get_acl <=== take PR
 ocfs2_iop_set_acl <=== take EX

Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().
As in the cases described above, shall we add the tracking logic only for 
set/get_acl()?


Thanks,
Joseph


Signed-off-by: Eric Ren 
---
  fs/ocfs2/acl.c  | 39 ++-
  fs/ocfs2/file.c | 44 ++--
  2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct 
posix_acl *acl, int type)
  {
struct buffer_head *bh = NULL;
int status = 0;
-
-   status = ocfs2_inode_lock(inode, &bh, 1);
+   int arg_flags = 0, has_locked;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
+
+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+   if (has_locked)
+   arg_flags = OCFS2_META_LOCK_GETBH;
+   status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
return status;
}
+   if (!has_locked)
+   ocfs2_add_holder(lockres, &oh);
+
status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-   ocfs2_inode_unlock(inode, 1);
+
+   if (!has_locked) {
+   ocfs2_remove_holder(lockres, &oh);
+   ocfs2_inode_unlock(inode, 1);
+   }
brelse(bh);
+
return status;
  }
  
@@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)

struct buffer_head *di_bh = NULL;
struct posix_acl *acl;
int ret;
+   int arg_flags = 0, has_locked;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
  
  	osb = OCFS2_SB(inode->i_sb);

if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return NULL;
-   ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+   if (has_locked)
+   arg_flags = OCFS2_META_LOCK_GETBH;
+   ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
if (ret < 0) {
if (ret != -ENOENT)
mlog_errno(ret);
return ERR_PTR(ret);
}
+   if (!has_locked)
+   ocfs2_add_holder(lockres, &oh);
  
  	acl = ocfs2_get_acl_nolock(inode, type, di_bh);
  
-	ocfs2_inode_unlock(inode, 0);

+   if (!has_locked) {
+   ocfs2_remove_holder(lockres, &oh);
+   ocfs2_inode_unlock(inode, 0);
+   }
brelse(di_bh);
+
return acl;
  }
  
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index c488965..62be75d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
handle_t *handle = NULL;
struct dquot *transfer_to[MAXQUOTAS] = { };
int qtype;
+   int arg_flags = 0, had_lock;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
  
  	trace_ocfs2_setattr(inode, dentry,

(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
}
}
  
-	status = ocfs2_inode_lock(inode, &bh, 1);

+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   had_lock = (ocfs2_is_locked_by_me(lockres) != NULL);
+   if (had_lock)
+   arg_flags = OCFS2_META_LOCK_GETBH;
+   status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
goto bail_unlo

[PATCH 2/2] ocfs2: fix deadlocks when taking inode lock at vfs entry points

2017-01-05 Thread Eric Ren
Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
results in a deadlock, as the author "Tariq Saeed" realized shortly
after the patch was merged. The discussion happened here
(https://oss.oracle.com/pipermail/ocfs2-devel/2015-September/011085.html).

The reason why taking cluster inode lock at vfs entry points opens up
a self deadlock window, is explained in the previous patch of this
series.

So far, we have seen two different code paths that have this issue.
1. do_sys_open
 may_open
  inode_permission
   ocfs2_permission
ocfs2_inode_lock() <=== take PR
 generic_permission
  get_acl
   ocfs2_iop_get_acl
ocfs2_inode_lock() <=== take PR
2. fchmod|fchmodat
chmod_common
 notify_change
  ocfs2_setattr <=== take EX
   posix_acl_chmod
get_acl
 ocfs2_iop_get_acl <=== take PR
ocfs2_iop_set_acl <=== take EX

Fixes them by adding the tracking logic (in the previous patch) for
these funcs above, ocfs2_permission(), ocfs2_iop_[set|get]_acl(),
ocfs2_setattr().

Signed-off-by: Eric Ren 
---
 fs/ocfs2/acl.c  | 39 ++-
 fs/ocfs2/file.c | 44 ++--
 2 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index bed1fcb..c539890 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -284,16 +284,31 @@ int ocfs2_iop_set_acl(struct inode *inode, struct 
posix_acl *acl, int type)
 {
struct buffer_head *bh = NULL;
int status = 0;
-
-   status = ocfs2_inode_lock(inode, &bh, 1);
+   int arg_flags = 0, has_locked;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
+
+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+   if (has_locked)
+   arg_flags = OCFS2_META_LOCK_GETBH;
+   status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
return status;
}
+   if (!has_locked)
+   ocfs2_add_holder(lockres, &oh);
+
status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
-   ocfs2_inode_unlock(inode, 1);
+
+   if (!has_locked) {
+   ocfs2_remove_holder(lockres, &oh);
+   ocfs2_inode_unlock(inode, 1);
+   }
brelse(bh);
+
return status;
 }
 
@@ -303,21 +318,35 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, 
int type)
struct buffer_head *di_bh = NULL;
struct posix_acl *acl;
int ret;
+   int arg_flags = 0, has_locked;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
 
osb = OCFS2_SB(inode->i_sb);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return NULL;
-   ret = ocfs2_inode_lock(inode, &di_bh, 0);
+
+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   has_locked = (ocfs2_is_locked_by_me(lockres) != NULL);
+   if (has_locked)
+   arg_flags = OCFS2_META_LOCK_GETBH;
+   ret = ocfs2_inode_lock_full(inode, &di_bh, 0, arg_flags);
if (ret < 0) {
if (ret != -ENOENT)
mlog_errno(ret);
return ERR_PTR(ret);
}
+   if (!has_locked)
+   ocfs2_add_holder(lockres, &oh);
 
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
 
-   ocfs2_inode_unlock(inode, 0);
+   if (!has_locked) {
+   ocfs2_remove_holder(lockres, &oh);
+   ocfs2_inode_unlock(inode, 0);
+   }
brelse(di_bh);
+
return acl;
 }
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c488965..62be75d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1138,6 +1138,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
handle_t *handle = NULL;
struct dquot *transfer_to[MAXQUOTAS] = { };
int qtype;
+   int arg_flags = 0, had_lock;
+   struct ocfs2_holder oh;
+   struct ocfs2_lock_res *lockres;
 
trace_ocfs2_setattr(inode, dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1173,13 +1176,20 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr 
*attr)
}
}
 
-   status = ocfs2_inode_lock(inode, &bh, 1);
+   lockres = &OCFS2_I(inode)->ip_inode_lockres;
+   had_lock = (ocfs2_is_locked_by_me(lockres) != NULL);
+   if (had_lock)
+   arg_flags = OCFS2_META_LOCK_GETBH;
+   status = ocfs2_inode_lock_full(inode, &bh, 1, arg_flags);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
goto bail_unlock_rw;
}
-   inode_locked = 1;
+   if (!had_lock) {
+   ocfs2_add_holder(lockres, &oh);
+   inode_locked = 1;
+