From: NeilBrown <[email protected]>

d_alloc_parallel() can block waiting on a d_in_lookup() dentry
so it is important to order it consistently with other blocking locks
such as inode_lock().

Currenty d_alloc_parallel() is ordered after inode_lock(): it can be
called while the inode is locked, and so the inode cannot be locked
while a d_in_lookup() dentry is held.

This patch reverses that order.  d_alloc_parallel() must now be called
*before* locking the directory, and must not be called afterwards.  This
allows directory locking to be moved closer to the filesystem
operations, and ultimately into those operations.

lookup_one_qstr_excl() is now called without an lock held, exclusive or
otherwise, so the "_excl" is dropped - it is now lookup_one_qstr().

As a lock is taken *after* lookup, start_dirop() and start_renaming()
must ensure that if the dentry isn't d_in_lookup() that after the lock
is taken the parent is still correct and the dentry is still hashed.

lookup_one_qstr() and lookup_slow() don't need to re-check the parent as
the dentry is always d_in_lookup() so parent cannot change.

The locking in lookup_slow() is moved into __lookup_slow() immediately
before/after ->lookup, and lookup_slow() just sets the task state for
waiting.

Parent locking is removed from open_last_lookups() and performed in
lookup_open().  A shared lock is taken if ->lookup() needs to be called.
An exclusive lock is taken separately if ->create() needs to be called -
with checks that the dentry hasn't become positive.

If ->atomic_open is needed we take exclusive or shared parent lock as
appropriate and check for a positive dentry or DEAD parent.

The fsnotify_create() call is kept inside the locked region in
lookup_open().  I don't know if this is important.

Signed-off-by: NeilBrown <[email protected]>
---
 fs/namei.c | 239 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 154 insertions(+), 85 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index bba419f2fc53..3d213070a515 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1773,8 +1773,19 @@ static struct dentry *lookup_dcache(const struct qstr 
*name,
        return dentry;
 }
 
+static inline bool inode_lock_shared_state(struct inode *inode, unsigned int 
state)
+{
+       if (state == TASK_KILLABLE) {
+               if (down_read_killable(&inode->i_rwsem) != 0) {
+                       return false;
+               }
+       } else {
+               inode_lock_shared(inode);
+       }
+       return true;
+}
+
 /*
- * Parent directory has inode locked.
  * If Lookup_EXCL or LOOKUP_RENAME_TARGET is set
  * d_lookup_done() must be called before the dentry is dput()
  * If the dentry is not d_in_lookup():
@@ -1783,8 +1794,9 @@ static struct dentry *lookup_dcache(const struct qstr 
*name,
  * If it is d_in_lookup() then these conditions can only be checked by the
  * file system when carrying out the intent (create or rename).
  */
-static struct dentry *lookup_one_qstr_excl(const struct qstr *name,
-                                          struct dentry *base, unsigned int 
flags)
+static struct dentry *lookup_one_qstr(const struct qstr *name,
+                                     struct dentry *base, unsigned int flags,
+                                     unsigned int state)
 {
        struct dentry *dentry;
        struct dentry *old;
@@ -1806,7 +1818,16 @@ static struct dentry *lookup_one_qstr_excl(const struct 
qstr *name,
                /* Raced with another thread which did the lookup */
                goto found;
 
-       old = dir->i_op->lookup(dir, dentry, flags);
+       if (!inode_lock_shared_state(dir, state)) {
+               d_lookup_done(dentry);
+               dput(dentry);
+               return ERR_PTR(-EINTR);
+       }
+       if (unlikely(IS_DEADDIR(dir)))
+               old = ERR_PTR(-ENOENT);
+       else
+               old = dir->i_op->lookup(dir, dentry, flags | LOOKUP_SHARED);
+       inode_unlock_shared(dir);
        if (unlikely(old)) {
                d_lookup_done(dentry);
                dput(dentry);
@@ -1897,7 +1918,8 @@ static struct dentry *lookup_fast(struct nameidata *nd)
 /* Fast lookup failed, do it the slow way */
 static struct dentry *__lookup_slow(const struct qstr *name,
                                    struct dentry *dir,
-                                   unsigned int flags)
+                                   unsigned int flags,
+                                   unsigned int state)
 {
        struct dentry *dentry, *old;
        struct inode *inode = dir->d_inode;
@@ -1920,8 +1942,17 @@ static struct dentry *__lookup_slow(const struct qstr 
*name,
                        dput(dentry);
                        dentry = ERR_PTR(error);
                }
+       } else if (!inode_lock_shared_state(inode, state)) {
+               d_lookup_done(dentry);
+               dput(dentry);
+               return ERR_PTR(-EINTR);
        } else {
-               old = inode->i_op->lookup(inode, dentry, flags);
+               if (unlikely(IS_DEADDIR(inode)))
+                       old = ERR_PTR(-ENOENT);
+               else
+                       old = inode->i_op->lookup(inode, dentry,
+                                                 flags | LOOKUP_SHARED);
+               inode_unlock_shared(inode);
                d_lookup_done(dentry);
                if (unlikely(old)) {
                        dput(dentry);
@@ -1935,26 +1966,14 @@ static noinline struct dentry *lookup_slow(const struct 
qstr *name,
                                  struct dentry *dir,
                                  unsigned int flags)
 {
-       struct inode *inode = dir->d_inode;
-       struct dentry *res;
-       inode_lock_shared(inode);
-       res = __lookup_slow(name, dir, flags | LOOKUP_SHARED);
-       inode_unlock_shared(inode);
-       return res;
+       return __lookup_slow(name, dir, flags | LOOKUP_SHARED, TASK_NORMAL);
 }
 
 static struct dentry *lookup_slow_killable(const struct qstr *name,
                                           struct dentry *dir,
                                           unsigned int flags)
 {
-       struct inode *inode = dir->d_inode;
-       struct dentry *res;
-
-       if (inode_lock_shared_killable(inode))
-               return ERR_PTR(-EINTR);
-       res = __lookup_slow(name, dir, flags | LOOKUP_SHARED);
-       inode_unlock_shared(inode);
-       return res;
+       return __lookup_slow(name, dir, flags | LOOKUP_SHARED, TASK_KILLABLE);
 }
 
 static inline int may_lookup(struct mnt_idmap *idmap,
@@ -2908,18 +2927,26 @@ static struct dentry *__start_dirop(struct dentry 
*parent, struct qstr *name,
        struct dentry *dentry;
        struct inode *dir = d_inode(parent);
 
-       if (state == TASK_KILLABLE) {
-               int ret = down_write_killable_nested(&dir->i_rwsem,
-                                                    I_MUTEX_PARENT);
-               if (ret)
-                       return ERR_PTR(ret);
-       } else {
-               inode_lock_nested(dir, I_MUTEX_PARENT);
-       }
-       dentry = lookup_one_qstr_excl(name, parent, lookup_flags);
-       if (IS_ERR(dentry))
+       while(1) {
+               dentry = lookup_one_qstr(name, parent, lookup_flags, state);
+               if (IS_ERR(dentry))
+                       return dentry;
+               if (state == TASK_KILLABLE) {
+                       if (down_write_killable_nested(&dir->i_rwsem, 
I_MUTEX_PARENT) != 0) {
+                               d_lookup_done(dentry);
+                               dput(dentry);
+                               return ERR_PTR(-EINTR);
+                       }
+               } else {
+                       inode_lock_nested(dir, I_MUTEX_PARENT);
+               }
+               if (d_in_lookup(dentry) ||
+                   (!d_unhashed(dentry) && dentry->d_parent == parent))
+                       return dentry;
                inode_unlock(dir);
-       return dentry;
+               d_lookup_done(dentry);
+               dput(dentry);
+       }
 }
 
 /**
@@ -3830,26 +3857,37 @@ __start_renaming(struct renamedata *rd, int 
lookup_flags,
        if (rd->flags & RENAME_NOREPLACE)
                target_flags |= LOOKUP_EXCL;
 
-       trap = lock_rename(rd->old_parent, rd->new_parent);
-       if (IS_ERR(trap))
-               return PTR_ERR(trap);
-
-       d1 = lookup_one_qstr_excl(old_last, rd->old_parent,
-                                 lookup_flags);
+retry:
+       d1 = lookup_one_qstr(old_last, rd->old_parent,
+                            lookup_flags, TASK_NORMAL);
        err = PTR_ERR(d1);
        if (IS_ERR(d1))
-               goto out_unlock;
+               goto out_err;
 
-       d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
-                                 lookup_flags | target_flags);
+       d2 = lookup_one_qstr(new_last, rd->new_parent,
+                            lookup_flags | target_flags, TASK_NORMAL);
        err = PTR_ERR(d2);
        if (IS_ERR(d2))
                goto out_dput_d1;
 
+       trap = lock_rename(rd->old_parent, rd->new_parent);
+       err = PTR_ERR(trap);
+       if (IS_ERR(trap))
+               goto out_unlock;
+
+       if (unlikely((!d_in_lookup(d1) && d_unhashed(d1)) || d1->d_parent != 
rd->old_parent ||
+                    (!d_in_lookup(d2) && d_unhashed(d2)) || d2->d_parent != 
rd->new_parent)) {
+               unlock_rename(rd->old_parent, rd->new_parent);
+               d_lookup_done(d1); dput(d1);
+               d_lookup_done(d2); dput(d2);
+               dput(trap);
+               goto retry;
+       }
+
        if (d1 == trap) {
                /* source is an ancestor of target */
                err = -EINVAL;
-               goto out_dput_d2;
+               goto out_unlock;
        }
 
        if (d2 == trap) {
@@ -3858,7 +3896,7 @@ __start_renaming(struct renamedata *rd, int lookup_flags,
                        err = -EINVAL;
                else
                        err = -ENOTEMPTY;
-               goto out_dput_d2;
+               goto out_unlock;
        }
 
        rd->old_dentry = d1;
@@ -3866,14 +3904,14 @@ __start_renaming(struct renamedata *rd, int 
lookup_flags,
        dget(rd->old_parent);
        return 0;
 
-out_dput_d2:
+out_unlock:
+       unlock_rename(rd->old_parent, rd->new_parent);
        d_lookup_done(d2);
        dput(d2);
 out_dput_d1:
        d_lookup_done(d1);
        dput(d1);
-out_unlock:
-       unlock_rename(rd->old_parent, rd->new_parent);
+out_err:
        return err;
 }
 
@@ -3927,10 +3965,22 @@ __start_renaming_dentry(struct renamedata *rd, int 
lookup_flags,
        if (rd->flags & RENAME_NOREPLACE)
                target_flags |= LOOKUP_EXCL;
 
-       /* Already have the dentry - need to be sure to lock the correct parent 
*/
+retry:
+       d2 = lookup_one_qstr(new_last, rd->new_parent,
+                            lookup_flags | target_flags, TASK_NORMAL);
+       err = PTR_ERR(d2);
+       if (IS_ERR(d2))
+               goto out_unlock;
+
+       /*
+        * Already have the old_dentry - need to be sure to lock
+        * the correct parent
+        */
        trap = lock_rename_child(old_dentry, rd->new_parent);
+       err = PTR_ERR(trap);
        if (IS_ERR(trap))
-               return PTR_ERR(trap);
+               goto out_dput_d2;
+
        if (d_unhashed(old_dentry) ||
            (rd->old_parent && rd->old_parent != old_dentry->d_parent)) {
                /* dentry was removed, or moved and explicit parent requested */
@@ -3938,16 +3988,19 @@ __start_renaming_dentry(struct renamedata *rd, int 
lookup_flags,
                goto out_unlock;
        }
 
-       d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
-                                 lookup_flags | target_flags);
-       err = PTR_ERR(d2);
-       if (IS_ERR(d2))
-               goto out_unlock;
+       if (unlikely((!d_in_lookup(d2) && d_unhashed(d2)) ||
+                    d2->d_parent != rd->new_parent)) {
+               /* d2 was moved/removed before lock - repeat lookup */
+               unlock_rename(old_dentry->d_parent, rd->new_parent);
+               d_lookup_done(d2); dput(d2);
+               dput(trap);
+               goto retry;
+       }
 
        if (old_dentry == trap) {
                /* source is an ancestor of target */
                err = -EINVAL;
-               goto out_dput_d2;
+               goto out_unlock;
        }
 
        if (d2 == trap) {
@@ -3956,7 +4009,7 @@ __start_renaming_dentry(struct renamedata *rd, int 
lookup_flags,
                        err = -EINVAL;
                else
                        err = -ENOTEMPTY;
-               goto out_dput_d2;
+               goto out_unlock;
        }
 
        rd->old_dentry = dget(old_dentry);
@@ -3964,11 +4017,11 @@ __start_renaming_dentry(struct renamedata *rd, int 
lookup_flags,
        rd->old_parent = dget(old_dentry->d_parent);
        return 0;
 
+out_unlock:
+       unlock_rename(old_dentry->d_parent, rd->new_parent);
 out_dput_d2:
        d_lookup_done(d2);
        dput(d2);
-out_unlock:
-       unlock_rename(old_dentry->d_parent, rd->new_parent);
        return err;
 }
 
@@ -4319,8 +4372,19 @@ static struct dentry *atomic_open(const struct path 
*path, struct dentry *dentry
 
        file->__f_path.dentry = DENTRY_NOT_SET;
        file->__f_path.mnt = path->mnt;
-       error = dir->i_op->atomic_open(dir, dentry, file,
-                                      open_to_namei_flags(open_flag), mode);
+
+       if (open_flag & O_CREAT)
+               inode_lock(dir);
+       else
+               inode_lock_shared(dir);
+       if (dentry->d_inode)
+               error = finish_no_open(file, NULL);
+       else if (unlikely(IS_DEADDIR(dir)))
+               error = -ENOENT;
+       else
+               error = dir->i_op->atomic_open(dir, dentry, file,
+                                              open_to_namei_flags(open_flag),
+                                              mode);
        d_lookup_done(dentry);
        if (!error) {
                if (file->f_mode & FMODE_OPENED) {
@@ -4339,6 +4403,13 @@ static struct dentry *atomic_open(const struct path 
*path, struct dentry *dentry
                                error = -ENOENT;
                }
        }
+       if (!error && (file->f_mode & FMODE_CREATED))
+               fsnotify_create(dir, dentry);
+       if (open_flag & O_CREAT)
+               inode_unlock(dir);
+       else
+               inode_unlock_shared(dir);
+
        if (error) {
                dput(dentry);
                dentry = ERR_PTR(error);
@@ -4372,10 +4443,6 @@ static struct dentry *lookup_open(struct nameidata *nd, 
struct file *file,
        struct dentry *dentry;
        int error, create_error = 0;
        umode_t mode = op->mode;
-       unsigned int shared_flag = (op->open_flag & O_CREAT) ? 0 : 
LOOKUP_SHARED;
-
-       if (unlikely(IS_DEADDIR(dir_inode)))
-               return ERR_PTR(-ENOENT);
 
        file->f_mode &= ~FMODE_CREATED;
        dentry = d_lookup(dir, &nd->last);
@@ -4420,7 +4487,7 @@ static struct dentry *lookup_open(struct nameidata *nd, 
struct file *file,
        if (open_flag & O_CREAT) {
                if (open_flag & O_EXCL)
                        open_flag &= ~O_TRUNC;
-               mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode);
+               mode = vfs_prepare_mode(idmap, dir_inode, mode, mode, mode);
                if (likely(got_write))
                        create_error = may_o_create(idmap, &nd->path,
                                                    dentry, mode);
@@ -4439,8 +4506,15 @@ static struct dentry *lookup_open(struct nameidata *nd, 
struct file *file,
        }
 
        if (d_in_lookup(dentry)) {
-               struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
-                                                            nd->flags | 
shared_flag);
+               struct dentry *res;
+
+               inode_lock_shared(dir_inode);
+               if (IS_DEADDIR(dir_inode))
+                       res = ERR_PTR(-ENOENT);
+               else
+                       res = dir_inode->i_op->lookup(dir_inode, dentry,
+                                                     nd->flags | 
LOOKUP_SHARED);
+               inode_unlock_shared(dir_inode);
                d_lookup_done(dentry);
                if (unlikely(res)) {
                        if (IS_ERR(res)) {
@@ -4459,15 +4533,22 @@ static struct dentry *lookup_open(struct nameidata *nd, 
struct file *file,
                if (error)
                        goto out_dput;
 
-               file->f_mode |= FMODE_CREATED;
-               audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
-               if (!dir_inode->i_op->create) {
-                       error = -EACCES;
-                       goto out_dput;
-               }
+               inode_lock(dir_inode);
+               if (!dentry->d_inode && !unlikely(IS_DEADDIR(dir_inode))) {
+                       file->f_mode |= FMODE_CREATED;
+                       audit_inode_child(dir_inode, dentry, 
AUDIT_TYPE_CHILD_CREATE);
+                       if (!dir_inode->i_op->create) {
+                               error = -EACCES;
+                               goto out_dput;
+                       }
 
-               error = dir_inode->i_op->create(idmap, dir_inode, dentry,
-                                               mode, open_flag & O_EXCL);
+                       error = dir_inode->i_op->create(idmap, dir_inode, 
dentry,
+                                                       mode, open_flag & 
O_EXCL);
+                       if (!error)
+                               fsnotify_create(dir_inode, dentry);
+               } else if (!dentry->d_inode)
+                       error = -ENOENT;
+               inode_unlock(dir_inode);
                if (error)
                        goto out_dput;
        }
@@ -4522,7 +4603,6 @@ static const char *open_last_lookups(struct nameidata *nd,
                   struct file *file, const struct open_flags *op)
 {
        struct delegated_inode delegated_inode = { };
-       struct dentry *dir = nd->path.dentry;
        int open_flag = op->open_flag;
        bool got_write = false;
        struct dentry *dentry;
@@ -4562,22 +4642,11 @@ static const char *open_last_lookups(struct nameidata 
*nd,
                 * dropping this one anyway.
                 */
        }
-       if (open_flag & O_CREAT)
-               inode_lock(dir->d_inode);
-       else
-               inode_lock_shared(dir->d_inode);
        dentry = lookup_open(nd, file, op, got_write, &delegated_inode);
        if (!IS_ERR(dentry)) {
-               if (file->f_mode & FMODE_CREATED)
-                       fsnotify_create(dir->d_inode, dentry);
                if (file->f_mode & FMODE_OPENED)
                        fsnotify_open(file);
        }
-       if (open_flag & O_CREAT)
-               inode_unlock(dir->d_inode);
-       else
-               inode_unlock_shared(dir->d_inode);
-
        if (got_write)
                mnt_drop_write(nd->path.mnt);
 
-- 
2.50.0.107.gf914562f5916.dirty


Reply via email to