From: NeilBrown <[email protected]>

cephfs uses the results of readdir to prime the dcache.  Using d_alloc()
is no longer safe, even with an exclusive lock on the parent, as
d_alloc_parallel() will be allowed to run unlocked.  The safe interface
is d_alloc_noblock().  In the rare case that this blocks because there
is a concurrent lookup for the same name there is little cost in not
completing the allocation in the directory code.

It is still possible to create an inode at this point so we do that even
when there is no dentry.

So change to use d_alloc_noblock() and handle -EWOULDBLOCK.  Also use
QSTR_LEN() to initialise dname, and try_lookup_noperm instead of
full_name_hash() and d_lookup().

Signed-off-by: NeilBrown <[email protected]>
---
 fs/ceph/inode.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0982fbda2a82..8557b207d337 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2011,9 +2011,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
                struct ceph_vino tvino;
 
-               dname.name = rde->name;
-               dname.len = rde->name_len;
-               dname.hash = full_name_hash(parent, dname.name, dname.len);
+               dname = QSTR_LEN(rde->name, rde->name_len);
 
                tvino.ino = le64_to_cpu(rde->inode.in->ino);
                tvino.snap = le64_to_cpu(rde->inode.in->snapid);
@@ -2029,20 +2027,24 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                }
 
 retry_lookup:
-               dn = d_lookup(parent, &dname);
+               dn = try_lookup_noperm(&dname, parent);
                doutc(cl, "d_lookup on parent=%p name=%.*s got %p\n",
                      parent, dname.len, dname.name, dn);
-
-               if (!dn) {
-                       dn = d_alloc(parent, &dname);
-                       doutc(cl, "d_alloc %p '%.*s' = %p\n", parent,
+               if (IS_ERR(dn)) {
+                       err = PTR_ERR(dn);
+                       goto out;
+               } else if (!dn) {
+                       dn = d_alloc_noblock(parent, &dname);
+                       doutc(cl, "d_alloc_noblock %p '%.*s' = %p\n", parent,
                              dname.len, dname.name, dn);
-                       if (!dn) {
-                               doutc(cl, "d_alloc badness\n");
-                               err = -ENOMEM;
+                       if (dn == ERR_PTR(-EWOULDBLOCK)) {
+                               /* Just handle the inode info */
+                               dn = NULL;
+                       } else if (IS_ERR(dn)) {
+                               doutc(cl, "d_alloc_noblock badness\n");
+                               err = PTR_ERR(dn);
                                goto out;
-                       }
-                       if (rde->is_nokey) {
+                       } else if (rde->is_nokey) {
                                spin_lock(&dn->d_lock);
                                dn->d_flags |= DCACHE_NOKEY_NAME;
                                spin_unlock(&dn->d_lock);
@@ -2069,7 +2071,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                }
 
                /* inode */
-               if (d_really_is_positive(dn)) {
+               if (dn && d_really_is_positive(dn)) {
                        in = d_inode(dn);
                } else {
                        in = ceph_get_inode(parent->d_sb, tvino, NULL);
@@ -2087,21 +2089,22 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                if (ret < 0) {
                        pr_err_client(cl, "badness on %p %llx.%llx\n", in,
                                      ceph_vinop(in));
-                       if (d_really_is_negative(dn)) {
+                       if (!dn || d_really_is_negative(dn)) {
                                if (inode_state_read_once(in) & I_NEW) {
                                        ihold(in);
                                        discard_new_inode(in);
                                }
                                iput(in);
                        }
-                       d_drop(dn);
+                       if (dn)
+                               d_drop(dn);
                        err = ret;
                        goto next_item;
                }
                if (inode_state_read_once(in) & I_NEW)
                        unlock_new_inode(in);
 
-               if (d_really_is_negative(dn)) {
+               if (d_in_lookup(dn) || d_really_is_negative(dn)) {
                        if (ceph_security_xattr_deadlock(in)) {
                                doutc(cl, " skip splicing dn %p to inode %p"
                                      " (security xattr deadlock)\n", dn, in);
-- 
2.50.0.107.gf914562f5916.dirty


Reply via email to