On Mon, Jan 4, 2021 at 7:57 PM Dr. David Alan Gilbert
<[email protected]> wrote:
>
> * Vivek Goyal ([email protected]) wrote:
> > On Mon, Jan 04, 2021 at 04:00:13PM +0000, Dr. David Alan Gilbert wrote:
> > > Hi,
> > >   On virtio-fs we're hitting a problem with NFS, where
> > > unlinking a file in a directory and then rmdir'ing that
> > > directory fails complaining about the directory not being empty.
> > >
> > > The problem here is that if a file has an open fd, NFS doesn't
> > > actually delete the file on unlink, it just renames it to
> > > a hidden file (e.g. .nfs*******).  That open file is there because
> > > the 'FORGET' hasn't completed yet by the time the rmdir is issued.
> > >
> > > Question:
> > >   a) In the FUSE protocol, are requests assumed to complete in order;
> > > i.e.  unlink, forget, rmdir   is it required that 'forget' completes
> > > before the rmdir is processed?
> > >      (In virtiofs we've been processing requests, in parallel, and
> > > have sent forgets down a separate queue to keep them out of the way).
> > >
> > >   b) 'forget' doesn't send a reply - so the kernel can't wait for the
> > > client to have finished it;  do we need a synchronous forget here?
> >
> > Even if we introduce a synchronous forget, will that really fix the
> > issue. For example, this could also happen if file has been unlinked
> > but it is still open and directory is being removed.
> >
> > fd = open(foo/bar.txt)
> > unlink foo/bar.txt
> > rmdir foo
> > close(fd).
> >
> > In this case, final forget should go after fd has been closed. Its
> > not a forget race.
> >
> > I wrote a test case for this and it works on regular file systems.
> >
> > https://github.com/rhvgoyal/misc/blob/master/virtiofs-tests/rmdir.c
> >
> > I suspect it will fail on nfs because I am assuming that temporary
> > file will be there till final close(fd) happens. If that's the
> > case this is a NFS specific issue because its behavior is different
> > from other file systems.
>
> That's true; but that's NFS just being NFS; in our case we're keeping
> an fd open even though the guest has been smart enough not to; so we're
> causing the NFS oddity when it wouldn't normally happen.

Can't think of anything better than a synchronous forget.   Compile
only tested patch attached.

Thanks,
Miklos
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 78f9f209078c..daa4e669441d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -373,6 +373,26 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
 	return ERR_PTR(err);
 }
 
+static void fuse_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+	if (!__lockref_is_dead(&dentry->d_lockref)) {
+		/*
+		 * This is an unlink/rmdir and removing the last ref to the
+		 * dentry.  Use synchronous FORGET in case filesystem requests
+		 * it.
+		 *
+		 * FIXME: This is racy!  Two or more instances of
+		 * fuse_dentry_iput() could be running concurrently (unlink of
+		 * several aliases in different directories).
+		 */
+		set_bit(FUSE_I_SYNC_FORGET, &get_fuse_inode(inode)->state);
+		iput(inode);
+		clear_bit(FUSE_I_SYNC_FORGET, &get_fuse_inode(inode)->state);
+	} else {
+		iput(inode);
+	}
+}
+
 const struct dentry_operations fuse_dentry_operations = {
 	.d_revalidate	= fuse_dentry_revalidate,
 	.d_delete	= fuse_dentry_delete,
@@ -381,6 +401,7 @@ const struct dentry_operations fuse_dentry_operations = {
 	.d_release	= fuse_dentry_release,
 #endif
 	.d_automount	= fuse_dentry_automount,
+	.d_iput		= fuse_dentry_iput,
 };
 
 const struct dentry_operations fuse_root_dentry_operations = {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7c4b8cb93f9f..0820b7a63ca7 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -174,6 +174,8 @@ enum {
 	FUSE_I_SIZE_UNSTABLE,
 	/* Bad inode */
 	FUSE_I_BAD,
+	/* Synchronous forget requested */
+	FUSE_I_SYNC_FORGET,
 };
 
 struct fuse_conn;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b0e18b470e91..a49ff30d1ecc 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -115,6 +115,26 @@ static void fuse_free_inode(struct inode *inode)
 	kmem_cache_free(fuse_inode_cachep, fi);
 }
 
+static void fuse_sync_forget(struct inode *inode)
+{
+	struct fuse_mount *fm = get_fuse_mount(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_forget_in inarg;
+	FUSE_ARGS(args);
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.nlookup = fi->nlookup;
+	args.opcode = FUSE_SYNC_FORGET;
+	args.nodeid = fi->nodeid;
+	args.in_numargs = 1;
+	args.in_args[0].size = sizeof(inarg);
+	args.in_args[0].value = &inarg;
+	args.force = true;
+
+	fuse_simple_request(fm, &args);
+	/* ignore errors */
+}
+
 static void fuse_evict_inode(struct inode *inode)
 {
 	struct fuse_inode *fi = get_fuse_inode(inode);
@@ -127,9 +147,13 @@ static void fuse_evict_inode(struct inode *inode)
 		if (FUSE_IS_DAX(inode))
 			fuse_dax_inode_cleanup(inode);
 		if (fi->nlookup) {
-			fuse_queue_forget(fc, fi->forget, fi->nodeid,
-					  fi->nlookup);
-			fi->forget = NULL;
+			if (test_bit(FUSE_I_SYNC_FORGET, &fi->state)) {
+				fuse_sync_forget(inode);
+			} else {
+				fuse_queue_forget(fc, fi->forget, fi->nodeid,
+						  fi->nlookup);
+				fi->forget = NULL;
+			}
 		}
 	}
 	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 98ca64d1beb6..cfcf95cfde76 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -499,6 +499,7 @@ enum fuse_opcode {
 	FUSE_COPY_FILE_RANGE	= 47,
 	FUSE_SETUPMAPPING	= 48,
 	FUSE_REMOVEMAPPING	= 49,
+	FUSE_SYNC_FORGET	= 50,
 
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
_______________________________________________
Virtio-fs mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/virtio-fs

Reply via email to