On Wed, Jan 6, 2021 at 10:16 AM Amir Goldstein <[email protected]> wrote:

> Please note that NFS doesn't do "silly rename" for directories,
> so mitigation is mostly needed for non-dir.

Okay.

> An alternative method if daemon is not capable, is to store parent dirfd
> in addition to filehandle and implement open_child_by_handle_at(int
> parent_fd, ...):
> - readdir(parend_fd)
> - search a match for d_ino
> - name_to_handle_at() and verify match to stored filehandle
>
> This is essentially what open_by_handle_at(2) does under the covers
> with a "connectable" non-dir filehandle after having resolved the
> parent file handle part. And "connectable" file handles are used by nfsd
> to enforce "subtree_check" to make sure that file wasn't moved outside
> obtainable path after initial lookup.

Yes, sort of makes sense, but will have corner cases for hard links in
different directories, open files after unlink, etc..

Also back to the original problem: what we really want is to close the
O_PATH descriptor on unlink().  This should be possible, regardless of
any FORGET, assuming

1) no open files exist that reference the inode
2) no aliases have been looked up (i.e. just one cached dentry)

The attached untested patch tries to do this.

Thanks,
Miklos
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index ec1008bceba8..d9c03e87e57d 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -107,6 +107,11 @@ struct lo_inode {
      */
     gint refcount;
 
+    /*
+     * Number of open instances
+     */
+    gint opencount;
+
     struct lo_key key;
 
     /*
@@ -901,6 +906,18 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
 
     inode = lo_find(lo, &e->attr, mnt_id);
     if (inode) {
+        char buf1[PATH_MAX + 1], buf2[PATH_MAX + 1], procname[64];
+        ssize_t siz1, siz2;
+
+        sprintf(procname, "%i", inode->fd);
+        siz1 = readlinkat(lo->proc_self_fd, procname, buf1, sizeof(buf1));
+        sprintf(procname, "%i", newfd);
+        siz2 = readlinkat(lo->proc_self_fd, procname, buf2, sizeof(buf2));
+
+        /* disable close on unlink if alias is detected */
+        if (siz1 != siz2 || memcmp(buf1, buf2, siz1))
+            g_atomic_int_inc(&inode->opencount);
+
         close(newfd);
     } else {
         inode = calloc(1, sizeof(struct lo_inode));
@@ -917,6 +934,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
          */
         g_atomic_int_set(&inode->refcount, 2);
 
+        g_atomic_int_set(&inode->opencount, 0);
         inode->nlookup = 1;
         inode->fd = newfd;
         inode->key.ino = e->attr.st_ino;
@@ -1295,6 +1313,10 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
     res = unlinkat(lo_fd(req, parent), name, 0);
 
     fuse_reply_err(req, res == -1 ? errno : 0);
+    if (!g_atomic_int_get(&inode->opencount)) {
+        close(inode->fd);
+        inode->fd = -1;
+    }
     unref_inode_lolocked(lo, inode, 1);
     lo_inode_put(lo, &inode);
 }
@@ -1904,25 +1926,30 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
     ssize_t fh;
     char buf[64];
     struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode = lo_inode(req, ino);
+    int err = EBADF;
+
+    if (!inode)
+        goto out_err;
 
     fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
              fi->flags);
 
     update_open_flags(lo->writeback, lo->allow_direct_io, fi);
 
-    sprintf(buf, "%i", lo_fd(req, ino));
+    sprintf(buf, "%i", inode->fd);
     fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
-    if (fd == -1) {
-        return (void)fuse_reply_err(req, errno);
-    }
+    err = errno;
+    if (fd == -1)
+        goto out_err;
 
     pthread_mutex_lock(&lo->mutex);
     fh = lo_add_fd_mapping(req, fd);
     pthread_mutex_unlock(&lo->mutex);
     if (fh == -1) {
         close(fd);
-        fuse_reply_err(req, ENOMEM);
-        return;
+        err = ENOMEM;
+        goto out_err;
     }
 
     fi->fh = fh;
@@ -1931,18 +1958,26 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
     } else if (lo->cache == CACHE_ALWAYS) {
         fi->keep_cache = 1;
     }
+    g_atomic_int_inc(&inode->opencount);
+    lo_inode_put(lo, &inode);
     fuse_reply_open(req, fi);
+    return;
+
+out_err:
+    lo_inode_put(lo, &inode);
+    fuse_reply_err(req, err);
 }
 
 static void lo_release(fuse_req_t req, fuse_ino_t ino,
                        struct fuse_file_info *fi)
 {
     struct lo_data *lo = lo_data(req);
+    struct lo_inode *inode = lo_inode(req, ino);
     struct lo_map_elem *elem;
     int fd = -1;
 
-    (void)ino;
-
+    if (inode)
+        g_atomic_int_dec_and_test(&inode->opencount);
     pthread_mutex_lock(&lo->mutex);
     elem = lo_map_get(&lo->fd_map, fi->fh);
     if (elem) {
@@ -1951,6 +1986,7 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino,
         lo_map_remove(&lo->fd_map, fi->fh);
     }
     pthread_mutex_unlock(&lo->mutex);
+    lo_inode_put(lo, &inode);
 
     close(fd);
     fuse_reply_err(req, 0);
_______________________________________________
Virtio-fs mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/virtio-fs

Reply via email to