Re: [PATCH v3 07/10] virtiofsd: Add lo_inode.fhandle

2021-08-09 Thread Hanna Reitz

On 09.08.21 17:21, Vivek Goyal wrote:

On Fri, Jul 30, 2021 at 05:01:31PM +0200, Max Reitz wrote:

This new field is an alternative to lo_inode.fd: Either of the two must
be set.  In case an O_PATH FD is needed for some lo_inode, it is either
taken from lo_inode.fd, if valid, or a temporary FD is opened with
open_by_handle_at().

Using a file handle instead of an FD has the advantage of keeping the
number of open file descriptors low.

Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
opened on the filesystem to which the file handle refers), but every
lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
keep a hash map of such FDs in mount_fds (mapping ID to FD).
get_file_handle(), which is added by a later patch, will ensure that
every mount ID for which we have generated a handle has a corresponding
entry in mount_fds.

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
  tools/virtiofsd/passthrough_ll.c  | 116 ++
  tools/virtiofsd/passthrough_seccomp.c |   1 +
  2 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 292b7f7e27..487448d666 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -88,8 +88,25 @@ struct lo_key {
  uint64_t mnt_id;
  };
  
+struct lo_fhandle {

+union {
+struct file_handle handle;
+char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+};
+int mount_id;
+};
+
+/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
+static GHashTable *mount_fds;
+pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
+

How about if we move this hash table inside "struct lo_data". That seems to
be one global data structure keeping all the info. Also it can be
cleaned up during lo_destroy().


Yes, sounds good and right, will do.

Hanna




Re: [PATCH v3 07/10] virtiofsd: Add lo_inode.fhandle

2021-08-09 Thread Vivek Goyal
On Fri, Jul 30, 2021 at 05:01:31PM +0200, Max Reitz wrote:
> This new field is an alternative to lo_inode.fd: Either of the two must
> be set.  In case an O_PATH FD is needed for some lo_inode, it is either
> taken from lo_inode.fd, if valid, or a temporary FD is opened with
> open_by_handle_at().
> 
> Using a file handle instead of an FD has the advantage of keeping the
> number of open file descriptors low.
> 
> Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
> opened on the filesystem to which the file handle refers), but every
> lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
> keep a hash map of such FDs in mount_fds (mapping ID to FD).
> get_file_handle(), which is added by a later patch, will ensure that
> every mount ID for which we have generated a handle has a corresponding
> entry in mount_fds.
> 
> Signed-off-by: Max Reitz 
> Reviewed-by: Connor Kuehl 
> ---
>  tools/virtiofsd/passthrough_ll.c  | 116 ++
>  tools/virtiofsd/passthrough_seccomp.c |   1 +
>  2 files changed, 102 insertions(+), 15 deletions(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c 
> b/tools/virtiofsd/passthrough_ll.c
> index 292b7f7e27..487448d666 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -88,8 +88,25 @@ struct lo_key {
>  uint64_t mnt_id;
>  };
>  
> +struct lo_fhandle {
> +union {
> +struct file_handle handle;
> +char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
> +};
> +int mount_id;
> +};
> +
> +/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
> +static GHashTable *mount_fds;
> +pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
> +

How about if we move this hash table inside "struct lo_data". That seems to
be one global data structure keeping all the info. Also it can be
cleaned up during lo_destroy().

Thanks
Vivek




[PATCH v3 07/10] virtiofsd: Add lo_inode.fhandle

2021-07-30 Thread Max Reitz
This new field is an alternative to lo_inode.fd: Either of the two must
be set.  In case an O_PATH FD is needed for some lo_inode, it is either
taken from lo_inode.fd, if valid, or a temporary FD is opened with
open_by_handle_at().

Using a file handle instead of an FD has the advantage of keeping the
number of open file descriptors low.

Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
opened on the filesystem to which the file handle refers), but every
lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
keep a hash map of such FDs in mount_fds (mapping ID to FD).
get_file_handle(), which is added by a later patch, will ensure that
every mount ID for which we have generated a handle has a corresponding
entry in mount_fds.

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
 tools/virtiofsd/passthrough_ll.c  | 116 ++
 tools/virtiofsd/passthrough_seccomp.c |   1 +
 2 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 292b7f7e27..487448d666 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -88,8 +88,25 @@ struct lo_key {
 uint64_t mnt_id;
 };
 
+struct lo_fhandle {
+union {
+struct file_handle handle;
+char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+};
+int mount_id;
+};
+
+/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
+static GHashTable *mount_fds;
+pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
+
 struct lo_inode {
+/*
+ * Either of fd or fhandle must be set (i.e. >= 0 or non-NULL,
+ * respectively).
+ */
 int fd;
+struct lo_fhandle *fhandle;
 
 /*
  * Atomic reference count for this object.  The nlookup field holds a
@@ -302,6 +319,44 @@ static int temp_fd_steal(TempFd *temp_fd)
 }
 }
 
+/**
+ * Open the given file handle with the given flags.
+ *
+ * The mount FD to pass to open_by_handle_at() is taken from the
+ * mount_fds hash map.
+ *
+ * On error, return -errno.
+ */
+static int open_file_handle(const struct lo_fhandle *fh, int flags)
+{
+gpointer mount_fd_ptr;
+int mount_fd;
+bool found;
+int ret;
+
+ret = pthread_rwlock_rdlock(_fds_lock);
+if (ret) {
+return -ret;
+}
+
+/* mount_fd == 0 is valid, so we need lookup_extended */
+found = g_hash_table_lookup_extended(mount_fds,
+ GINT_TO_POINTER(fh->mount_id),
+ NULL, _fd_ptr);
+pthread_rwlock_unlock(_fds_lock);
+if (!found) {
+return -EINVAL;
+}
+mount_fd = GPOINTER_TO_INT(mount_fd_ptr);
+
+ret = open_by_handle_at(mount_fd, (struct file_handle *)>handle, 
flags);
+if (ret < 0) {
+return -errno;
+}
+
+return ret;
+}
+
 /*
  * Load capng's state from our saved state if the current thread
  * hadn't previously been loaded.
@@ -608,7 +663,11 @@ static void lo_inode_put(struct lo_data *lo, struct 
lo_inode **inodep)
 *inodep = NULL;
 
 if (g_atomic_int_dec_and_test(>refcount)) {
-close(inode->fd);
+if (inode->fd >= 0) {
+close(inode->fd);
+} else {
+g_free(inode->fhandle);
+}
 free(inode);
 }
 }
@@ -635,10 +694,25 @@ static struct lo_inode *lo_inode(fuse_req_t req, 
fuse_ino_t ino)
 
 static int lo_inode_fd(const struct lo_inode *inode, TempFd *tfd)
 {
-*tfd = (TempFd) {
-.fd = inode->fd,
-.owned = false,
-};
+if (inode->fd >= 0) {
+*tfd = (TempFd) {
+.fd = inode->fd,
+.owned = false,
+};
+} else {
+int fd;
+
+assert(inode->fhandle != NULL);
+fd = open_file_handle(inode->fhandle, O_PATH);
+if (fd < 0) {
+return -errno;
+}
+
+*tfd = (TempFd) {
+.fd = fd,
+.owned = true,
+};
+}
 
 return 0;
 }
@@ -678,22 +752,32 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino, TempFd 
*tfd)
 static int lo_inode_open(const struct lo_data *lo, const struct lo_inode 
*inode,
  int open_flags, TempFd *tfd)
 {
-g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
+g_autofree char *fd_str = NULL;
 int fd;
 
 if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
 return -EBADF;
 }
 
-/*
- * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
- * that the inode is not a special file but if an external process races
- * with us then symlinks are traversed here. It is not possible to escape
- * the shared directory since it is mounted as "/" though.
- */
-fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
-if (fd < 0) {
-return -errno;
+if (inode->fd >= 0) {
+/*
+ * The file is a symlink so