> From: Stefan Hajnoczi <stefa...@redhat.com> > > If thread A is using an inode it must not be deleted by thread B when > processing a FUSE_FORGET request. > > The FUSE protocol itself already has a counter called nlookup that is > used in FUSE_FORGET messages. We cannot trust this counter since the > untrusted client can manipulate it via FUSE_FORGET messages. > > Introduce a new refcount to keep inodes alive for the required lifespan. > lo_inode_put() must be called to release a reference. FUSE's nlookup > counter holds exactly one reference so that the inode stays alive as > long as the client still wants to remember it. > > Note that the lo_inode->is_symlink field is moved to avoid creating a > hole in the struct due to struct field alignment. > > Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> > --- > tools/virtiofsd/passthrough_ll.c | 168 ++++++++++++++++++++++++++----- > 1 file changed, 145 insertions(+), 23 deletions(-) > > diff --git a/tools/virtiofsd/passthrough_ll.c > b/tools/virtiofsd/passthrough_ll.c > index b19c9ee328..8f4ab8351c 100644 > --- a/tools/virtiofsd/passthrough_ll.c > +++ b/tools/virtiofsd/passthrough_ll.c > @@ -99,7 +99,13 @@ struct lo_key { > > struct lo_inode { > int fd; > - bool is_symlink; > + > + /* > + * Atomic reference count for this object. The nlookup field holds a > + * reference and release it when nlookup reaches 0. 
> + */ > + gint refcount; > + > struct lo_key key; > > /* > @@ -118,6 +124,8 @@ struct lo_inode { > fuse_ino_t fuse_ino; > pthread_mutex_t plock_mutex; > GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ > + > + bool is_symlink; > }; > > struct lo_cred { > @@ -473,6 +481,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, > struct lo_inode *inode) > return elem - lo_data(req)->ino_map.elems; > } > > +static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) > +{ > + struct lo_inode *inode = *inodep; > + > + if (!inode) { > + return; > + } > + > + *inodep = NULL; > + > + if (g_atomic_int_dec_and_test(&inode->refcount)) { > + close(inode->fd); > + free(inode); > + } > +} > + > +/* Caller must release refcount using lo_inode_put() */ > static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) > { > struct lo_data *lo = lo_data(req); > @@ -480,6 +505,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, > fuse_ino_t ino) > > pthread_mutex_lock(&lo->mutex); > elem = lo_map_get(&lo->ino_map, ino); > + if (elem) { > + g_atomic_int_inc(&elem->inode->refcount); > + } > pthread_mutex_unlock(&lo->mutex); > > if (!elem) { > @@ -489,10 +517,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, > fuse_ino_t ino) > return elem->inode; > } > > +/* > + * TODO Remove this helper and force callers to hold an inode refcount until > + * they are done with the fd. This will be done in a later patch to make > + * review easier. > + */ > static int lo_fd(fuse_req_t req, fuse_ino_t ino) > { > struct lo_inode *inode = lo_inode(req, ino); > - return inode ? 
inode->fd : -1; > + int fd; > + > + if (!inode) { > + return -1; > + } > + > + fd = inode->fd; > + lo_inode_put(lo_data(req), &inode); > + return fd; > } > > static void lo_init(void *userdata, struct fuse_conn_info *conn) > @@ -547,6 +588,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, > fuse_reply_attr(req, &buf, lo->timeout); > } > > +/* > + * Increments parent->nlookup and caller must release refcount using > + * lo_inode_put(&parent). > + */ > static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, > char path[PATH_MAX], struct lo_inode **parent) > { > @@ -584,6 +629,7 @@ retry: > p = &lo->root; > pthread_mutex_lock(&lo->mutex); > p->nlookup++; > + g_atomic_int_inc(&p->refcount); > pthread_mutex_unlock(&lo->mutex); > } else { > *last = '\0';
We need lo_inode_put() in the error path, right?: https://gitlab.com/virtio-fs/qemu/blob/virtio-fs-as-posted-2019-12-12/tools/virtiofsd/passthrough_ll.c#L680 nit: if yes, unref_inode_lolocked() is always paired with lo_inode_put(). So how about combining them into one function? As p->nlookup and p->refcount are both incremented in one place (lo_find/lo_parent_and_name) in this case, it seems natural to me to decrement them in one function as well. Thanks, Misono