On Mon, Nov 15, 2010 at 2:52 PM, M. Mohan Kumar <mo...@in.ibm.com> wrote: > In passthrough security model, following symbolic links in the server > side could result in accessing files outside guest's export path.This > could happen under two conditions: > 1) If a modified guest kernel is sending symbolic link as part of the > file path and when resolving that symbolic link at server side, it could > result in accessing files outside export path. > 2) If a same path is exported to multiple guests and if guest1 tries to > open a file "a/b/c/passwd" and meanwhile guest2 did this "rm -rf a/b/c; > cd a/b; ln -s ../../etc c". If guest1 lookup happened and guest2 > completed these operations just before guest1 opening the file, this > operation could result in opening host's /etc/passwd. > > Following approach is used to avoid the security issue involved in > following symbolic links in the passthrough model. Create a sub-process > which will chroot into export path, so that even if there is a symbolic > link in the path it could never go beyond the share path. > > When qemu is started with passthrough security model, a process is > forked and this sub-process process takes care of accessing files in the > passthrough share path. It does > * Create socketpair > * Chroot into share path > * Read file open request from socket descriptor > * Open request contains file path, flags, mode, uid, gid, dev etc > * Based on the request type it creates/opens file/directory/device node > * Return the file descriptor to main process using socket with > SCM_RIGHTS as cmsg type. > > Main process when ever there is a request for a resource to be > opened/created, it constructs the open request and writes that into > its socket descriptor and reads from chroot process socket to get the > file descriptor.
How does the child process exit cleanly? If QEMU crashes will the child process be orphaned? > > This patch implements chroot enviroment, provides necessary functions > that can be used by the passthrough function calls. > > Signed-off-by: M. Mohan Kumar <mo...@in.ibm.com> > --- > Makefile.objs | 1 + > hw/file-op-9p.h | 2 + > hw/virtio-9p-chroot.c | 275 > +++++++++++++++++++++++++++++++++++++++++++++++++ > hw/virtio-9p.c | 20 ++++ > hw/virtio-9p.h | 21 ++++ > 5 files changed, 319 insertions(+), 0 deletions(-) > create mode 100644 hw/virtio-9p-chroot.c > > diff --git a/Makefile.objs b/Makefile.objs > index cd5a24b..134da8e 100644 > --- a/Makefile.objs > +++ b/Makefile.objs > @@ -251,6 +251,7 @@ hw-obj-$(CONFIG_SOUND) += $(sound-obj-y) > > hw-obj-$(CONFIG_VIRTFS) += virtio-9p-debug.o virtio-9p-local.o > virtio-9p-xattr.o > hw-obj-$(CONFIG_VIRTFS) += virtio-9p-xattr-user.o virtio-9p-posix-acl.o > +hw-obj-$(CONFIG_VIRTFS) += virtio-9p-chroot.o > > ###################################################################### > # libdis > diff --git a/hw/file-op-9p.h b/hw/file-op-9p.h > index c7731c2..149a915 100644 > --- a/hw/file-op-9p.h > +++ b/hw/file-op-9p.h > @@ -55,6 +55,8 @@ typedef struct FsContext > SecModel fs_sm; > uid_t uid; > struct xattr_operations **xops; > + pthread_mutex_t chroot_mutex; > + int chroot_socket; > } FsContext; > > extern void cred_init(FsCred *); > diff --git a/hw/virtio-9p-chroot.c b/hw/virtio-9p-chroot.c > new file mode 100644 > index 0000000..50e28a1 > --- /dev/null > +++ b/hw/virtio-9p-chroot.c > @@ -0,0 +1,275 @@ > +/* > + * Virtio 9p chroot environment for secured access to exported file > + * system > + * > + * Copyright IBM, Corp. 2010 > + * > + * Authors: > + * M. Mohan Kumar <mo...@in.ibm.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the copying file in the top-level directory > + * > + */ > + > +#include "virtio.h" > +#include "qemu_socket.h" > +#include "qemu-thread.h" > +#include "virtio-9p.h" > +#include <sys/fsuid.h> > +#include <sys/resource.h> > + > +/* Structure used to transfer file descriptor and error info to the main > + * process. fd will be zero if there was an error(in this case error > + * will hold the errno). error will be zero and fd will be a valid > + * identifier when open was success > + */ > +typedef struct { > + int fd; > + int error; > +} FdInfo; > + > +static int sendfd(int sockfd, FdInfo fd_info) > +{ > + struct msghdr msg = { }; > + struct iovec iov; > + union { > + struct cmsghdr cmsg; > + char control[CMSG_SPACE(sizeof(int))]; > + } msg_control; > + struct cmsghdr *cmsg; > + > + iov.iov_base = &fd_info; > + iov.iov_len = sizeof(fd_info); > + > + memset(&msg, 0, sizeof(msg)); > + msg.msg_iov = &iov; > + msg.msg_iovlen = 1; > + msg.msg_control = &msg_control; > + msg.msg_controllen = sizeof(msg_control); > + > + cmsg = &msg_control.cmsg; > + cmsg->cmsg_len = CMSG_LEN(sizeof(fd_info.fd)); > + cmsg->cmsg_level = SOL_SOCKET; > + cmsg->cmsg_type = SCM_RIGHTS; > + memcpy(CMSG_DATA(cmsg), &fd_info.fd, sizeof(fd_info.fd)); > + > + return sendmsg(sockfd, &msg, 0); > +} > + > +static int getfd(int sockfd, int *error) > +{ > + struct msghdr msg = { }; > + struct iovec iov; > + union { > + struct cmsghdr cmsg; > + char control[CMSG_SPACE(sizeof(int))]; > + } msg_control; > + struct cmsghdr *cmsg; > + int retval, fd; > + FdInfo fd_info; > + > + iov.iov_base = &fd_info; > + iov.iov_len = sizeof(fd_info); > + > + memset(&msg, 0, sizeof(msg)); > + msg.msg_iov = &iov; > + msg.msg_iovlen = 1; > + msg.msg_control = &msg_control; > + msg.msg_controllen = sizeof(msg_control); > + > + retval = recvmsg(sockfd, &msg, 0); > + if (retval < 0) { > + return retval; > + } > + > + if (fd_info.error) { > + *error = fd_info.error; > + } > + > + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { > + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) || > + cmsg->cmsg_level != SOL_SOCKET || > + cmsg->cmsg_type != SCM_RIGHTS) { > + continue; > + } > + fd = *((int *)CMSG_DATA(cmsg)); > + if (fd_info.fd == 0) { > + /* if fd is 0 and error is also 0, it means we created some > + * file/directory/nodes */ > + if (*error) { > + fd_info.fd = -1; > + } else { > + fd_info.fd = 0; > + } > + close(fd); > + } else { > + fd_info.fd = fd; > + } > + return fd_info.fd; > + } > + return -1; > +} > + > +static int write_openrequest(int sockfd, V9fsOpenRequest *request) > +{ > + int bytes; > + bytes = write(sockfd, &request->data, sizeof(request->data)); > + bytes += write(sockfd, request->path.path, request->data.path_len); > + bytes += write(sockfd, request->path.old_path, > + request->data.oldpath_len); > + return bytes; > +} > + > +int v9fs_getfd(V9fsOpenRequest *or, int *error, FsContext *fs_ctx) > +{ > + int fd; > + pthread_mutex_lock(&fs_ctx->chroot_mutex); > + write_openrequest(fs_ctx->chroot_socket, or); > + fd = getfd(fs_ctx->chroot_socket, error); > + pthread_mutex_unlock(&fs_ctx->chroot_mutex); > + return fd; > +} > + > +static int read_openrequest(int sockfd, V9fsOpenRequest *request) > +{ > + int bytes; > + bytes = recv(sockfd, request, sizeof(request->data), 0); This could fail... > + request->path.path = qemu_mallocz(request->data.path_len + 1); ...and it would be dangerous to use request->data.path_len if recv() had failed. > + bytes += recv(sockfd, (void *)request->path.path, > + request->data.path_len, 0); Same thing, return value needs to be checked before adding to byte count. Stefan