From: Tom Gundersen <t...@jklm.no>

While notifications already work and simply require linking bus1_handle
objects into the destination queue, real messages require proper
payloads. This implements two core objects: Message objects and
factories.

The message factory is similar to transaction contexts, and lives
completely on the stack. It is used to import the parameters given by
user-space in a SEND ioctl. It parses and validates them. With this
message factory we can now instantiate many messages, one for each
destination of a multicast.

Messages need to carry a bunch of data, mainly:
  - metadata: This just matches what Unix-sockets do (uid, gid, pid,
              tid, and secctx)
  - payload: Random memory passed in as iovec-array by user-space
  - files: Set of file-descriptors, very similar to SCM_RIGHTS
  - handles: Set of local handles to transfer to the destination

Signed-off-by: Tom Gundersen <t...@jklm.no>
Signed-off-by: David Herrmann <dh.herrm...@gmail.com>
---
 ipc/bus1/Makefile  |   1 +
 ipc/bus1/message.c | 613 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 ipc/bus1/message.h | 171 +++++++++++++++
 ipc/bus1/peer.c    |   2 +
 ipc/bus1/peer.h    |   2 +
 ipc/bus1/util.c    | 162 ++++++++++++++
 ipc/bus1/util.h    |   7 +
 7 files changed, 958 insertions(+)
 create mode 100644 ipc/bus1/message.c
 create mode 100644 ipc/bus1/message.h

diff --git a/ipc/bus1/Makefile b/ipc/bus1/Makefile
index b87cddb..05434bda 100644
--- a/ipc/bus1/Makefile
+++ b/ipc/bus1/Makefile
@@ -1,6 +1,7 @@
 bus1-y :=                      \
        handle.o                \
        main.o                  \
+       message.o               \
        peer.o                  \
        tx.o                    \
        user.o                  \
diff --git a/ipc/bus1/message.c b/ipc/bus1/message.c
new file mode 100644
index 0000000..4c5c905
--- /dev/null
+++ b/ipc/bus1/message.c
@@ -0,0 +1,613 @@
+/*
+ * Copyright (C) 2013-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uidgid.h>
+#include <linux/uio.h>
+#include <uapi/linux/bus1.h>
+#include "handle.h"
+#include "message.h"
+#include "peer.h"
+#include "tx.h"
+#include "user.h"
+#include "util.h"
+#include "util/flist.h"
+#include "util/pool.h"
+#include "util/queue.h"
+
+static size_t bus1_factory_size(struct bus1_cmd_send *param)
+{
+       /* limits fit u16, so @size cannot overflow (if @param is bounded) */
+       BUILD_BUG_ON(UIO_MAXIOV > U16_MAX);
+       BUILD_BUG_ON(BUS1_FD_MAX > U16_MAX);
+
+       /* trailing arrays are ordered by non-increasing alignment */
+       BUILD_BUG_ON(__alignof(struct bus1_flist) < __alignof(struct iovec));
+       BUILD_BUG_ON(__alignof(struct iovec) < __alignof(struct file *));
+
+       return sizeof(struct bus1_factory) +
+              bus1_flist_inline_size(param->n_handles) +   /* handles[] */
+              param->n_vecs * sizeof(struct iovec) +       /* vecs[] */
+              param->n_fds * sizeof(struct file *);        /* files[] */
+}
+
+/**
+ * bus1_factory_new() - create new message factory
+ * @peer:                      peer to operate as
+ * @param:                     factory parameters
+ * @stack:                     optional stack for factory, or NULL
+ * @n_stack:                   size of space at @stack
+ *
+ * This allocates a new message factory. It imports data from @param and
+ * prepares the factory for a transaction. From this factory, messages can be
+ * instantiated. This is used both for unicasts and multicasts.
+ *
+ * If @stack is given, this tries to place the factory on the specified stack
+ * space. The caller must guarantee that the factory does not outlive the stack
+ * frame. If this is not wanted, pass 0 as @n_stack.
+ * In either case, if the stack frame is too small, this will allocate the
+ * factory on the heap.
+ *
+ * Return: Pointer to factory, or ERR_PTR on failure.
+ */
+struct bus1_factory *bus1_factory_new(struct bus1_peer *peer,
+                                     struct bus1_cmd_send *param,
+                                     void *stack,
+                                     size_t n_stack)
+{
+       const struct iovec __user *ptr_vecs;
+       const u64 __user *ptr_handles;
+       const int __user *ptr_fds;
+       struct bus1_factory *f;
+       struct bus1_flist *e;
+       struct file *file;
+       size_t i, size;
+       bool is_new;
+       int r, fd;
+       u32 sid;
+       u64 id;
+
+       lockdep_assert_held(&peer->local.lock);
+
+       size = bus1_factory_size(param);
+       if (unlikely(size > n_stack)) { /* does not fit into @stack */
+               f = kmalloc(size, GFP_TEMPORARY);
+               if (!f)
+                       return ERR_PTR(-ENOMEM);
+
+               f->on_stack = false;
+       } else {
+               f = stack;
+               f->on_stack = true;
+       }
+
+       /* set to default first, so the destructor can be called anytime */
+       f->peer = peer;
+       f->param = param;
+       f->cred = current_cred();       /* bus1_factory_free() drops no ref */
+       f->pid = task_tgid(current);
+       f->tid = task_pid(current);
+
+       f->has_secctx = false;
+
+       f->length_vecs = 0;
+       f->n_vecs = param->n_vecs;
+       f->n_handles = 0;
+       f->n_handles_charge = 0;
+       f->n_files = 0;
+       f->n_secctx = 0;
+       f->vecs = (void *)(f + 1) + bus1_flist_inline_size(param->n_handles);
+       f->files = (void *)(f->vecs + param->n_vecs);
+       f->secctx = NULL;
+       bus1_flist_init(f->handles, f->param->n_handles);
+
+       /* import vecs; also sums their lengths into f->length_vecs */
+       ptr_vecs = (const struct iovec __user *)(unsigned long)param->ptr_vecs;
+       r = bus1_import_vecs(f->vecs, &f->length_vecs, ptr_vecs, f->n_vecs);
+       if (r < 0)
+               goto error;
+
+       /* import handles; f->n_handles counts the ones imported so far */
+       r = bus1_flist_populate(f->handles, f->param->n_handles, GFP_TEMPORARY);
+       if (r < 0)
+               goto error;
+
+       ptr_handles = (const u64 __user *)(unsigned long)param->ptr_handles;
+       for (i = 0, e = f->handles;
+            i < f->param->n_handles;
+            e = bus1_flist_next(e, &i)) {
+               if (get_user(id, ptr_handles + f->n_handles)) {
+                       r = -EFAULT;
+                       goto error;
+               }
+
+               e->ptr = bus1_handle_import(peer, id, &is_new);
+               if (IS_ERR(e->ptr)) {
+                       r = PTR_ERR(e->ptr);
+                       goto error;
+               }
+
+               ++f->n_handles;
+               if (is_new)     /* only handles reported as new are counted */
+                       ++f->n_handles_charge;
+       }
+
+       /* import files; each one is fput() again in bus1_factory_free() */
+       ptr_fds = (const int __user *)(unsigned long)param->ptr_fds;
+       while (f->n_files < param->n_fds) {
+               if (get_user(fd, ptr_fds + f->n_files)) {
+                       r = -EFAULT;
+                       goto error;
+               }
+
+               file = bus1_import_fd(fd);
+               if (IS_ERR(file)) {
+                       r = PTR_ERR(file);
+                       goto error;
+               }
+
+               f->files[f->n_files++] = file;
+       }
+
+       /* import secctx; -EOPNOTSUPP just leaves has_secctx unset */
+       security_task_getsecid(current, &sid);
+       r = security_secid_to_secctx(sid, &f->secctx, &f->n_secctx);
+       if (r != -EOPNOTSUPP) {
+               if (r < 0)
+                       goto error;
+
+               f->has_secctx = true;
+       }
+
+       return f;
+
+error:
+       bus1_factory_free(f);
+       return ERR_PTR(r);
+}
+
+/**
+ * bus1_factory_free() - destroy message factory
+ * @f:                         factory to operate on, or NULL
+ *
+ * This destroys the message factory @f, previously created via
+ * bus1_factory_new(). The secctx, pinned files and imported handles are
+ * released. Messages created via the factory are unaffected.
+ *
+ * If @f is NULL, this is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+struct bus1_factory *bus1_factory_free(struct bus1_factory *f)
+{
+       struct bus1_flist *e;
+       size_t i;
+
+       if (f) {
+               lockdep_assert_held(&f->peer->local.lock);
+
+               if (f->has_secctx)
+                       security_release_secctx(f->secctx, f->n_secctx);
+
+               for (i = 0; i < f->n_files; ++i)
+                       fput(f->files[i]);
+
+               /* Iterate and forget imported handles (f->n_handles)... */
+               for (i = 0, e = f->handles;
+                    i < f->n_handles;
+                    e = bus1_flist_next(e, &i)) {
+                       bus1_handle_forget(e->ptr);
+                       bus1_handle_unref(e->ptr);
+               }
+               /* ...but free total space (f->param->n_handles). */
+               bus1_flist_deinit(f->handles, f->param->n_handles);
+
+               if (!f->on_stack)       /* heap fallback of bus1_factory_new() */
+                       kfree(f);
+       }
+
+       return NULL;
+}
+
+/**
+ * bus1_factory_seal() - charge and commit local resources
+ * @f:                         factory to use
+ *
+ * The factory needs to pin and possibly create local peer resources. This
+ * commits those resources. You should call this after you instantiated all
+ * messages, since you cannot undo it easily.
+ *
+ * Return: Always 0 in the current implementation.
+ */
+int bus1_factory_seal(struct bus1_factory *f)
+{
+       struct bus1_handle *h;
+       struct bus1_flist *e;
+       size_t i;
+
+       lockdep_assert_held(&f->peer->local.lock);
+
+       for (i = 0, e = f->handles;
+            i < f->n_handles;
+            e = bus1_flist_next(e, &i)) {
+               h = e->ptr;
+               if (bus1_handle_is_public(h))   /* already public: skip */
+                       continue;
+
+               --f->n_handles_charge;
+               WARN_ON(h != bus1_handle_acquire(h, false));
+               WARN_ON(atomic_inc_return(&h->n_user) != 1);
+       }
+
+       return 0;
+}
+
+/**
+ * bus1_factory_instantiate() - instantiate a message from a factory
+ * @f:                         factory to use
+ * @handle:                    destination handle
+ * @peer:                      destination peer
+ *
+ * This instantiates a new message targeted at @handle, based on the plans in
+ * the message factory @f.
+ *
+ * The newly created message is not linked into any contexts, but is available
+ * for free use to the caller.
+ *
+ * Return: Pointer to new message, or ERR_PTR on failure.
+ */
+struct bus1_message *bus1_factory_instantiate(struct bus1_factory *f,
+                                             struct bus1_handle *handle,
+                                             struct bus1_peer *peer)
+{
+       struct bus1_flist *src_e, *dst_e;
+       struct bus1_message *m;
+       bool transmit_secctx;
+       struct kvec vec;
+       size_t size, i, j;
+       u64 offset;
+       int r;
+
+       lockdep_assert_held(&f->peer->local.lock);
+
+       transmit_secctx = f->has_secctx &&
+                         (READ_ONCE(peer->flags) & BUS1_PEER_FLAG_WANT_SECCTX);
+
+       size = sizeof(*m) + bus1_flist_inline_size(f->n_handles) +
+              f->n_files * sizeof(struct file *);
+       m = kmalloc(size, GFP_KERNEL);
+       if (!m)
+               return ERR_PTR(-ENOMEM);
+
+       /* set to default first, so the destructor can be called anytime */
+       kref_init(&m->ref);
+       bus1_queue_node_init(&m->qnode, BUS1_MSG_DATA);
+       m->qnode.owner = peer;
+       m->dst = bus1_handle_ref(handle);
+       m->user = bus1_user_ref(f->peer->user);
+
+       m->flags = 0;
+       m->uid = from_kuid_munged(peer->cred->user_ns, f->cred->uid);
+       m->gid = from_kgid_munged(peer->cred->user_ns, f->cred->gid);
+       m->pid = pid_nr_ns(f->pid, peer->pid_ns);
+       m->tid = pid_nr_ns(f->tid, peer->pid_ns);
+
+       m->n_bytes = f->length_vecs;
+       m->n_handles = 0;
+       m->n_handles_charge = f->n_handles;
+       m->n_files = 0;
+       m->n_secctx = 0;
+       m->slice = NULL;
+       m->files = (void *)(m + 1) + bus1_flist_inline_size(f->n_handles);
+       bus1_flist_init(m->handles, f->n_handles);
+
+       /* slice: payload, u64 per handle, int per file, secctx (8-aligned) */
+       size = max_t(size_t, 8,
+                            ALIGN(m->n_bytes, 8) +
+                            ALIGN(f->n_handles * sizeof(u64), 8) +
+                            ALIGN(f->n_files * sizeof(int), 8) +
+                            ALIGN(f->n_secctx, 8));
+       mutex_lock(&peer->data.lock);
+       m->slice = bus1_pool_alloc(&peer->data.pool, size);
+       mutex_unlock(&peer->data.lock);
+       if (IS_ERR(m->slice)) {
+               r = PTR_ERR(m->slice);
+               m->slice = NULL;
+               goto error;
+       }
+
+       /* copy the user payload to the start of the slice */
+       r = bus1_pool_write_iovec(&peer->data.pool, m->slice, 0, f->vecs,
+                                 f->n_vecs, f->length_vecs);
+       if (r < 0)
+               goto error;
+
+       /* import handles */
+       r = bus1_flist_populate(m->handles, f->n_handles, GFP_KERNEL);
+       if (r < 0)
+               goto error;
+
+       r = 0;
+       m->n_handles = f->n_handles;
+       i = 0;
+       j = 0;
+       src_e = f->handles;
+       dst_e = m->handles;
+       while (i < f->n_handles) {
+               WARN_ON(i != j);
+
+               dst_e->ptr = bus1_handle_ref_by_other(peer, src_e->ptr);
+               if (!dst_e->ptr) {
+                       dst_e->ptr = bus1_handle_new_remote(peer, src_e->ptr);
+                       if (IS_ERR(dst_e->ptr) && r >= 0) {
+                               /*
+                                * Continue on error until we imported all
+                                * handles. Otherwise, trailing entries in the
+                                * array will be stale, and the destructor
+                                * cannot tell which.
+                                */
+                               r = PTR_ERR(dst_e->ptr);
+                       }
+               }
+
+               src_e = bus1_flist_next(src_e, &i);
+               dst_e = bus1_flist_next(dst_e, &j);
+       }
+       if (r < 0)
+               goto error;
+
+       /* import files; the message takes its own reference on each */
+       while (m->n_files < f->n_files) {
+               m->files[m->n_files] = get_file(f->files[m->n_files]);
+               ++m->n_files;
+       }
+
+       /* import secctx, placed after the payload, handle and fd sections */
+       if (transmit_secctx) {
+               offset = ALIGN(m->n_bytes, 8) +
+                        ALIGN(m->n_handles * sizeof(u64), 8) +
+                        ALIGN(m->n_files * sizeof(int), 8);
+               vec = (struct kvec){
+                       .iov_base = f->secctx,
+                       .iov_len = f->n_secctx,
+               };
+
+               r = bus1_pool_write_kvec(&peer->data.pool, m->slice, offset,
+                                        &vec, 1, vec.iov_len);
+               if (r < 0)
+                       goto error;
+
+               m->n_secctx = f->n_secctx;
+               m->flags |= BUS1_MSG_FLAG_HAS_SECCTX;
+       }
+
+       return m;
+
+error:
+       bus1_message_unref(m);
+       return ERR_PTR(r);
+}
+
+/**
+ * bus1_message_free() - destroy message
+ * @k:                 kref belonging to a message
+ *
+ * This frees the message belonging to the reference counter @k. It is supposed
+ * to be used with kref_put(). See bus1_message_unref(). Like all queue nodes,
+ * the memory deallocation is rcu-delayed.
+ */
+void bus1_message_free(struct kref *k)
+{
+       struct bus1_message *m = container_of(k, struct bus1_message, ref);
+       struct bus1_peer *peer = m->qnode.owner;
+       struct bus1_flist *e;
+       size_t i;
+
+       WARN_ON(!peer);
+       lockdep_assert_held(&peer->active);
+
+       for (i = 0; i < m->n_files; ++i)
+               fput(m->files[i]);
+
+       for (i = 0, e = m->handles;
+            i < m->n_handles;
+            e = bus1_flist_next(e, &i)) {
+               if (!IS_ERR_OR_NULL(e->ptr)) {
+                       if (m->qnode.group) /* presumably set once staged */
+                               bus1_handle_release(e->ptr, true);
+                       bus1_handle_unref(e->ptr);
+               }
+       }
+       bus1_flist_deinit(m->handles, m->n_handles);
+
+       if (m->slice) {
+               mutex_lock(&peer->data.lock);
+               bus1_pool_release_kernel(&peer->data.pool, m->slice);
+               mutex_unlock(&peer->data.lock);
+       }
+
+       bus1_user_unref(m->user);
+       bus1_handle_unref(m->dst);
+       bus1_queue_node_deinit(&m->qnode);
+       kfree_rcu(m, qnode.rcu);
+}
+
+/**
+ * bus1_message_stage() - stage message
+ * @m:                         message to operate on
+ * @tx:                                transaction to stage on
+ *
+ * This acquires every handle carried by @m (storing the result back into the
+ * handle list) and then stages the message on @tx. Like all stage operations,
+ * this cannot be undone. Hence, you must make sure you can continue to commit
+ * the transaction without erroring-out in between.
+ *
+ * This consumes the caller's reference on @m, plus the active reference on the
+ * destination peer.
+ */
+void bus1_message_stage(struct bus1_message *m, struct bus1_tx *tx)
+{
+       struct bus1_peer *peer = m->qnode.owner;
+       struct bus1_flist *e;
+       size_t i;
+
+       WARN_ON(!peer);
+       lockdep_assert_held(&peer->active);
+
+       for (i = 0, e = m->handles;
+            i < m->n_handles;
+            e = bus1_flist_next(e, &i))
+               e->ptr = bus1_handle_acquire(e->ptr, true);
+
+       /* this consumes an active reference on m->qnode.owner */
+       bus1_tx_stage_sync(tx, &m->qnode);
+}
+
+/**
+ * bus1_message_install() - install message payload into target process
+ * @m:                         message to operate on
+ * @param:                     receive parameters; only @param->flags is read
+ *
+ * This installs the payload FDs and handles of @m into the receiving
+ * peer and the calling process. Handles are always installed, FDs are only
+ * installed if BUS1_RECV_FLAG_INSTALL_FDS is set in @param->flags.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int bus1_message_install(struct bus1_message *m, struct bus1_cmd_recv *param)
+{
+       size_t i, j, n, size, offset, n_handles = 0, n_fds = 0;
+       const bool inst_fds = param->flags & BUS1_RECV_FLAG_INSTALL_FDS;
+       const bool peek = param->flags & BUS1_RECV_FLAG_PEEK;
+       struct bus1_peer *peer = m->qnode.owner;
+       struct bus1_handle *h;
+       struct bus1_flist *e;
+       struct kvec vec;
+       u64 ts, *handles;
+       u8 stack[512];  /* scratch space; kmalloc is used if it is too small */
+       void *buffer = stack;
+       int r, *fds;
+
+       WARN_ON(!peer);
+       lockdep_assert_held(&peer->local.lock);
+
+       size = max(m->n_files, min_t(size_t, m->n_handles, BUS1_FLIST_BATCH));
+       size *= max(sizeof(*fds), sizeof(*handles));
+       if (unlikely(size > sizeof(stack))) {
+               buffer = kmalloc(size, GFP_TEMPORARY);
+               if (!buffer)
+                       return -ENOMEM;
+       }
+
+       if (m->n_handles > 0) {
+               handles = buffer;
+               ts = bus1_queue_node_get_timestamp(&m->qnode);
+               offset = ALIGN(m->n_bytes, 8);
+
+               i = 0;
+               while ((n = bus1_flist_walk(m->handles, m->n_handles,
+                                           &e, &i)) > 0) {
+                       WARN_ON(i > m->n_handles);
+                       WARN_ON(i > BUS1_FLIST_BATCH); /* NOTE(review): or @n? */
+
+                       for (j = 0; j < n; ++j) {
+                               h = e[j].ptr;
+                               if (h && bus1_handle_is_live_at(h, ts)) {
+                                       handles[j] = bus1_handle_identify(h);
+                                       ++n_handles;
+                               } else {
+                                       bus1_handle_release(h, true);
+                                       e[j].ptr = bus1_handle_unref(h);
+                                       handles[j] = BUS1_HANDLE_INVALID;
+                               }
+                       }
+
+                       vec.iov_base = buffer;
+                       vec.iov_len = n * sizeof(u64);
+
+                       r = bus1_pool_write_kvec(&peer->data.pool, m->slice,
+                                                offset, &vec, 1, vec.iov_len);
+                       if (r < 0)
+                               goto exit;
+
+                       offset += n * sizeof(u64);
+               }
+       }
+
+       if (inst_fds && m->n_files > 0) {
+               fds = buffer;
+
+               for ( ; n_fds < m->n_files; ++n_fds) {
+                       r = get_unused_fd_flags(O_CLOEXEC);
+                       if (r < 0)
+                               goto exit;
+
+                       fds[n_fds] = r;
+               }
+
+               vec.iov_base = fds;
+               vec.iov_len = n_fds * sizeof(int);
+               offset = ALIGN(m->n_bytes, 8) +
+                        ALIGN(m->n_handles * sizeof(u64), 8);
+
+               r = bus1_pool_write_kvec(&peer->data.pool, m->slice, offset,
+                                        &vec, 1, vec.iov_len);
+               if (r < 0)
+                       goto exit;
+       }
+
+       /* charge resources */
+       if (!peek) {
+               WARN_ON(n_handles < m->n_handles_charge);
+               m->n_handles_charge -= n_handles;
+       }
+
+       /* publish pool slice */
+       mutex_lock(&peer->data.lock);
+       bus1_pool_publish(&peer->data.pool, m->slice);
+       mutex_unlock(&peer->data.lock);
+
+       /* commit handles */
+       for (i = 0, e = m->handles;
+            i < m->n_handles;
+            e = bus1_flist_next(e, &i)) {
+               h = e->ptr;
+               if (!IS_ERR_OR_NULL(h)) {
+                       WARN_ON(h != bus1_handle_acquire(h, true));
+                       WARN_ON(atomic_inc_return(&h->n_user) < 1);
+               }
+       }
+
+       /* commit FDs; leaves n_fds at 0 so the cleanup below is a no-op */
+       while (n_fds > 0) {
+               --n_fds;
+               fd_install(fds[n_fds], get_file(m->files[n_fds]));
+       }
+
+       r = 0;
+
+exit:
+       while (n_fds-- > 0)     /* error path: drop reserved, unused fds */
+               put_unused_fd(fds[n_fds]);
+       if (buffer != stack)
+               kfree(buffer);
+       return r;
+}
diff --git a/ipc/bus1/message.h b/ipc/bus1/message.h
new file mode 100644
index 0000000..e8c982f
--- /dev/null
+++ b/ipc/bus1/message.h
@@ -0,0 +1,171 @@
+#ifndef __BUS1_MESSAGE_H
+#define __BUS1_MESSAGE_H
+
+/*
+ * Copyright (C) 2013-2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ */
+
+/**
+ * DOC: Messages
+ *
+ * XXX
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include "util/flist.h"
+#include "util/queue.h"
+
+struct bus1_cmd_send;
+struct bus1_handle;
+struct bus1_peer;
+struct bus1_pool_slice;
+struct bus1_tx;
+struct bus1_user;
+struct cred;
+struct file;
+struct iovec;
+struct pid;
+
+/**
+ * struct bus1_factory - message factory
+ * @peer:                      sending peer
+ * @param:                     factory parameters
+ * @cred:                      sender credentials
+ * @pid:                       sender PID
+ * @tid:                       sender TID
+ * @on_stack:                  whether object lives on stack
+ * @has_secctx:                        whether secctx has been set
+ * @length_vecs:               total length of data in vectors
+ * @n_vecs:                    number of vectors
+ * @n_handles:                 number of handles imported so far
+ * @n_handles_charge:          number of handles to charge on commit
+ * @n_files:                   number of files imported so far
+ * @n_secctx:                  length of secctx
+ * @vecs:                      vector array (stored behind @handles)
+ * @files:                     file array (stored behind @vecs)
+ * @secctx:                    allocated secctx
+ * @handles:                   handle array
+ */
+struct bus1_factory {
+       struct bus1_peer *peer;
+       struct bus1_cmd_send *param;
+       const struct cred *cred;
+       struct pid *pid;
+       struct pid *tid;
+
+       bool on_stack : 1;
+       bool has_secctx : 1;
+
+       size_t length_vecs;
+       size_t n_vecs;
+       size_t n_handles;
+       size_t n_handles_charge;
+       size_t n_files;
+       u32 n_secctx;
+       struct iovec *vecs;
+       struct file **files;
+       char *secctx;
+
+       struct bus1_flist handles[];
+};
+
+/**
+ * struct bus1_message - data messages
+ * @ref:                       reference counter
+ * @qnode:                     embedded queue node
+ * @dst:                       destination handle
+ * @user:                      sending user
+ * @flags:                     message flags
+ * @uid:                       sender UID
+ * @gid:                       sender GID
+ * @pid:                       sender PID
+ * @tid:                       sender TID
+ * @n_bytes:                   number of user-bytes transmitted
+ * @n_handles:                 number of handles transmitted
+ * @n_handles_charge:          number of handle charges
+ * @n_files:                   number of files transmitted
+ * @n_secctx:                  number of bytes of security context transmitted
+ * @slice:                     pool slice holding the message data
+ * @files:                     passed files (array stored behind @handles)
+ * @handles:                   passed handles
+ */
+struct bus1_message {
+       struct kref ref;
+       struct bus1_queue_node qnode;
+       struct bus1_handle *dst;
+       struct bus1_user *user;
+
+       u64 flags;
+       uid_t uid;
+       gid_t gid;
+       pid_t pid;
+       pid_t tid;
+
+       size_t n_bytes;
+       size_t n_handles;
+       size_t n_handles_charge;
+       size_t n_files;
+       size_t n_secctx;
+       struct bus1_pool_slice *slice;
+       struct file **files;
+
+       struct bus1_flist handles[];
+};
+
+struct bus1_factory *bus1_factory_new(struct bus1_peer *peer,
+                                     struct bus1_cmd_send *param,
+                                     void *stack,
+                                     size_t n_stack);
+struct bus1_factory *bus1_factory_free(struct bus1_factory *f);
+int bus1_factory_seal(struct bus1_factory *f);
+struct bus1_message *bus1_factory_instantiate(struct bus1_factory *f,
+                                             struct bus1_handle *handle,
+                                             struct bus1_peer *peer);
+
+void bus1_message_free(struct kref *k);
+void bus1_message_stage(struct bus1_message *m, struct bus1_tx *tx);
+int bus1_message_install(struct bus1_message *m, struct bus1_cmd_recv *param);
+
+/**
+ * bus1_message_ref() - acquire object reference
+ * @m:                 message to operate on, or NULL
+ *
+ * This takes one additional kref reference on @m. The caller must already
+ * hold a reference when calling this.
+ *
+ * If @m is NULL, this is a no-op.
+ *
+ * Return: @m is returned.
+ */
+static inline struct bus1_message *bus1_message_ref(struct bus1_message *m)
+{
+       if (m)
+               kref_get(&m->ref);
+       return m;
+}
+
+/**
+ * bus1_message_unref() - release object reference
+ * @m:                 message to operate on, or NULL
+ *
+ * This releases a single object reference to @m. If the reference counter
+ * drops to 0, the message is destroyed via bus1_message_free().
+ *
+ * If @m is NULL, this is a no-op.
+ *
+ * Return: NULL is returned.
+ */
+static inline struct bus1_message *bus1_message_unref(struct bus1_message *m)
+{
+       if (m)
+               kref_put(&m->ref, bus1_message_free);
+       return NULL;
+}
+
+#endif /* __BUS1_MESSAGE_H */
diff --git a/ipc/bus1/peer.c b/ipc/bus1/peer.c
index a1525cb..0ff7a98 100644
--- a/ipc/bus1/peer.c
+++ b/ipc/bus1/peer.c
@@ -70,6 +70,7 @@ struct bus1_peer *bus1_peer_new(void)
 
        /* initialize data section */
        mutex_init(&peer->data.lock);
+       peer->data.pool = BUS1_POOL_NULL;
        bus1_queue_init(&peer->data.queue);
 
        /* initialize peer-private section */
@@ -136,6 +137,7 @@ struct bus1_peer *bus1_peer_free(struct bus1_peer *peer)
 
        /* deinitialize data section */
        bus1_queue_deinit(&peer->data.queue);
+       bus1_pool_deinit(&peer->data.pool);
        mutex_destroy(&peer->data.lock);
 
        /* deinitialize constant fields */
diff --git a/ipc/bus1/peer.h b/ipc/bus1/peer.h
index 655d3ac..5eb558f 100644
--- a/ipc/bus1/peer.h
+++ b/ipc/bus1/peer.h
@@ -54,6 +54,7 @@
 #include <linux/wait.h>
 #include "user.h"
 #include "util/active.h"
+#include "util/pool.h"
 #include "util/queue.h"
 
 struct cred;
@@ -88,6 +89,7 @@ struct bus1_peer {
 
        struct {
                struct mutex lock;
+               struct bus1_pool pool;
                struct bus1_queue queue;
        } data;
 
diff --git a/ipc/bus1/util.c b/ipc/bus1/util.c
index 8acf798..687f40d 100644
--- a/ipc/bus1/util.c
+++ b/ipc/bus1/util.c
@@ -9,12 +9,174 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/atomic.h>
+#include <linux/compat.h>
 #include <linux/debugfs.h>
 #include <linux/err.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/kernel.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/uio.h>
+#include <net/sock.h>
+#include "main.h"
 #include "util.h"
 
+/**
+ * bus1_import_vecs() - import vectors from user
+ * @out_vecs:          kernel memory to store vecs, preallocated
+ * @out_length:                output storage for sum of all vector lengths
+ * @vecs:              user pointer for vectors
+ * @n_vecs:            number of vectors to import
+ *
+ * This copies the given vectors from user memory into the preallocated kernel
+ * buffer. Sanity checks are performed on the memory of the vector-array, the
+ * memory pointed to by the vectors and on the overall size calculation.
+ *
+ * On success, @out_length contains the sum of all vector-lengths, which never
+ * exceeds MAX_RW_COUNT. At most UIO_MAXIOV vectors are accepted.
+ *
+ * Unlike most other functions, this function might modify its output buffer
+ * even if it fails. That is, @out_vecs might contain garbage if this function
+ * fails. This is done for performance reasons.
+ *
+ * Return: 0 on success, -EMSGSIZE or -EFAULT on failure.
+ */
+int bus1_import_vecs(struct iovec *out_vecs,
+                    size_t *out_length,
+                    const void __user *vecs,
+                    size_t n_vecs)
+{
+       size_t i, length = 0;
+
+       if (n_vecs > UIO_MAXIOV)
+               return -EMSGSIZE;
+       if (n_vecs == 0) {
+               *out_length = 0;
+               return 0;
+       }
+
+       if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall()) {
+               /*
+                * Compat types and macros are only defined if CONFIG_COMPAT
+                * is set; the kernel provides no fallbacks. We want this
+                * branch compile-tested either way, so native fallback types
+                * are declared below. Without CONFIG_COMPAT, the IS_ENABLED()
+                * condition above is constant false and the branch is dropped.
+                */
+#if IS_ENABLED(CONFIG_COMPAT)
+               const struct compat_iovec __user *uvecs = vecs;
+               compat_uptr_t v_base;
+               compat_size_t v_len;
+               compat_ssize_t v_slen;
+#else
+               const struct iovec __user *uvecs = vecs;
+               void __user *v_base;
+               size_t v_len;
+               ssize_t v_slen;
+#endif
+               void __user *v_ptr;
+
+               if (unlikely(!access_ok(VERIFY_READ, vecs,
+                                       sizeof(*uvecs) * n_vecs)))
+                       return -EFAULT;
+
+               for (i = 0; i < n_vecs; ++i) {
+                       if (unlikely(__get_user(v_base, &uvecs[i].iov_base) ||
+                                    __get_user(v_len, &uvecs[i].iov_len)))
+                               return -EFAULT;
+
+#if IS_ENABLED(CONFIG_COMPAT)
+                       v_ptr = compat_ptr(v_base);
+#else
+                       v_ptr = v_base;
+#endif
+                       v_slen = v_len; /* signed copy, for the range check */
+
+                       if (unlikely(v_slen < 0 ||
+                                    (typeof(v_len))v_slen != v_len))
+                               return -EMSGSIZE;
+                       if (unlikely(!access_ok(VERIFY_READ, v_ptr, v_len)))
+                               return -EFAULT;
+                       if (unlikely((size_t)v_len > MAX_RW_COUNT - length))
+                               return -EMSGSIZE;
+
+                       out_vecs[i].iov_base = v_ptr;
+                       out_vecs[i].iov_len = v_len;
+                       length += v_len; /* bounded by MAX_RW_COUNT */
+               }
+       } else {
+               void __user *v_base;
+               size_t v_len;
+
+               if (copy_from_user(out_vecs, vecs, sizeof(*out_vecs) * n_vecs))
+                       return -EFAULT;
+
+               for (i = 0; i < n_vecs; ++i) { /* validate the copy in place */
+                       v_base = out_vecs[i].iov_base;
+                       v_len = out_vecs[i].iov_len;
+
+                       if (unlikely((ssize_t)v_len < 0))
+                               return -EMSGSIZE;
+                       if (unlikely(!access_ok(VERIFY_READ, v_base, v_len)))
+                               return -EFAULT;
+                       if (unlikely(v_len > MAX_RW_COUNT - length))
+                               return -EMSGSIZE;
+
+                       length += v_len; /* bounded by MAX_RW_COUNT */
+               }
+       }
+
+       *out_length = length;
+       return 0;
+}
+
+/**
+ * bus1_import_fd() - import file descriptor from user
+ * @fd:        file descriptor number, as supplied by user-space
+ *
+ * This imports a file-descriptor from the current user-context. The FD number
+ * @fd is resolved to a file, which is returned to the caller with a reference
+ * held. Negative or unknown FD numbers yield EBADF.
+ *
+ * Neither bus1, nor UDS files are allowed. If those are supplied, EOPNOTSUPP
+ * is returned. Those would require expensive garbage-collection if they're
+ * sent recursively by user-space. O_PATH file descriptors are always accepted.
+ *
+ * Return: Pointer to pinned file, ERR_PTR on failure.
+ */
+struct file *bus1_import_fd(int fd)
+{
+       struct file *f, *ret;
+       struct socket *sock;
+       struct inode *inode;
+
+       if (unlikely(fd < 0))
+               return ERR_PTR(-EBADF);
+
+       f = fget_raw(fd);
+       if (unlikely(!f))
+               return ERR_PTR(-EBADF);
+
+       inode = file_inode(f);
+       sock = S_ISSOCK(inode->i_mode) ? SOCKET_I(inode) : NULL;
+
+       if (f->f_mode & FMODE_PATH)
+               ret = f; /* O_PATH is always allowed */
+       else if (f->f_op == &bus1_fops)
+               ret = ERR_PTR(-EOPNOTSUPP); /* disallow bus1 recursion */
+       else if (sock && sock->sk && sock->ops && sock->ops->family == PF_UNIX)
+               ret = ERR_PTR(-EOPNOTSUPP); /* disallow UDS recursion */
+       else
+               ret = f; /* all others are allowed */
+
+       if (f != ret) /* rejected: drop the reference from fget_raw() */
+               fput(f);
+
+       return ret;
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 static int bus1_debugfs_atomic_t_get(void *data, u64 *val)
diff --git a/ipc/bus1/util.h b/ipc/bus1/util.h
index c22ecd5..ab41d5e 100644
--- a/ipc/bus1/util.h
+++ b/ipc/bus1/util.h
@@ -26,6 +26,7 @@
 #include <linux/types.h>
 
 struct dentry;
+struct iovec;
 
 /**
  * BUS1_TAIL - tail pointer in singly-linked lists
@@ -37,6 +38,12 @@ struct dentry;
  */
 #define BUS1_TAIL ERR_PTR(-1)
 
+int bus1_import_vecs(struct iovec *out_vecs,
+                    size_t *out_length,
+                    const void __user *vecs,
+                    size_t n_vecs);
+struct file *bus1_import_fd(int fd);
+
 #if defined(CONFIG_DEBUG_FS)
 
 struct dentry *
-- 
2.10.1

Reply via email to