vas: Implement a simple FTW driver

Michael Ellerman Sun, 13 Aug 2017 23:54:13 -0700

Hi Suka,

Some comments inline ...



Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com> writes:

> The Fast Thread Wake-up (FTW) driver provides user space applications an
> interface to the Core-to-Core functionality in POWER9. The driver provides
> the device node/ioctl API to applications and uses the external interfaces
> to the VAS driver to interact with the VAS hardware.
>
> A follow-on patch provides detailed description of the API for the driver.
>
> Signed-off-by: Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>
> ---
>  MAINTAINERS                             |   1 +
>  arch/powerpc/platforms/powernv/Kconfig  |  16 ++
>  arch/powerpc/platforms/powernv/Makefile |   1 +
>  arch/powerpc/platforms/powernv/nx-ftw.c | 486 ++++++++++++++++++++++++++++++++

AFAICS this has nothing to do with NX, so why is it called nx-ftw?

Also, aren't we going to want to use this on pseries eventually? If so,
should it go in arch/powerpc/sysdev?

> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
> index e4db292..dc60046 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -13,3 +13,4 @@ obj-$(CONFIG_MEMORY_FAILURE)        += opal-memory-errors.o
>  obj-$(CONFIG_TRACEPOINTS)    += opal-tracepoints.o
>  obj-$(CONFIG_OPAL_PRD)       += opal-prd.o
>  obj-$(CONFIG_PPC_VAS)        += vas.o vas-window.o
> +obj-$(CONFIG_PPC_FTW)        += nx-ftw.o
> diff --git a/arch/powerpc/platforms/powernv/nx-ftw.c b/arch/powerpc/platforms/powernv/nx-ftw.c
> new file mode 100644
> index 0000000..a0b6388
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/nx-ftw.c
> @@ -0,0 +1,486 @@

Missing license header.

> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include <linux/export.h>
> +#include <asm/cputable.h>
> +#include <linux/device.h>
> +#include <linux/debugfs.h>
> +#include <linux/cdev.h>
> +#include <linux/mutex.h>
> +#include <linux/fs.h>
> +#include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/sched.h>
> +#include <linux/uaccess.h>
> +#include <linux/bootmem.h>
> +#include <asm/opal-api.h>
> +#include <asm/opal.h>
> +#include <asm/page.h>
> +#include <asm/vas.h>
> +#include <asm/reg.h>

Please try and trim the list to what you need.

> +
> +/*
> + * NX-FTW is a device driver used to provide user space access to the
> + * Core-to-Core aka Fast Thread Wakeup (FTW) functionality provided by
> + * the Virtual Accelerator Subsystem (VAS) in POWER9 systems. See also
> + * arch/powerpc/platforms/powernv/vas*.
> + *
> + * The driver creates the device node /dev/crypto/nx-ftw that can be
> + * used as follows:
> + *
> + *   fd = open("/dev/crypto/nx-ftw", O_RDWR);
> + *   rc = ioctl(fd, VAS_RX_WIN_OPEN, &rxattr);
> + *   rc = ioctl(fd, VAS_TX_WIN_OPEN, &txattr);
> + *   paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
> + *   vas_copy(&crb, 0, 1);
> + *   vas_paste(paste_addr, 0, 1);
> + *
> + * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
> + */
> +
> +static char          *nxftw_dev_name = "nx-ftw";
> +static atomic_t              nxftw_instid = ATOMIC_INIT(0);
> +static dev_t         nxftw_devt;
> +static struct dentry *nxftw_debugfs;
> +static struct class  *nxftw_dbgfs_class;

The class doesn't go in debugfs, which is what "dbgfs" says to me.

> +/*
> + * Wrapper object for the nx-ftw device node - there is just one

Just "device".

"device node" is ambiguous vs device tree.

> + * instance of this node for the whole system.

So why not put the globals above in here also?

> + */
> +struct nxftw_dev {
> +     struct cdev cdev;
> +     struct device *device;
> +     char *name;
> +     atomic_t refcount;
> +} nxftw_device;
> +
> +/*
> + * One instance per open of a nx-ftw device. Each nxftw_instance is
> + * associated with a VAS window, after the caller issues VAS_RX_WIN_OPEN
> + * or VAS_TX_WIN_OPEN ioctl.
> + */
> +struct nxftw_instance {
> +     int instance;
> +     bool tx_win;
> +     struct vas_window *window;
> +};
> +
> +#define VAS_DEFAULT_VAS_ID   0
> +#define POWERNV_LPID         0       /* TODO: For VM/KVM guests? */

mfspr(SPRN_LPID)

would seem to do the trick?

> +static char *nxftw_devnode(struct device *dev, umode_t *mode)
> +{
> +     return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));

This isn't a crypto device?

> +}
> +
> +static int nxftw_open(struct inode *inode, struct file *fp)
> +{
> +     int minor;
> +     struct nxftw_instance *nxti;

instance would be a better name.

> +     minor = MINOR(inode->i_rdev);

Not used?

> +     nxti = kzalloc(sizeof(*nxti), GFP_KERNEL);
> +     if (!nxti)
> +             return -ENOMEM;
> +
> +     nxti->instance = atomic_inc_return(&nxftw_instid);

And this would read better if the variable was "id". eg.

        instance->id = atomic_inc_return(&next_instance_id);

> +     nxti->window = NULL;
> +
> +     fp->private_data = nxti;
> +     return 0;
> +}
> +
> +static int validate_txwin_user_attr(struct vas_tx_win_open_attr *uattr)
> +{
> +     int i;
> +
> +     if (uattr->version != 1)
> +             return -EINVAL;
> +
> +     if (uattr->flags & ~VAS_FLAGS_HIGH_PRI)
> +             return -EINVAL;
> +
> +     if (uattr->reserved1 || uattr->reserved2)
> +             return -EINVAL;
> +
> +     for (i = 0; i < sizeof(uattr->reserved3) / sizeof(uint64_t); i++) {
> +             if (uattr->reserved3[i])
> +                     return -EINVAL;
> +     }

That struct is a mess and needs to be reworked.

> +     return 0;
> +}
> +
> +static bool validate_rxwin_user_attr(struct vas_rx_win_open_attr *uattr)
> +{
> +     int i;
> +
> +     if (uattr->version != 1)
> +             return -EINVAL;
> +
> +     for (i = 0; i < sizeof(uattr->reserved) / sizeof(uint64_t); i++) {
> +             if (uattr->reserved[i])
> +                     return -EINVAL;
> +     }

Ditto.

> +     return 0;
> +}
> +
> +#ifdef vas_debug

This is dead code, which makes it very easy for it to get out of sync
with the vas_rx_win_attr for example.

Better to just make these pr_debug() calls in the only caller; that way
they get type checked.

> +static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
> +{
> +     pr_err("NX-FTW: user %d, nx %d, fault %d, ntfy %d, intr %d early %d\n",
> +                     attr->user_win ? 1 : 0,
> +                     attr->nx_win ? 1 : 0,
> +                     attr->fault_win ? 1 : 0,
> +                     attr->notify_disable ? 1 : 0,
> +                     attr->intr_disable ? 1 : 0,
> +                     attr->notify_early ? 1 : 0);
> +
> +     pr_err("NX-FTW: rx_fifo %p, rx_fifo_size %d, max value 0x%x\n",
> +                     attr->rx_fifo, attr->rx_fifo_size,
> +                     VAS_RX_FIFO_SIZE_MAX);
> +
> +}
> +#else
> +static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
> +{
> +}
> +#endif
> +
> +static int nxftw_ioc_open_rx_window(struct file *fp, unsigned long arg)
> +{
> +     int rc;
> +     struct vas_rx_win_open_attr uattr;
> +     struct vas_rx_win_attr rxattr;
> +     struct nxftw_instance *nxti = fp->private_data;
> +     struct vas_window *win;

        struct vas_rx_win_open_attr uattr;
        struct vas_rx_win_attr rxattr;
        struct nxftw_instance *nxti;
        struct vas_window *win;
        int rc;

        nxti = fp->private_data;

Ah much better :)

Aka. reverse-christmas-tree.

> +
> +     rc = copy_from_user(&uattr, (void *)arg, sizeof(uattr));

Nicer would be:

        void __user *uptr = (void __user *)arg;

        rc = copy_from_user(&uattr, uptr, sizeof(uattr));

> +     if (rc) {
> +             pr_devel("%s(): copy_from_user() returns %d\n", __func__, rc);
> +             return -EFAULT;
> +     }
> +
> +     rc = validate_rxwin_user_attr(&uattr);
> +     if (rc)
> +             return rc;
> +
> +     memset(&rxattr, 0, sizeof(rxattr));
> +
> +     rxattr.lnotify_lpid = POWERNV_LPID;
> +
> +     /*
> +      * Only caller can own the window for now. Not sure if there is need
> +      * for process P1 to make P2 the owner of a window. If so, we need to
> +      * find P2, make sure we have permissions, get a reference etc.
> +      */
> +     rxattr.lnotify_pid = mfspr(SPRN_PID);
> +     rxattr.lnotify_tid = mfspr(SPRN_TIDR);
> +     rxattr.rx_fifo = NULL;
> +     rxattr.rx_fifo_size = 0;
> +     rxattr.intr_disable = true;
> +     rxattr.user_win = true;

vas_init_rx_win_attr() ?

> +
> +     dump_rx_win_attr(&rxattr);
> +
> +     /*
> +      * TODO: Rather than the default vas id, choose an instance of VAS
> +      *       based on the chip the caller is running.
> +      */

Seems like that will be a common pattern so maybe the vas core should
handle it for callers who want it.

> +     win = vas_rx_win_open(VAS_DEFAULT_VAS_ID, VAS_COP_TYPE_FTW, &rxattr);
> +     if (IS_ERR(win)) {
> +             pr_devel("%s() vas_rx_win_open() failed, %ld\n", __func__,
> +                             PTR_ERR(win));
> +             return PTR_ERR(win);
> +     }
> +
> +     nxti->window = win;
> +     uattr.rx_win_handle = vas_win_id(win);
> +
> +     rc = copy_to_user((void *)arg, &uattr, sizeof(uattr));
> +     if (rc) {
> +             pr_devel("%s(): copy_to_user() failed, %d\n", __func__, rc);
> +             return -EFAULT;
> +     }

You defined the ioctl as:

  #define VAS_RX_WIN_OPEN               _IOW('v', 2, struct vas_rx_win_open_attr)

But you're reading and writing from the user arg, so it should be _IOWR.

> +
> +     return 0;
> +}
> +
> +static int nxftw_ioc_open_tx_window(struct file *fp, unsigned long arg)
> +{
> +     int rc;
> +     enum vas_cop_type cop;
> +     struct vas_window *win;
> +     struct vas_tx_win_open_attr uattr;
> +     struct vas_tx_win_attr txattr;

Those two struct names are quite confusing.

> +     struct nxftw_instance *nxti = fp->private_data;
> +
> +     rc = copy_from_user(&uattr, (void *)arg, sizeof(uattr));
> +     if (rc) {
> +             pr_devel("%s(): copy_from_user() failed, %d\n", __func__, rc);
> +             return -EFAULT;
> +     }

All you use is rx_win_handle, so why does this ioctl take the whole struct?

> +     cop = VAS_COP_TYPE_FTW;
> +
> +     rc = validate_txwin_user_attr(&uattr);
> +     if (rc)
> +             return rc;
> +
> +     pr_devel("Pid %d: Opening txwin, cop %d, PIDR %ld\n",
> +                             task_pid_nr(current), cop, mfspr(SPRN_PID));
> +
> +     vas_init_tx_win_attr(&txattr, cop);
> +
> +     txattr.lpid = POWERNV_LPID;
> +     txattr.pidr = mfspr(SPRN_PID);
> +     txattr.pid = task_pid_nr(current);

Why is that in txattr?

The pid can be freed and given to another process so it's fishy to be
saving the pid without also holding a reference on the task.

> +     txattr.user_win = true;

Has been done for us.

> +     txattr.pswid = uattr.rx_win_handle;
> +
> +     win = vas_tx_win_open(VAS_DEFAULT_VAS_ID, cop, &txattr);
> +     if (IS_ERR(win)) {
> +             pr_devel("%s() vas_tx_win_open() failed, %ld\n", __func__,
> +                                     PTR_ERR(win));
> +             return PTR_ERR(win);
> +     }
> +     nxti->window = win;
> +     nxti->tx_win = true;

is_tx would be clearer IMHO.

> +     return 0;
> +}
> +
> +static int nxftw_release(struct inode *inode, struct file *fp)
> +{
> +     struct nxftw_instance *nxti;
> +
> +     nxti = fp->private_data;
> +
> +     vas_win_close(nxti->window);
> +     nxti->window = NULL;
> +
> +     kfree(nxti);
> +     fp->private_data = NULL;

Flipping the order of those would be preferable though it's not actually
a bug.

> +     atomic_dec(&nxftw_instid);
> +
> +     return 0;
> +}
> +
> +static ssize_t nxftw_write(struct file *fp, const char __user *buf,
> +                     size_t len, loff_t *offsetp)
> +{
> +     return -ENOTSUPP;
> +}
> +
> +static ssize_t nxftw_read(struct file *fp, char __user *buf, size_t len,
> +                     loff_t *offsetp)
> +{
> +     return -ENOTSUPP;
> +}

Do you need those?

> +static int nxftw_vma_fault(struct vm_fault *vmf)
> +{
> +     u64 offset;
> +     unsigned long vaddr;
> +     uint64_t pbaddr_start;
> +     struct nxftw_instance *nxti;
> +     struct vm_area_struct *vma = vmf->vma;
> +
> +     nxti = vma->vm_private_data;
> +     offset = vmf->pgoff << PAGE_SHIFT;
> +     vaddr = (unsigned long)vmf->address;
> +
> +     pbaddr_start = vas_win_paste_addr(nxti->window);
> +
> +     pr_devel("%s() instance %d, pbaddr 0x%llx, vaddr 0x%lx,"
> +                     "offset %llx, pgoff 0x%lx, vma-start 0x%zx,"
> +                     "size %zd\n", __func__, nxti->instance,
> +                     pbaddr_start, vaddr, offset, vmf->pgoff,
> +                     vma->vm_start, vma->vm_end-vma->vm_start);
> +
> +     vm_insert_pfn(vma, vaddr, (pbaddr_start + offset) >> PAGE_SHIFT);
> +
> +     return VM_FAULT_NOPAGE;
> +}
> +
> +const struct vm_operations_struct nxftw_vm_ops = {
> +     .fault = nxftw_vma_fault,
> +};

Is there some particular reason you need to implement those? You appear
to be just mapping a page into the address space. Can't you just use
remap_pfn_range() in your mmap routine?

> +static int nxftw_mmap(struct file *fp, struct vm_area_struct *vma)
> +{
> +     struct nxftw_instance *nxti = fp->private_data;
> +
> +     if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
> +             pr_devel("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__,
> +                             (vma->vm_end - vma->vm_start), PAGE_SIZE);
> +             return -EINVAL;
> +     }
> +
> +     /* Ensure instance has an open send window */
> +     if (!nxti->window || !nxti->tx_win) {
> +             pr_devel("%s(): No send window open?\n", __func__);
> +             return -EINVAL;
> +     }
> +
> +     /* flags, page_prot from cxl_mmap(), except we want cachable */
> +     vma->vm_flags |= VM_IO | VM_PFNMAP;
> +     vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
> +
> +     vma->vm_ops = &nxftw_vm_ops;
> +     vma->vm_private_data = nxti;

ie. here.

See eg. opal-prd.c for an example.

> +     return 0;
> +}
> +
> +static long nxftw_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
> +{
> +     struct nxftw_instance *nxti;
> +
> +     nxti = fp->private_data;

Not used.
> +
> +     pr_devel("%s() cmd 0x%x, TX_WIN_OPEN 0x%lx\n", __func__, cmd,
> +                     VAS_TX_WIN_OPEN);

Can we drop that?

> +     switch (cmd) {
> +
> +     case VAS_TX_WIN_OPEN:
> +             return nxftw_ioc_open_tx_window(fp, arg);
> +
> +     case VAS_RX_WIN_OPEN:
> +             return nxftw_ioc_open_rx_window(fp, arg);
> +
> +     default:
> +             return -EINVAL;
> +     }
> +}
> +
> +const struct file_operations nxftw_fops = {
> +     .owner = THIS_MODULE,
> +     .open = nxftw_open,
> +     .release = nxftw_release,
> +     .read = nxftw_read,
> +     .write = nxftw_write,
> +     .mmap = nxftw_mmap,
> +     .unlocked_ioctl = nxftw_ioctl,
> +};
> +
> +
> +int nxftw_file_init(void)
> +{
> +     int rc;
> +     dev_t devno;
> +
> +     rc = alloc_chrdev_region(&nxftw_devt, 1, 1, "nx-ftw");
> +     if (rc) {
> +             pr_err("Unable to allocate nxftw major number: %i\n", rc);
> +             return rc;
> +     }
> +
> +     pr_devel("NX-FTW device allocated, dev [%i,%i]\n", MAJOR(nxftw_devt),
> +                     MINOR(nxftw_devt));
> +
> +     nxftw_dbgfs_class = class_create(THIS_MODULE, "nxftw");
> +     if (IS_ERR(nxftw_dbgfs_class)) {
> +             pr_err("Unable to create NX-FTW class\n");
> +             rc = PTR_ERR(nxftw_dbgfs_class);
> +             goto err;
> +     }
> +     nxftw_dbgfs_class->devnode = nxftw_devnode;
> +
> +     cdev_init(&nxftw_device.cdev, &nxftw_fops);
> +
> +     devno = MKDEV(MAJOR(nxftw_devt), 0);
> +     if (cdev_add(&nxftw_device.cdev, devno, 1)) {
> +             pr_err("NX-FTW: cdev_add() failed\n");
> +             goto err;
> +     }
> +
> +     nxftw_device.device = device_create(nxftw_dbgfs_class, NULL,
> +                     devno, NULL, nxftw_dev_name, MINOR(devno));
> +     if (IS_ERR(nxftw_device.device)) {
> +             pr_err("Unable to create nxftw-%d\n", MINOR(devno));
> +             goto err;
> +     }
> +
> +     pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno),
> +                     MINOR(devno));
> +     return 0;
> +
> +err:
> +     unregister_chrdev_region(nxftw_devt, 1);
> +     return rc;
> +}
> +
> +void nxftw_file_exit(void)
> +{
> +     dev_t devno;
> +
> +     pr_devel("NX-FTW: %s entered\n", __func__);
> +
> +     cdev_del(&nxftw_device.cdev);
> +     devno = MKDEV(MAJOR(nxftw_devt), MINOR(nxftw_devt));
> +     device_destroy(nxftw_dbgfs_class, devno);
> +
> +     class_destroy(nxftw_dbgfs_class);
> +     unregister_chrdev_region(nxftw_devt, 1);
> +}
> +
> +
> +/*
> + * Create a debugfs entry. Not sure what for yet, though
> + */

Please just drop it.

> +int __init nxftw_debugfs_init(void)
> +{
> +     struct dentry *ent;
> +
> +     ent = debugfs_create_dir("nxftw", NULL);
> +     if (IS_ERR(ent)) {
> +             pr_devel("nxftw: %s(): error creating dbgfs dir\n", __func__);
> +             return PTR_ERR(ent);
> +     }
> +     nxftw_debugfs = ent;
> +
> +     return 0;
> +}
> +
> +void nxftw_debugfs_exit(void)
> +{
> +     debugfs_remove_recursive(nxftw_debugfs);
> +}
> +
> +int __init nxftw_init(void)
> +{
> +     int rc;
> +
> +     rc = nxftw_file_init();
> +     if (rc)
> +             return rc;
> +
> +     rc = nxftw_debugfs_init();
> +     if (rc)
> +             goto free_file;
> +
> +     pr_err("NX-FTW Device initialized\n");

That's not an error.

> +
> +     return 0;
> +
> +free_file:
> +     nxftw_file_exit();
> +     return rc;
> +}
> +
> +void __init nxftw_exit(void)
> +{
> +     pr_devel("NX-FTW Device exiting\n");
> +     nxftw_debugfs_exit();
> +     nxftw_file_exit();
> +}
> +
> +module_init(nxftw_init);
> +module_exit(nxftw_exit);

This can't be a module, so you shouldn't be using these.

Or these:

> +MODULE_DESCRIPTION("IBM NX Fast Thread Wakeup Device");
> +MODULE_AUTHOR("Sukadev Bhattiprolu <suka...@linux.vnet.ibm.com>");
> +MODULE_LICENSE("GPL");

cheers

Re: [PATCH v6 16/17] powerpc/vas: Implement a simple FTW driver

Reply via email to