Mathieu Desnoyers wrote:
[..]
> The change for -EOPNOTSUPP in header and implementation would look like
> this (more questions below):
> 
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index b463502b16e1..df2d52b8a245 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -86,11 +86,7 @@ static inline void *dax_holder(struct dax_device *dax_dev)
>   static inline struct dax_device *alloc_dax(void *private,
>               const struct dax_operations *ops)
>   {
> -     /*
> -      * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
> -      * NULL is an error or expected.
> -      */
> -     return NULL;
> +     return ERR_PTR(-EOPNOTSUPP);
>   }
>   static inline void put_dax(struct dax_device *dax_dev)
>   {
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index 8c3a6e8e6334..c1cf6f0bbe12 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -448,7 +448,7 @@ struct dax_device *alloc_dax(void *private, const struct 
> dax_operations *ops)
>   
>       /* Unavailable on architectures with virtually aliased data caches. */
>       if (cpu_dcache_is_aliasing())
> -             return NULL;
> +             return ERR_PTR(-EOPNOTSUPP);
>   
>       if (WARN_ON_ONCE(ops && !ops->zero_page_range))
>               return ERR_PTR(-EINVAL);
> 
> > 
> > diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> > index f4b635526345..254d3b1e420e 100644
> > --- a/drivers/dax/super.c
> > +++ b/drivers/dax/super.c
> > @@ -322,7 +322,7 @@ EXPORT_SYMBOL_GPL(dax_alive);
> >    */
> >   void kill_dax(struct dax_device *dax_dev)
> >   {
> > -   if (!dax_dev)
> > +   if (IS_ERR_OR_NULL(dax_dev))
> 
> I am tempted to just leave the "if (!dax_dev)" check here, because
> many other functions of this API are only protected by a NULL
> pointer check. I would hate to forget to convert one check in this
> change, and I don't think it simplifies anything.

It's not that it simplifies anything, but mistakes fail safely as a
memory leak rather than a crash.

> The alternative route I intend to take is to audit all callers
> of alloc_dax() and make sure they all save the alloc_dax() return
> value in a struct dax_device * local variable first for the sake
> of checking for IS_ERR(). This will leave the xyz->dax_dev pointer
> initialized to NULL in the error case and simplify the rest of
> error checking.

I could maybe get on board with that, but it needs a comment somewhere
about the asymmetric subtlety.

> 
> 
> >             return;
> >   
> >     if (dax_dev->holder_data != NULL)
> > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> > index 4e8fdcb3f1c8..b69c9e442cf4 100644
> > --- a/drivers/nvdimm/pmem.c
> > +++ b/drivers/nvdimm/pmem.c
> > @@ -560,17 +560,19 @@ static int pmem_attach_disk(struct device *dev,
> >     dax_dev = alloc_dax(pmem, &pmem_dax_ops);
> >     if (IS_ERR(dax_dev)) {
> >             rc = PTR_ERR(dax_dev);
> > -           goto out;
> > +           if (rc != -EOPNOTSUPP)
> > +                   goto out;
> 
> If I compare the before / after this change, if previously
> pmem_attach_disk() was called in a configuration with FS_DAX=n, it would
> result in a NULL pointer dereference.

No, alloc_dax() only returns NULL CONFIG_DAX=n case, not the
CONFIG_FS_DAX=n case. So that means that pmem devices on ARM have been
possible without FS_DAX. So, in order for alloc_dax() returning
ERR_PTR(-EOPNOTSUPP) to not regress pmem device availability this error
path needs to be changed.

> This would be an error handling fix all by itself. Do we really want
> to return successfully if dax is unsupported, or should we return
> an error here ?

Per above, there is no error handling fix, and pmem block device
available should not depend on alloc_dax() succeeding.

The real question is what to do about device-dax. I *think* it is not
affected by cpu_dcache aliasing because it never accesses user mappings
through a kernel alias. I doubt device-dax is in use on these platforms,
but we might need another fixup for that if someone screams about the
alloc_dax() behavior change making them lose device-dax access.

> > +   } else {
> > +           set_dax_nocache(dax_dev);
> > +           set_dax_nomc(dax_dev);
> > +           if (is_nvdimm_sync(nd_region))
> > +                   set_dax_synchronous(dax_dev);
> > +           rc = dax_add_host(dax_dev, disk);
> > +           if (rc)
> > +                   goto out_cleanup_dax;
> > +           dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
> > +           pmem->dax_dev = dax_dev;
> >     }
> > -   set_dax_nocache(dax_dev);
> > -   set_dax_nomc(dax_dev);
> > -   if (is_nvdimm_sync(nd_region))
> > -           set_dax_synchronous(dax_dev);
> > -   rc = dax_add_host(dax_dev, disk);
> > -   if (rc)
> > -           goto out_cleanup_dax;
> > -   dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
> > -   pmem->dax_dev = dax_dev;
> >   
> >     rc = device_add_disk(dev, disk, pmem_attribute_groups);
> >     if (rc)
> > diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> > index 4b7ecd4fd431..f911e58a24dd 100644
> > --- a/drivers/s390/block/dcssblk.c
> > +++ b/drivers/s390/block/dcssblk.c
> > @@ -681,12 +681,14 @@ dcssblk_add_store(struct device *dev, struct 
> > device_attribute *attr, const char
> >     if (IS_ERR(dev_info->dax_dev)) {
> >             rc = PTR_ERR(dev_info->dax_dev);
> >             dev_info->dax_dev = NULL;
> > -           goto put_dev;
> > +           if (rc != -EOPNOTSUPP)
> > +                   goto put_dev;
> 
> config DCSSBLK selects FS_DAX_LIMITED and DAX.
> 
> I'm not sure what selecting DAX is trying to achieve here, because the
> Kconfig option is "FS_DAX".
> 
> So depending on the real motivation behind this select, we may want to
> consider failure rather than success in the -EOPNOTSUPP case.

Ah, true, s390 is a !cpu_dcache_is_aliasing() arch, so it is ok to fail
driver load on alloc_dax() failure as it knows that ERR_PTR(-EOPNOTSUPP)
will never be returned.

> 
> > +   } else {
> > +           set_dax_synchronous(dev_info->dax_dev);
> > +           rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
> > +           if (rc)
> > +                   goto out_dax;
> >     }
> > -   set_dax_synchronous(dev_info->dax_dev);
> > -   rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
> > -   if (rc)
> > -           goto out_dax;
> >   
> >     get_device(&dev_info->dev);
> >     rc = device_add_disk(&dev_info->dev, dev_info->gd, NULL);
> 
> My own changes, if we want failure on -EOPNOTSUPP:
> 
> diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> index 4b7ecd4fd431..f363c1d51d9a 100644
> --- a/drivers/s390/block/dcssblk.c
> +++ b/drivers/s390/block/dcssblk.c
> @@ -549,6 +549,7 @@ dcssblk_add_store(struct device *dev, struct 
> device_attribute *attr, const char
>       int rc, i, j, num_of_segments;
>       struct dcssblk_dev_info *dev_info;
>       struct segment_info *seg_info, *temp;
> +     struct dax_device *dax_dev;
>       char *local_buf;
>       unsigned long seg_byte_size;
>   
> @@ -677,13 +678,13 @@ dcssblk_add_store(struct device *dev, struct 
> device_attribute *attr, const char
>       if (rc)
>               goto put_dev;
>   
> -     dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
> -     if (IS_ERR(dev_info->dax_dev)) {
> -             rc = PTR_ERR(dev_info->dax_dev);
> -             dev_info->dax_dev = NULL;
> +     dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
> +     if (IS_ERR(dax_dev)) {
> +             rc = PTR_ERR(dax_dev);
>               goto put_dev;
>       }
> -     set_dax_synchronous(dev_info->dax_dev);
> +     set_dax_synchronous(dax_dev);
> +     dev_info->dax_dev = dax_dev;

Looks good to me.

>       rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
>       if (rc)
>               goto out_dax;
> 
> 
> > diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> > index 5f1be1da92ce..11053a70f5ab 100644
> > --- a/fs/fuse/virtio_fs.c
> > +++ b/fs/fuse/virtio_fs.c
> > @@ -16,6 +16,7 @@
> >   #include <linux/fs_context.h>
> >   #include <linux/fs_parser.h>
> >   #include <linux/highmem.h>
> > +#include <linux/cleanup.h>
> >   #include <linux/uio.h>
> >   #include "fuse_i.h"
> >   
> > @@ -795,8 +796,11 @@ static void virtio_fs_cleanup_dax(void *data)
> >     put_dax(dax_dev);
> >   }
> >   
> > +DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) 
> > virtio_fs_cleanup_dax(_T))
> > +
> >   static int virtio_fs_setup_dax(struct virtio_device *vdev, struct 
> > virtio_fs *fs)
> 
> So either I'm completely missing how ownership works in this function, or
> we should be really concerned about the fact that it does no actual
> cleanup of anything on any error.
> 
> I would be tempted to first refactor this function without using cleanup.h
> so those fixes can be easily backported to older kernels (?)
> 
> Here what I'm seeing so far:
> 
> - devm_release_mem_region() is never called after devm_request_mem_region(). 
> Not
>    on error, neither on teardown,

devm_release_mem_region() is called from virtio_fs_probe() context. That
means that when virtio_fs_probe() returns an error the driver core will
automatically call devm_request_mem_region().

> - pgmap is never freed on error after devm_kzalloc.

That is what the "devm_" in devm_kzalloc() does, free the memory on
driver-probe failure, or after the driver remove callback is invoked.

> 
> >   {
> > +   struct dax_device *dax_dev __free(cleanup_dax) = NULL;
> >     struct virtio_shm_region cache_reg;
> >     struct dev_pagemap *pgmap;
> >     bool have_cache;
> > @@ -804,6 +808,15 @@ static int virtio_fs_setup_dax(struct virtio_device 
> > *vdev, struct virtio_fs *fs)
> >     if (!IS_ENABLED(CONFIG_FUSE_DAX))
> >             return 0;
> >   
> > +   dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
> > +   if (IS_ERR(dax_dev)) {
> > +           int rc = PTR_ERR(dax_dev);
> > +
> > +           if (rc == -EOPNOTSUPP)
> > +                   return 0;
> > +           return rc;
> > +   }
> 
> What is gained by moving this allocation here ?

The gain is to fail early in virtio_fs_setup_dax() since the fundamental
dependency of alloc_dax() success is not met. For example why let the
setup progress to devm_memremap_pages() when alloc_dax() is going to
return ERR_PTR(-EOPNOTSUPP).

> 
> > +
> >     /* Get cache region */
> >     have_cache = virtio_get_shm_region(vdev, &cache_reg,
> >                                        (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
> > @@ -849,10 +862,7 @@ static int virtio_fs_setup_dax(struct virtio_device 
> > *vdev, struct virtio_fs *fs)
> >     dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 
> > 0x%llx\n",
> >             __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
> >   
> > -   fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
> > -   if (IS_ERR(fs->dax_dev))
> > -           return PTR_ERR(fs->dax_dev);
> > -
> > +   fs->dax_dev = no_free_ptr(dax_dev);
> >     return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
> >                                     fs->dax_dev);
> >   }
> 
> In addition I have:
> 
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index f90743a94da9..86de91b35f4d 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -2054,6 +2054,7 @@ static void cleanup_mapped_device(struct mapped_device 
> *md)
>   static struct mapped_device *alloc_dev(int minor)
>   {
>       int r, numa_node_id = dm_get_numa_node();
> +     struct dax_device *dax_dev;
>       struct mapped_device *md;
>       void *old_md;
>   
> @@ -2122,16 +2123,13 @@ static struct mapped_device *alloc_dev(int minor)
>       md->disk->private_data = md;
>       sprintf(md->disk->disk_name, "dm-%d", minor);
>   
> -     if (IS_ENABLED(CONFIG_FS_DAX)) {
> -             md->dax_dev = alloc_dax(md, &dm_dax_ops);
> -             if (IS_ERR(md->dax_dev)) {
> -                     md->dax_dev = NULL;
> -             } else {
> -                     set_dax_nocache(md->dax_dev);
> -                     set_dax_nomc(md->dax_dev);
> -                     if (dax_add_host(md->dax_dev, md->disk))
> -                             goto bad;
> -             }
> +     dax_dev = alloc_dax(md, &dm_dax_ops);
> +     if (!IS_ERR(dax_dev)) {
> +             set_dax_nocache(dax_dev);
> +             set_dax_nomc(dax_dev);
> +             md->dax_dev = dax_dev;
> +             if (dax_add_host(dax_dev, md->disk))
> +                     goto bad;

Looks good.

Reply via email to