On 5/22/26 12:18 PM, John Groves wrote:
> From: John Groves <[email protected]>
> 
> Three fixes for fsdev.c:
> 
> 1. Fix memory_failure offset calculation for multi-range devices.
>    The old code subtracted ranges[0].range.start from the faulting PFN's
>    physical address, which produces an incorrect (inflated) logical offset
>    when the PFN falls in ranges[1] or beyond due to physical gaps between
>    ranges. Add fsdev_pfn_to_offset() to walk the range list and compute
>    the correct device-linear byte offset.
> 
> 2. Clear pgmap->vmemmap_shift for static DAX devices. When rebinding a
>    static device from device_dax (which may set vmemmap_shift based on
>    alignment) to fsdev_dax, the stale vmemmap_shift persists on the
>    shared pgmap. Explicitly zero it before devm_memremap_pages() so the
>    vmemmap is built for order-0 folios as fsdev requires.
> 
> 3. Clear dev_dax->pgmap on probe failure for dynamic devices. After the
>    dynamic path sets dev_dax->pgmap, if a later probe step fails, devres
>    frees the devm_kzalloc'd pgmap but leaves dev_dax->pgmap dangling.
>    Subsequent probe attempts would hit the "dynamic-dax with pre-populated
>    page map" check and fail permanently. Use a goto cleanup to NULL
>    dev_dax->pgmap on error.

These are three independent fixes. Could they be split into three separate patches?

DJ

> 
> Fixes: d5406bd458b0a ("dax: add fsdev.c driver for fs-dax on character dax")
> Signed-off-by: John Groves <[email protected]>
> ---
>  drivers/dax/fsdev.c | 50 ++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 40 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/dax/fsdev.c b/drivers/dax/fsdev.c
> index 188b2526bee45..42aac7e952516 100644
> --- a/drivers/dax/fsdev.c
> +++ b/drivers/dax/fsdev.c
> @@ -135,11 +135,26 @@ static void fsdev_clear_ops(void *data)
>   * The core mm code in free_zone_device_folio() handles the wake_up_var()
>   * directly for this memory type.
>   */
> +static u64 fsdev_pfn_to_offset(struct dev_dax *dev_dax, unsigned long pfn)
> +{
> +     phys_addr_t phys = PFN_PHYS(pfn);
> +     u64 offset = 0;
> +
> +     for (int i = 0; i < dev_dax->nr_range; i++) {
> +             struct range *range = &dev_dax->ranges[i].range;
> +
> +             if (phys >= range->start && phys <= range->end)
> +                     return offset + (phys - range->start);
> +             offset += range_len(range);
> +     }
> +     return -1ULL;
> +}
> +
>  static int fsdev_pagemap_memory_failure(struct dev_pagemap *pgmap,
>               unsigned long pfn, unsigned long nr_pages, int mf_flags)
>  {
>       struct dev_dax *dev_dax = pgmap->owner;
> -     u64 offset = PFN_PHYS(pfn) - dev_dax->ranges[0].range.start;
> +     u64 offset = fsdev_pfn_to_offset(dev_dax, pfn);
>       u64 len = nr_pages << PAGE_SHIFT;
>  
>       return dax_holder_notify_failure(dev_dax->dax_dev, offset,
> @@ -208,6 +223,7 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax)
>  {
>       struct dax_device *dax_dev = dev_dax->dax_dev;
>       struct device *dev = &dev_dax->dev;
> +     bool pgmap_allocated = false;
>       struct dev_pagemap *pgmap;
>       struct inode *inode;
>       u64 data_offset = 0;
> @@ -222,6 +238,7 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax)
>               }
>  
>               pgmap = dev_dax->pgmap;
> +             pgmap->vmemmap_shift = 0;
>       } else {
>               size_t pgmap_size;
>  
> @@ -237,6 +254,7 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax)
>  
>               pgmap->nr_range = dev_dax->nr_range;
>               dev_dax->pgmap = pgmap;
> +             pgmap_allocated = true;
>  
>               for (i = 0; i < dev_dax->nr_range; i++) {
>                       struct range *range = &dev_dax->ranges[i].range;
> @@ -252,7 +270,8 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax)
>                                       range_len(range), dev_name(dev))) {
>                       dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
>                                i, range->start, range->end);
> -                     return -EBUSY;
> +                     rc = -EBUSY;
> +                     goto err_pgmap;
>               }
>       }
>  
> @@ -272,8 +291,10 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax)
>       pgmap->owner = dev_dax;
>  
>       addr = devm_memremap_pages(dev, pgmap);
> -     if (IS_ERR(addr))
> -             return PTR_ERR(addr);
> +     if (IS_ERR(addr)) {
> +             rc = PTR_ERR(addr);
> +             goto err_pgmap;
> +     }
>  
>       /*
>        * Clear any stale compound folio state left over from a previous
> @@ -285,7 +306,7 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax)
>       rc = devm_add_action_or_reset(dev, fsdev_clear_folio_state_action,
>                                     dev_dax);
>       if (rc)
> -             return rc;
> +             goto err_pgmap;
>  
>       /* Detect whether the data is at a non-zero offset into the memory */
>       if (pgmap->range.start != dev_dax->ranges[0].range.start) {
> @@ -307,23 +328,32 @@ static int fsdev_dax_probe(struct dev_dax *dev_dax)
>       cdev_set_parent(cdev, &dev->kobj);
>       rc = cdev_add(cdev, dev->devt, 1);
>       if (rc)
> -             return rc;
> +             goto err_pgmap;
>  
>       rc = devm_add_action_or_reset(dev, fsdev_cdev_del, cdev);
>       if (rc)
> -             return rc;
> +             goto err_pgmap;
>  
>       /* Set the dax operations for fs-dax access path */
>       rc = dax_set_ops(dax_dev, &dev_dax_ops);
>       if (rc)
> -             return rc;
> +             goto err_pgmap;
>  
>       rc = devm_add_action_or_reset(dev, fsdev_clear_ops, dev_dax);
>       if (rc)
> -             return rc;
> +             goto err_pgmap;
>  
>       run_dax(dax_dev);
> -     return devm_add_action_or_reset(dev, fsdev_kill, dev_dax);
> +     rc = devm_add_action_or_reset(dev, fsdev_kill, dev_dax);
> +     if (rc)
> +             goto err_pgmap;
> +
> +     return 0;
> +
> +err_pgmap:
> +     if (pgmap_allocated)
> +             dev_dax->pgmap = NULL;
> +     return rc;
>  }
>  
>  static struct dax_device_driver fsdev_dax_driver = {


Reply via email to