[PATCH 11/11] dax: move bdev_dax_pgoff to fs/dax.c

2021-10-17 Thread Christoph Hellwig
No functional change, but this will allow for a tighter integration
with the iomap code, including possibly passing the partition offset
in the iomap in the future.  For now it mostly avoids growing more
callers outside of fs/dax.c.

Signed-off-by: Christoph Hellwig 
---
 drivers/dax/super.c | 14 --
 fs/dax.c| 13 +
 include/linux/dax.h |  1 -
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 803942586d1b6..c0910687fbcb2 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -67,20 +67,6 @@ void dax_remove_host(struct gendisk *disk)
 }
 EXPORT_SYMBOL_GPL(dax_remove_host);
 
-int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
-   pgoff_t *pgoff)
-{
-   sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
-   phys_addr_t phys_off = (start_sect + sector) * 512;
-
-   if (pgoff)
-   *pgoff = PHYS_PFN(phys_off);
-   if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
-   return -EINVAL;
-   return 0;
-}
-EXPORT_SYMBOL(bdev_dax_pgoff);
-
 /**
  * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
  * @bdev: block device to find a dax_device for
diff --git a/fs/dax.c b/fs/dax.c
index 4e3e5a283a916..eb715363fd667 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -709,6 +709,19 @@ int dax_invalidate_mapping_entry_sync(struct address_space 
*mapping,
return __dax_invalidate_entry(mapping, index, false);
 }
 
+static int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t 
size,
+   pgoff_t *pgoff)
+{
+   sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
+   phys_addr_t phys_off = (start_sect + sector) * 512;
+
+   if (pgoff)
+   *pgoff = PHYS_PFN(phys_off);
+   if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
+   return -EINVAL;
+   return 0;
+}
+
 static int copy_cow_page_dax(struct block_device *bdev, struct dax_device 
*dax_dev,
 sector_t sector, struct page *to, unsigned long 
vaddr)
 {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 439c3c70e347b..324363b798ecd 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -107,7 +107,6 @@ static inline bool daxdev_mapping_supported(struct 
vm_area_struct *vma,
 #endif
 
 struct writeback_control;
-int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
 void dax_remove_host(struct gendisk *disk);
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 10/11] dm-stripe: add a stripe_dax_pgoff helper

2021-10-17 Thread Christoph Hellwig
Add a helper to perform the entire remapping for DAX accesses.  This
helper open codes bdev_dax_pgoff given that the alignment checks have
already been done by the submitting file system and don't need to be
repeated.

Signed-off-by: Christoph Hellwig 
---
 drivers/md/dm-stripe.c | 63 ++
 1 file changed, 15 insertions(+), 48 deletions(-)

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index f084607220293..50dba3f39274c 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -301,83 +301,50 @@ static int stripe_map(struct dm_target *ti, struct bio 
*bio)
 }
 
 #if IS_ENABLED(CONFIG_FS_DAX)
-static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
-   long nr_pages, void **kaddr, pfn_t *pfn)
+static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t 
*pgoff)
 {
-   sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
-   struct dax_device *dax_dev;
struct block_device *bdev;
+   sector_t dev_sector;
uint32_t stripe;
-   long ret;
 
-   stripe_map_sector(sc, sector, , _sector);
+   stripe_map_sector(sc, *pgoff * PAGE_SECTORS, , _sector);
dev_sector += sc->stripe[stripe].physical_start;
-   dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
 
-   ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, );
-   if (ret)
-   return ret;
+   *pgoff = (get_start_sect(bdev) + dev_sector) >> PAGE_SECTORS_SHIFT;
+   return sc->stripe[stripe].dev->dax_dev;
+}
+
+static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
+   long nr_pages, void **kaddr, pfn_t *pfn)
+{
+   struct dax_device *dax_dev = stripe_dax_pgoff(ti, );
+
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
 }
 
 static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
 {
-   sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
-   struct stripe_c *sc = ti->private;
-   struct dax_device *dax_dev;
-   struct block_device *bdev;
-   uint32_t stripe;
-
-   stripe_map_sector(sc, sector, , _sector);
-   dev_sector += sc->stripe[stripe].physical_start;
-   dax_dev = sc->stripe[stripe].dev->dax_dev;
-   bdev = sc->stripe[stripe].dev->bdev;
+   struct dax_device *dax_dev = stripe_dax_pgoff(ti, );
 
-   if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), ))
-   return 0;
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
 static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
 {
-   sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
-   struct stripe_c *sc = ti->private;
-   struct dax_device *dax_dev;
-   struct block_device *bdev;
-   uint32_t stripe;
-
-   stripe_map_sector(sc, sector, , _sector);
-   dev_sector += sc->stripe[stripe].physical_start;
-   dax_dev = sc->stripe[stripe].dev->dax_dev;
-   bdev = sc->stripe[stripe].dev->bdev;
+   struct dax_device *dax_dev = stripe_dax_pgoff(ti, );
 
-   if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), ))
-   return 0;
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
 static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
  size_t nr_pages)
 {
-   int ret;
-   sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
-   struct stripe_c *sc = ti->private;
-   struct dax_device *dax_dev;
-   struct block_device *bdev;
-   uint32_t stripe;
+   struct dax_device *dax_dev = stripe_dax_pgoff(ti, );
 
-   stripe_map_sector(sc, sector, , _sector);
-   dev_sector += sc->stripe[stripe].physical_start;
-   dax_dev = sc->stripe[stripe].dev->dax_dev;
-   bdev = sc->stripe[stripe].dev->bdev;
-
-   ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, );
-   if (ret)
-   return ret;
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
 }
 
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 09/11] dm-log-writes: add a log_writes_dax_pgoff helper

2021-10-17 Thread Christoph Hellwig
Add a helper to perform the entire remapping for DAX accesses.  This
helper open codes bdev_dax_pgoff given that the alignment checks have
already been done by the submitting file system and don't need to be
repeated.

Signed-off-by: Christoph Hellwig 
---
 drivers/md/dm-log-writes.c | 42 +++---
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 6d694526881d0..5aac60c1b774c 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -949,17 +949,21 @@ static int log_dax(struct log_writes_c *lc, sector_t 
sector, size_t bytes,
return 0;
 }
 
+static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti,
+   pgoff_t *pgoff)
+{
+   struct log_writes_c *lc = ti->private;
+
+   *pgoff += (get_start_sect(lc->dev->bdev) >> PAGE_SECTORS_SHIFT);
+   return lc->dev->dax_dev;
+}
+
 static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 long nr_pages, void **kaddr, pfn_t 
*pfn)
 {
-   struct log_writes_c *lc = ti->private;
-   sector_t sector = pgoff * PAGE_SECTORS;
-   int ret;
+   struct dax_device *dax_dev = log_writes_dax_pgoff(ti, );
 
-   ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, 
);
-   if (ret)
-   return ret;
-   return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn);
+   return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
 }
 
 static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
@@ -968,11 +972,9 @@ static size_t log_writes_dax_copy_from_iter(struct 
dm_target *ti,
 {
struct log_writes_c *lc = ti->private;
sector_t sector = pgoff * PAGE_SECTORS;
+   struct dax_device *dax_dev = log_writes_dax_pgoff(ti, );
int err;
 
-   if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), 
))
-   return 0;
-
/* Don't bother doing anything if logging has been disabled */
if (!lc->logging_enabled)
goto dax_copy;
@@ -983,34 +985,24 @@ static size_t log_writes_dax_copy_from_iter(struct 
dm_target *ti,
return 0;
}
 dax_copy:
-   return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
+   return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
 static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
  pgoff_t pgoff, void *addr, size_t 
bytes,
  struct iov_iter *i)
 {
-   struct log_writes_c *lc = ti->private;
-   sector_t sector = pgoff * PAGE_SECTORS;
+   struct dax_device *dax_dev = log_writes_dax_pgoff(ti, );
 
-   if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), 
))
-   return 0;
-   return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
+   return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
 static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
  size_t nr_pages)
 {
-   int ret;
-   struct log_writes_c *lc = ti->private;
-   sector_t sector = pgoff * PAGE_SECTORS;
+   struct dax_device *dax_dev = log_writes_dax_pgoff(ti, );
 
-   ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT,
-);
-   if (ret)
-   return ret;
-   return dax_zero_page_range(lc->dev->dax_dev, pgoff,
-  nr_pages << PAGE_SHIFT);
+   return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT);
 }
 
 #else
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 06/11] xfs: factor out a xfs_setup_dax helper

2021-10-17 Thread Christoph Hellwig
Factor out another DAX setup helper to simplify future changes.  Also
move the experimental warning after the checks to not clutter the log
too much if the setup failed.

Signed-off-by: Christoph Hellwig 
---
 fs/xfs/xfs_super.c | 47 +++---
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c4e0cd1c1c8ca..d07020a8eb9e3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -339,6 +339,32 @@ xfs_buftarg_is_dax(
bdev_nr_sectors(bt->bt_bdev));
 }
 
+static int
+xfs_setup_dax(
+   struct xfs_mount*mp)
+{
+   struct super_block  *sb = mp->m_super;
+
+   if (!xfs_buftarg_is_dax(sb, mp->m_ddev_targp) &&
+  (!mp->m_rtdev_targp || !xfs_buftarg_is_dax(sb, mp->m_rtdev_targp))) {
+   xfs_alert(mp,
+   "DAX unsupported by block device. Turning off DAX.");
+   goto disable_dax;
+   }
+
+   if (xfs_has_reflink(mp)) {
+   xfs_alert(mp, "DAX and reflink cannot be used together!");
+   return -EINVAL;
+   }
+
+   xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own 
risk");
+   return 0;
+
+disable_dax:
+   xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
+   return 0;
+}
+
 STATIC int
 xfs_blkdev_get(
xfs_mount_t *mp,
@@ -1592,26 +1618,9 @@ xfs_fs_fill_super(
sb->s_flags |= SB_I_VERSION;
 
if (xfs_has_dax_always(mp)) {
-   bool rtdev_is_dax = false, datadev_is_dax;
-
-   xfs_warn(mp,
-   "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
-
-   datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
-   if (mp->m_rtdev_targp)
-   rtdev_is_dax = xfs_buftarg_is_dax(sb,
-   mp->m_rtdev_targp);
-   if (!rtdev_is_dax && !datadev_is_dax) {
-   xfs_alert(mp,
-   "DAX unsupported by block device. Turning off DAX.");
-   xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
-   }
-   if (xfs_has_reflink(mp)) {
-   xfs_alert(mp,
-   "DAX and reflink cannot be used together!");
-   error = -EINVAL;
+   error = xfs_setup_dax(mp);
+   if (error)
goto out_filestream_unmount;
-   }
}
 
if (xfs_has_discard(mp)) {
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 07/11] dax: remove dax_capable

2021-10-17 Thread Christoph Hellwig
Just open code the block size and dax_dev == NULL checks in the callers.

Signed-off-by: Christoph Hellwig 
---
 drivers/dax/super.c  | 36 
 drivers/md/dm-table.c| 22 +++---
 drivers/md/dm.c  | 21 -
 drivers/md/dm.h  |  4 
 drivers/nvdimm/pmem.c|  1 -
 drivers/s390/block/dcssblk.c |  1 -
 fs/erofs/super.c | 11 +++
 fs/ext2/super.c  |  6 --
 fs/ext4/super.c  |  9 ++---
 fs/xfs/xfs_super.c   | 21 -
 include/linux/dax.h  | 14 --
 11 files changed, 36 insertions(+), 110 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 482fe775324a4..803942586d1b6 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -108,42 +108,6 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device 
*bdev)
return dax_dev;
 }
 EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
-
-bool generic_fsdax_supported(struct dax_device *dax_dev,
-   struct block_device *bdev, int blocksize, sector_t start,
-   sector_t sectors)
-{
-   if (blocksize != PAGE_SIZE) {
-   pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
-   return false;
-   }
-
-   if (!dax_dev) {
-   pr_debug("%pg: error: dax unsupported by block device\n", bdev);
-   return false;
-   }
-
-   return true;
-}
-EXPORT_SYMBOL_GPL(generic_fsdax_supported);
-
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
-   int blocksize, sector_t start, sector_t len)
-{
-   bool ret = false;
-   int id;
-
-   if (!dax_dev)
-   return false;
-
-   id = dax_read_lock();
-   if (dax_alive(dax_dev) && dax_dev->ops->dax_supported)
-   ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize,
- start, len);
-   dax_read_unlock(id);
-   return ret;
-}
-EXPORT_SYMBOL_GPL(dax_supported);
 #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
 
 enum dax_device_flags {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 1fa4d5582dca5..4ae671c2168ea 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -807,12 +807,14 @@ void dm_table_set_type(struct dm_table *t, enum 
dm_queue_mode type)
 EXPORT_SYMBOL_GPL(dm_table_set_type);
 
 /* validate the dax capability of the target device span */
-int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
+static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
 {
-   int blocksize = *(int *) data;
+   if (dev->dax_dev)
+   return false;
 
-   return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
+   pr_debug("%pg: error: dax unsupported by block device\n", dev->bdev);
+   return true;
 }
 
 /* Check devices support synchronous DAX */
@@ -822,8 +824,8 @@ static int device_not_dax_synchronous_capable(struct 
dm_target *ti, struct dm_de
return !dev->dax_dev || !dax_synchronous(dev->dax_dev);
 }
 
-bool dm_table_supports_dax(struct dm_table *t,
-  iterate_devices_callout_fn iterate_fn, int 
*blocksize)
+static bool dm_table_supports_dax(struct dm_table *t,
+  iterate_devices_callout_fn iterate_fn)
 {
struct dm_target *ti;
unsigned i;
@@ -836,7 +838,7 @@ bool dm_table_supports_dax(struct dm_table *t,
return false;
 
if (!ti->type->iterate_devices ||
-   ti->type->iterate_devices(ti, iterate_fn, blocksize))
+   ti->type->iterate_devices(ti, iterate_fn, NULL))
return false;
}
 
@@ -863,7 +865,6 @@ static int dm_table_determine_type(struct dm_table *t)
struct dm_target *tgt;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
-   int page_size = PAGE_SIZE;
 
if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */
@@ -907,7 +908,7 @@ static int dm_table_determine_type(struct dm_table *t)
 verify_bio_based:
/* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED;
-   if (dm_table_supports_dax(t, device_not_dax_capable, 
_size) ||
+   if (dm_table_supports_dax(t, device_not_dax_capable) ||
(list_empty(devices) && live_md_type == 
DM_TYPE_DAX_BIO_BASED)) {
t->type = DM_TYPE_DAX_BIO_BASED;
}
@@ -1981,7 +1982,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct 
request_queue *q,
  struct queue_limits *limits)
 {
bool wc = false, fua = false;
-   int 

[PATCH 08/11] dm-linear: add a linear_dax_pgoff helper

2021-10-17 Thread Christoph Hellwig
Add a helper to perform the entire remapping for DAX accesses.  This
helper open codes bdev_dax_pgoff given that the alignment checks have
already been done by the submitting file system and don't need to be
repeated.

Signed-off-by: Christoph Hellwig 
---
 drivers/md/dm-linear.c | 49 +-
 1 file changed, 15 insertions(+), 34 deletions(-)

diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 32fbab11bf90c..bf03f73fd0f36 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -164,63 +164,44 @@ static int linear_iterate_devices(struct dm_target *ti,
 }
 
 #if IS_ENABLED(CONFIG_FS_DAX)
+static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t 
*pgoff)
+{
+   struct linear_c *lc = ti->private;
+   sector_t sector = linear_map_sector(ti, *pgoff << PAGE_SECTORS_SHIFT);
+
+   *pgoff = (get_start_sect(lc->dev->bdev) + sector) >> PAGE_SECTORS_SHIFT;
+   return lc->dev->dax_dev;
+}
+
 static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
 {
-   long ret;
-   struct linear_c *lc = ti->private;
-   struct block_device *bdev = lc->dev->bdev;
-   struct dax_device *dax_dev = lc->dev->dax_dev;
-   sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
-
-   dev_sector = linear_map_sector(ti, sector);
-   ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, );
-   if (ret)
-   return ret;
+   struct dax_device *dax_dev = linear_dax_pgoff(ti, );
+
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
 }
 
 static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
 {
-   struct linear_c *lc = ti->private;
-   struct block_device *bdev = lc->dev->bdev;
-   struct dax_device *dax_dev = lc->dev->dax_dev;
-   sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+   struct dax_device *dax_dev = linear_dax_pgoff(ti, );
 
-   dev_sector = linear_map_sector(ti, sector);
-   if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), ))
-   return 0;
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
 static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
 {
-   struct linear_c *lc = ti->private;
-   struct block_device *bdev = lc->dev->bdev;
-   struct dax_device *dax_dev = lc->dev->dax_dev;
-   sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+   struct dax_device *dax_dev = linear_dax_pgoff(ti, );
 
-   dev_sector = linear_map_sector(ti, sector);
-   if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), ))
-   return 0;
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
 static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
  size_t nr_pages)
 {
-   int ret;
-   struct linear_c *lc = ti->private;
-   struct block_device *bdev = lc->dev->bdev;
-   struct dax_device *dax_dev = lc->dev->dax_dev;
-   sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
-
-   dev_sector = linear_map_sector(ti, sector);
-   ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, );
-   if (ret)
-   return ret;
+   struct dax_device *dax_dev = linear_dax_pgoff(ti, );
+
return dax_zero_page_range(dax_dev, pgoff, nr_pages);
 }
 
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 05/11] dax: move the partition alignment check into fs_dax_get_by_bdev

2021-10-17 Thread Christoph Hellwig
fs_dax_get_by_bdev is the primary interface to find a dax device for a
block device, so move the partition alignment check there instead of
wiring it up through ->dax_supported.

Signed-off-by: Christoph Hellwig 
---
 drivers/dax/super.c | 23 ++-
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 04fc680542e8d..482fe775324a4 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -93,6 +93,12 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device 
*bdev)
if (!blk_queue_dax(bdev->bd_disk->queue))
return NULL;
 
+   if ((get_start_sect(bdev) * SECTOR_SIZE) % PAGE_SIZE ||
+   (bdev_nr_sectors(bdev) * SECTOR_SIZE) % PAGE_SIZE) {
+   pr_info("%pg: error: unaligned partition for dax\n", bdev);
+   return NULL;
+   }
+
id = dax_read_lock();
dax_dev = xa_load(_hosts, (unsigned long)bdev->bd_disk);
if (!dax_dev || !dax_alive(dax_dev) || !igrab(_dev->inode))
@@ -107,10 +113,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev,
struct block_device *bdev, int blocksize, sector_t start,
sector_t sectors)
 {
-   pgoff_t pgoff, pgoff_end;
-   sector_t last_page;
-   int err;
-
if (blocksize != PAGE_SIZE) {
pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
return false;
@@ -121,19 +123,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
 
-   err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, );
-   if (err) {
-   pr_info("%pg: error: unaligned partition for dax\n", bdev);
-   return false;
-   }
-
-   last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
-   err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, _end);
-   if (err) {
-   pr_info("%pg: error: unaligned partition for dax\n", bdev);
-   return false;
-   }
-
return true;
 }
 EXPORT_SYMBOL_GPL(generic_fsdax_supported);
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 01/11] dm: make the DAX support dependent on CONFIG_FS_DAX

2021-10-17 Thread Christoph Hellwig
The device mapper DAX support is all hanging off a block device and thus
can't be used with device dax.  Make it depend on CONFIG_FS_DAX instead
of CONFIG_DAX_DRIVER.  This also means that bdev_dax_pgoff only needs to
be built under CONFIG_FS_DAX now.

Signed-off-by: Christoph Hellwig 
---
 drivers/dax/super.c| 6 ++
 drivers/md/dm-linear.c | 2 +-
 drivers/md/dm-log-writes.c | 2 +-
 drivers/md/dm-stripe.c | 2 +-
 drivers/md/dm-writecache.c | 2 +-
 drivers/md/dm.c| 2 +-
 6 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index b882cf8106ea3..e20d0cef10a18 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -63,7 +63,7 @@ static int dax_host_hash(const char *host)
return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
 }
 
-#ifdef CONFIG_BLOCK
+#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
 #include 
 
 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
@@ -80,7 +80,6 @@ int bdev_dax_pgoff(struct block_device *bdev, sector_t 
sector, size_t size,
 }
 EXPORT_SYMBOL(bdev_dax_pgoff);
 
-#if IS_ENABLED(CONFIG_FS_DAX)
 /**
  * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
  * @host: alternate name for the device registered by a dax driver
@@ -219,8 +218,7 @@ bool dax_supported(struct dax_device *dax_dev, struct 
block_device *bdev,
return ret;
 }
 EXPORT_SYMBOL_GPL(dax_supported);
-#endif /* CONFIG_FS_DAX */
-#endif /* CONFIG_BLOCK */
+#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
 
 enum dax_device_flags {
/* !alive + rcu grace period == no new operations / mappings */
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 679b4c0a2eea1..32fbab11bf90c 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -163,7 +163,7 @@ static int linear_iterate_devices(struct dm_target *ti,
return fn(ti, lc->dev, lc->start, ti->len, data);
 }
 
-#if IS_ENABLED(CONFIG_DAX_DRIVER)
+#if IS_ENABLED(CONFIG_FS_DAX)
 static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
 {
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index d93a4db235124..6d694526881d0 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -903,7 +903,7 @@ static void log_writes_io_hints(struct dm_target *ti, 
struct queue_limits *limit
limits->io_min = limits->physical_block_size;
 }
 
-#if IS_ENABLED(CONFIG_DAX_DRIVER)
+#if IS_ENABLED(CONFIG_FS_DAX)
 static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
   struct iov_iter *i)
 {
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 6660b6b53d5bf..f084607220293 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -300,7 +300,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
 }
 
-#if IS_ENABLED(CONFIG_DAX_DRIVER)
+#if IS_ENABLED(CONFIG_FS_DAX)
 static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn)
 {
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 18320444fb0a9..4c3a6e33604d3 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -38,7 +38,7 @@
 #define BITMAP_GRANULARITY PAGE_SIZE
 #endif
 
-#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER)
+#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_FS_DAX)
 #define DM_WRITECACHE_HAS_PMEM
 #endif
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7870e6460633f..79737aee516b1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1783,7 +1783,7 @@ static struct mapped_device *alloc_dev(int minor)
md->disk->private_data = md;
sprintf(md->disk->disk_name, "dm-%d", minor);
 
-   if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
+   if (IS_ENABLED(CONFIG_FS_DAX)) {
md->dax_dev = alloc_dax(md, md->disk->disk_name,
_dax_ops, 0);
if (IS_ERR(md->dax_dev))
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 04/11] dax: remove the pgmap sanity checks in generic_fsdax_supported

2021-10-17 Thread Christoph Hellwig
Drivers that register a dax_dev should make sure it works, no need
to double check from the file system.

Signed-off-by: Christoph Hellwig 
---
 drivers/dax/super.c | 49 +
 1 file changed, 1 insertion(+), 48 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 9383c11b21853..04fc680542e8d 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -107,13 +107,9 @@ bool generic_fsdax_supported(struct dax_device *dax_dev,
struct block_device *bdev, int blocksize, sector_t start,
sector_t sectors)
 {
-   bool dax_enabled = false;
pgoff_t pgoff, pgoff_end;
-   void *kaddr, *end_kaddr;
-   pfn_t pfn, end_pfn;
sector_t last_page;
-   long len, len2;
-   int err, id;
+   int err;
 
if (blocksize != PAGE_SIZE) {
pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
@@ -138,49 +134,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev,
return false;
}
 
-   id = dax_read_lock();
-   len = dax_direct_access(dax_dev, pgoff, 1, , );
-   len2 = dax_direct_access(dax_dev, pgoff_end, 1, _kaddr, _pfn);
-
-   if (len < 1 || len2 < 1) {
-   pr_info("%pg: error: dax access failed (%ld)\n",
-   bdev, len < 1 ? len : len2);
-   dax_read_unlock(id);
-   return false;
-   }
-
-   if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
-   /*
-* An arch that has enabled the pmem api should also
-* have its drivers support pfn_t_devmap()
-*
-* This is a developer warning and should not trigger in
-* production. dax_flush() will crash since it depends
-* on being able to do (page_address(pfn_to_page())).
-*/
-   WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
-   dax_enabled = true;
-   } else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) {
-   struct dev_pagemap *pgmap, *end_pgmap;
-
-   pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL);
-   end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL);
-   if (pgmap && pgmap == end_pgmap && pgmap->type == 
MEMORY_DEVICE_FS_DAX
-   && pfn_t_to_page(pfn)->pgmap == pgmap
-   && pfn_t_to_page(end_pfn)->pgmap == pgmap
-   && pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr))
-   && pfn_t_to_pfn(end_pfn) == 
PHYS_PFN(__pa(end_kaddr)))
-   dax_enabled = true;
-   put_dev_pagemap(pgmap);
-   put_dev_pagemap(end_pgmap);
-
-   }
-   dax_read_unlock(id);
-
-   if (!dax_enabled) {
-   pr_info("%pg: error: dax support not enabled\n", bdev);
-   return false;
-   }
return true;
 }
 EXPORT_SYMBOL_GPL(generic_fsdax_supported);
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 02/11] dax: remove CONFIG_DAX_DRIVER

2021-10-17 Thread Christoph Hellwig
CONFIG_DAX_DRIVER only selects CONFIG_DAX now, so remove it.

Signed-off-by: Christoph Hellwig 
---
 drivers/dax/Kconfig| 4 
 drivers/nvdimm/Kconfig | 2 +-
 drivers/s390/block/Kconfig | 2 +-
 fs/fuse/Kconfig| 2 +-
 4 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index d2834c2cfa10d..954ab14ba7778 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config DAX_DRIVER
-   select DAX
-   bool
-
 menuconfig DAX
tristate "DAX: direct access to differentiated memory"
select SRCU
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index b7d1eb38b27d4..347fe7afa5830 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -22,7 +22,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
tristate "PMEM: Persistent memory block device support"
default LIBNVDIMM
-   select DAX_DRIVER
+   select DAX
select ND_BTT if BTT
select ND_PFN if NVDIMM_PFN
help
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index d0416dbd0cd81..e3710a762abae 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -5,7 +5,7 @@ comment "S/390 block device drivers"
 config DCSSBLK
def_tristate m
select FS_DAX_LIMITED
-   select DAX_DRIVER
+   select DAX
prompt "DCSSBLK support"
depends on S390 && BLOCK
help
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 40ce9a1c12e5d..038ed0b9aaa5d 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -45,7 +45,7 @@ config FUSE_DAX
select INTERVAL_TREE
depends on VIRTIO_FS
depends on FS_DAX
-   depends on DAX_DRIVER
+   depends on DAX
help
  This allows bypassing guest page cache and allows mapping host page
  cache directly in guest address space.
-- 
2.30.2

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


further decouple DAX from block devices

2021-10-17 Thread Christoph Hellwig
Hi Dan,

this series cleans up and simplifies the association between DAX and block
devices in preparation for allowing file systems to be mounted directly on
DAX devices without a detour through block devices.

Diffstat:
 drivers/dax/Kconfig  |4 
 drivers/dax/bus.c|2 
 drivers/dax/super.c  |  220 +--
 drivers/md/dm-linear.c   |   51 +++--
 drivers/md/dm-log-writes.c   |   44 +++-
 drivers/md/dm-stripe.c   |   65 +++-
 drivers/md/dm-table.c|   22 ++--
 drivers/md/dm-writecache.c   |2 
 drivers/md/dm.c  |   29 -
 drivers/md/dm.h  |4 
 drivers/nvdimm/Kconfig   |2 
 drivers/nvdimm/pmem.c|9 -
 drivers/s390/block/Kconfig   |2 
 drivers/s390/block/dcssblk.c |   12 +-
 fs/dax.c |   13 ++
 fs/erofs/super.c |   11 +-
 fs/ext2/super.c  |6 -
 fs/ext4/super.c  |9 +
 fs/fuse/Kconfig  |2 
 fs/fuse/virtio_fs.c  |2 
 fs/xfs/xfs_super.c   |   54 +-
 include/linux/dax.h  |   30 ++---
 22 files changed, 185 insertions(+), 410 deletions(-)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH 03/11] dax: simplify the dax_device <-> gendisk association

2021-10-17 Thread Christoph Hellwig
Replace the dax_host_hash with an xarray indexed by the pointer value
of the gendisk, and require explicit calls from the block drivers that
want to associate their gendisk with a dax_device.

Signed-off-by: Christoph Hellwig 
---
 drivers/dax/bus.c|   2 +-
 drivers/dax/super.c  | 106 +--
 drivers/md/dm.c  |   6 +-
 drivers/nvdimm/pmem.c|   8 ++-
 drivers/s390/block/dcssblk.c |  11 +++-
 fs/fuse/virtio_fs.c  |   2 +-
 include/linux/dax.h  |  19 +--
 7 files changed, 60 insertions(+), 94 deletions(-)

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 6cc4da4c713d9..6d91b0186e3be 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -1326,7 +1326,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data 
*data)
 * No 'host' or dax_operations since there is no access to this
 * device outside of mmap of the resulting character device.
 */
-   dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
+   dax_dev = alloc_dax(dev_dax, NULL, DAXDEV_F_SYNC);
if (IS_ERR(dax_dev)) {
rc = PTR_ERR(dax_dev);
goto err_alloc_dax;
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e20d0cef10a18..9383c11b21853 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -7,10 +7,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -26,10 +24,8 @@
  * @flags: state and boolean properties
  */
 struct dax_device {
-   struct hlist_node list;
struct inode inode;
struct cdev cdev;
-   const char *host;
void *private;
unsigned long flags;
const struct dax_operations *ops;
@@ -42,10 +38,6 @@ static DEFINE_IDA(dax_minor_ida);
 static struct kmem_cache *dax_cache __read_mostly;
 static struct super_block *dax_superblock __read_mostly;
 
-#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
-static struct hlist_head dax_host_list[DAX_HASH_SIZE];
-static DEFINE_SPINLOCK(dax_host_lock);
-
 int dax_read_lock(void)
 {
return srcu_read_lock(&dax_srcu);
@@ -58,13 +50,22 @@ void dax_read_unlock(int id)
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
-static int dax_host_hash(const char *host)
+#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
+#include 
+
+static DEFINE_XARRAY(dax_hosts);
+
+int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
 {
-   return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
+   return xa_insert(&dax_hosts, (unsigned long)disk, dax_dev, GFP_KERNEL);
 }
+EXPORT_SYMBOL_GPL(dax_add_host);
 
-#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
-#include 
+void dax_remove_host(struct gendisk *disk)
+{
+   xa_erase(&dax_hosts, (unsigned long)disk);
+}
+EXPORT_SYMBOL_GPL(dax_remove_host);
 
 int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
pgoff_t *pgoff)
@@ -82,40 +83,23 @@ EXPORT_SYMBOL(bdev_dax_pgoff);
 
 /**
  * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
- * @host: alternate name for the device registered by a dax driver
+ * @bdev: block device to find a dax_device for
  */
-static struct dax_device *dax_get_by_host(const char *host)
+struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 {
-   struct dax_device *dax_dev, *found = NULL;
-   int hash, id;
+   struct dax_device *dax_dev;
+   int id;
 
-   if (!host)
+   if (!blk_queue_dax(bdev->bd_disk->queue))
return NULL;
 
-   hash = dax_host_hash(host);
-
id = dax_read_lock();
-   spin_lock(&dax_host_lock);
-   hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
-   if (!dax_alive(dax_dev)
-   || strcmp(host, dax_dev->host) != 0)
-   continue;
-
-   if (igrab(&dax_dev->inode))
-   found = dax_dev;
-   break;
-   }
-   spin_unlock(&dax_host_lock);
+   dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk);
+   if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode))
+   dax_dev = NULL;
dax_read_unlock(id);
 
-   return found;
-}
-
-struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
-{
-   if (!blk_queue_dax(bdev->bd_disk->queue))
-   return NULL;
-   return dax_get_by_host(bdev->bd_disk->disk_name);
+   return dax_dev;
 }
 EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
 
@@ -361,12 +345,7 @@ void kill_dax(struct dax_device *dax_dev)
return;
 
clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
-
synchronize_srcu(&dax_srcu);
-
-   spin_lock(&dax_host_lock);
-   hlist_del_init(&dax_dev->list);
-   spin_unlock(&dax_host_lock);
 }
 EXPORT_SYMBOL_GPL(kill_dax);
 
@@ -398,8 +377,6 @@ static struct dax_device *to_dax_dev(struct inode *inode)
 static void dax_free_inode(struct inode *inode)
 {
struct dax_device 

Re: [PATCH v5 12/16] PCI: Add pci_iomap_host_shared(), pci_iomap_host_shared_range()

2021-10-17 Thread Thomas Gleixner
On Mon, Oct 18 2021 at 02:55, Thomas Gleixner wrote:
> On Sun, Oct 10 2021 at 15:11, Andi Kleen wrote:
>> The 5.15 tree has something like ~2.4k IO accesses (including MMIO and 
>> others) in init functions that also register drivers (thanks Elena for 
>> the number)
>
> These numbers are completely useless simply because they are based on
> nonsensical criteria. See:
>
>   https://lore.kernel.org/r/87r1cj2uad.ffs@tglx
>
>> My point is just that the ecosystem of devices that Linux supports is 
>> messy enough that there are legitimate exceptions from the "First IO 
>> only in probe call only" rule.
>
> Your point is based on your outright refusal to actually do a proper
> analysis and your outright refusal to help fixing the real problems.
>
> All you have provided so far is handwaving based on a completely useless
> analysis.
>
> Sure, your goal is to get this TDX problem solved, but it's not going to
> be solved by:
>
>   1) Providing a nonsensical analysis
>
>   2) Using #1 as an argument to hack some half-baked interfaces into the
>  kernel which allow you to tick off your checkbox and then leave the
>  resulting mess for others to clean up.
>  
> Try again when you have factual data to back up your claims and factual
> arguments which prove that the problem can't be fixed otherwise.
>
> I might be repeating myself, but kernel development works this way:
>
>   1) Hack your private POC - Yay!
>
>   2) Sit down and think hard about the problems you identified in step
>  #1. Do a thorough analysis.
>   
>   3) Come up with a sensible integration plan.
>
>   4) Do the necessary grump work of cleanups all over the place
>
>   5) Add sensible infrastructure which is understandable for the bulk
>  of kernel/driver developers
>
>   6) Let your feature fall in place
>
> and not in the way you are insisting on:
>
>   1) Hack your private POC - Yay!
>
>   2) Define that this is the only way to do it and try to shove it down
>  the throat of everyone.
>
>   3) Getting told that this is not the way it works
>
>   4) Insist on it forever and blame the grumpy maintainers who are just
>  not understanding the great value of your approach.
>
>   5) Go back to #2
>
> You should know that already, but I have no problem to give that lecture
> to you over and over again. I probably should create a form letter.
>
> And no, you can bitch about me as much as you want. These are not my
> personal rules and personal pet peeves. These are rules Linus cares
> about very much and aside of that they just reflect common sense.
>
>   The kernel is a common good and not the dump ground for your personal
>   brain waste.
>
>   The kernel does not serve Intel. Quite the contrary Intel depends on
>   the kernel to work nicely with its hardware. Ergo, Intel should have
>   a vested interest to serve the kernel and take responsibility for it
>   as a whole. And so should you as an Intel employee.
>
> Just dumping your next half-baked workaround does not cut it especially
> not when it is not backed up by sensible arguments.
>
> Please try again, but not before you have something substantial to back
> up your claims.

That said, I can't resist the urge to say a few words to the responsible
senior and management people at Intel in this context:

I surely know that a lot of Intel people claim that their lack of
progress is _only_ because Thomas is hard to work with and Thomas wants
unreasonable changes to their code, which I could perceive as an abuse of
myself for the purpose of self-deception. TBH, I don't give a damn.

Let me ask a few questions instead:

  - Is it unreasonable to expect that argumentations are based on facts
and proper analysis?

  - Is it unreasonable to expect a proper integration of a new feature?

  - Does it take unreasonable effort to do a proper design?

  - Is it unreasonable to ask that the necessary cleanups are done
upfront?

If anyone of the responsible people at Intel thinks so, then they should
speak up now and tell me in public and into my face what's so
unreasonable about that.

Thanks,

Thomas
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v5 12/16] PCI: Add pci_iomap_host_shared(), pci_iomap_host_shared_range()

2021-10-17 Thread Thomas Gleixner
Andi,

On Sun, Oct 10 2021 at 15:11, Andi Kleen wrote:
> On 10/9/2021 1:39 PM, Dan Williams wrote:
>> I agree with you and Greg here. If a driver is accessing hardware
>> resources outside of the bind lifetime of one of the devices it
>> supports, and in a way that neither modprobe-policy nor
>> device-authorization-policy infrastructure can block, that sounds
>> like a bug report.
>
> The 5.15 tree has something like ~2.4k IO accesses (including MMIO and 
> others) in init functions that also register drivers (thanks Elena for 
> the number)

These numbers are completely useless simply because they are based on
nonsensical criteria. See:

  https://lore.kernel.org/r/87r1cj2uad.ffs@tglx

> My point is just that the ecosystem of devices that Linux supports is 
> messy enough that there are legitimate exceptions from the "First IO 
> only in probe call only" rule.

Your point is based on your outright refusal to actually do a proper
analysis and your outright refusal to help fixing the real problems.

All you have provided so far is handwaving based on a completely useless
analysis.

Sure, your goal is to get this TDX problem solved, but it's not going to
be solved by:

  1) Providing a nonsensical analysis

  2) Using #1 as an argument to hack some half-baked interfaces into the
 kernel which allow you to tick off your checkbox and then leave the
 resulting mess for others to clean up.
 
Try again when you have factual data to back up your claims and factual
arguments which prove that the problem can't be fixed otherwise.

I might be repeating myself, but kernel development works this way:

  1) Hack your private POC - Yay!

  2) Sit down and think hard about the problems you identified in step
 #1. Do a thorough analysis.
  
  3) Come up with a sensible integration plan.

  4) Do the necessary grump work of cleanups all over the place

  5) Add sensible infrastructure which is understandable for the bulk
 of kernel/driver developers

  6) Let your feature fall in place

and not in the way you are insisting on:

  1) Hack your private POC - Yay!

  2) Define that this is the only way to do it and try to shove it down
 the throat of everyone.

  3) Getting told that this is not the way it works

  4) Insist on it forever and blame the grumpy maintainers who are just
 not understanding the great value of your approach.

  5) Go back to #2

You should know that already, but I have no problem to give that lecture
to you over and over again. I probably should create a form letter.

And no, you can bitch about me as much as you want. These are not my
personal rules and personal pet peeves. These are rules Linus cares
about very much and aside of that they just reflect common sense.

  The kernel is a common good and not the dump ground for your personal
  brain waste.

  The kernel does not serve Intel. Quite the contrary Intel depends on
  the kernel to work nicely with its hardware. Ergo, Intel should have
  a vested interest to serve the kernel and take responsibility for it
  as a whole. And so should you as an Intel employee.

Just dumping your next half-baked workaround does not cut it especially
not when it is not backed up by sensible arguments.

Please try again, but not before you have something substantial to back
up your claims.

Thanks,

Thomas
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v5 12/16] PCI: Add pci_iomap_host_shared(), pci_iomap_host_shared_range()

2021-10-17 Thread Michael S. Tsirkin
On Thu, Oct 14, 2021 at 12:33:49PM +, Reshetova, Elena wrote:
> > On Thu, Oct 14, 2021 at 07:27:42AM +, Reshetova, Elena wrote:
> > > > On Thu, Oct 14, 2021 at 06:32:32AM +, Reshetova, Elena wrote:
> > > > > > On Tue, Oct 12, 2021 at 06:36:16PM +, Reshetova, Elena wrote:
> > > > > > > > The 5.15 tree has something like ~2.4k IO accesses (including 
> > > > > > > > MMIO and
> > > > > > > > others) in init functions that also register drivers (thanks 
> > > > > > > > Elena for
> > > > > > > > the number)
> > > > > > >
> > > > > > > To provide more numbers on this. What I can see so far from a 
> > > > > > > smatch-
> > based
> > > > > > > analysis, we have 409 __init style functions (.probe & 
> > > > > > > builtin/module_
> > > > > > > _platform_driver_probe excluded) for 5.15 with allyesconfig.
> > > > > >
> > > > > > I don't think we care about allyesconfig at all though.
> > > > > > Just don't do that.
> > > > > > How about allmodconfig? This is closer to what distros actually do.
> > > > >
> > > > > It does not make any difference really for the content of the 
> > > > > /drivers/*:
> > > > > gives 408 __init style functions doing IO (.probe & builtin/module_
> > > > > > > _platform_driver_probe excluded) for 5.15 with allmodconfig:
> > > > >
> > > > > ['doc200x_ident_chip',
> > > > > 'doc_probe', 'doc2001_init', 'mtd_speedtest_init',
> > > > > 'mtd_nandbiterrs_init', 'mtd_oobtest_init', 'mtd_pagetest_init',
> > > > > 'tort_init', 'mtd_subpagetest_init', 'fixup_pmc551',
> > > > > 'doc_set_driver_info', 'init_amd76xrom', 'init_l440gx',
> > > > > 'init_sc520cdp', 'init_ichxrom', 'init_ck804xrom', 'init_esb2rom',
> > > > > 'probe_acpi_namespace_devices', 'amd_iommu_init_pci', 'state_next',
> > > > > 'arm_v7s_do_selftests', 'arm_lpae_run_tests', 'init_iommu_one',
> > > >
> > > > Um. ARM? Which architecture is this build for?
> > >
> > > The list of smatch IO findings is built for x86, but the smatch cross 
> > > function
> > > database covers all archs, so when queried for all potential function 
> > > callers,
> > > it would show non x86 arch call chains also.
> > >
> > > Here is the original x86 finding and call chain for the 
> > > 'arm_v7s_do_selftests':
> > >
> > >   Detected low-level IO from arm_v7s_do_selftests in fun
> > > __iommu_queue_command_sync
> > >
> > > drivers/iommu/amd/iommu.c:1025 __iommu_queue_command_sync() error:
> > > {15002074744551330002}
> > > 'check_host_input' read from the host using function 'readl' to a
> > > member of the structure 'iommu->cmd_buf_head';
> > >
> > > __iommu_queue_command_sync()
> > >   iommu_completion_wait()
> > > amd_iommu_domain_flush_complete()
> > >   iommu_v1_map_page()
> > > arm_v7s_do_selftests()
> > >
> > > So, the results can be further filtered if you want a specified arch.
> > 
> > So what is it just for x86? Could you tell?
> 
> I can probably figure out how to do additional filtering here, but does
> it really matter for the case that is being discussed here? Andi's point was
> that there are quite a few existing places in the kernel where low-level IO
> happens before the probe stage. So I brought these numbers here.
> What do you plan to do with the pure x86 results? 

If the list is short - just suggest securing that ;)


> And here are the full results for allyesconfig, if anyone is interested (just 
> got permission to create
> the repository today):
> https://github.com/intel/ccc-linux-guest-hardening/tree/master/audit/sample_output/5.15-rc1
> We will be pushing to this repo all the scripts and fuzzing setups we use as 
> part of
> our Linux guest hardening effort for confidential cloud computing, but it is 
> going to take
> some time to get all the approvals collected.  
> 
> Best Regards,
> Elena.

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


RE: [PATCH v5 12/16] PCI: Add pci_iomap_host_shared(), pci_iomap_host_shared_range()

2021-10-17 Thread Thomas Gleixner
Elena,

On Thu, Oct 14 2021 at 06:32, Elena Reshetova wrote:
>> On Tue, Oct 12, 2021 at 06:36:16PM +, Reshetova, Elena wrote:
> It does not make any difference really for the content of the /drivers/*:
> gives 408 __init style functions doing IO (.probe & builtin/module_
>> > _platform_driver_probe excluded) for 5.15 with allmodconfig:
>
> ['doc200x_ident_chip',
> 'doc_probe', 'doc2001_init', 'mtd_speedtest_init',
> 'mtd_nandbiterrs_init', 'mtd_oobtest_init', 'mtd_pagetest_init',
> 'tort_init', 'mtd_subpagetest_init', 'fixup_pmc551',
> 'doc_set_driver_info', 'init_amd76xrom', 'init_l440gx',
> 'init_sc520cdp', 'init_ichxrom', 'init_ck804xrom', 'init_esb2rom',
> 'ubi_gluebi_init', 'ubiblock_init'
> 'ubi_init', 'mtd_stresstest_init',

All of this is MTD and can just be disabled wholesale.

Aside of that, most of these depend on either platform devices or device
tree enumerations which are not ever available on X86.

> 'probe_acpi_namespace_devices',

> 'amd_iommu_init_pci', 'state_next',
> 'init_dmars', 'iommu_init_pci', 'early_amd_iommu_init',
> 'late_iommu_features_init', 'detect_ivrs',
> 'intel_prepare_irq_remapping', 'intel_enable_irq_remapping',
> 'intel_cleanup_irq_remapping', 'detect_intel_iommu',
> 'parse_ioapics_under_ir', 'si_domain_init',
> 'intel_iommu_init', 'dmar_table_init',
> 'enable_drhd_fault_handling',
> 'check_tylersburg_isoch', 

None of this is reachable because the initial detection which is ACPI
table based will fail for TDX. If not, it's a guest firmware problem.

> 'fb_console_init', 'xenbus_probe_backend_init',
> 'xenbus_probe_frontend_init', 'setup_vcpu_hotplug_event',
> 'balloon_init',

XEN, that's relevant because magically the TDX guest will assume that it
is a XEN instance?

> 'ostm_init_clksrc', 'ftm_clockevent_init', 'ftm_clocksource_init',
> 'kona_timer_init', 'mtk_gpt_init', 'samsung_clockevent_init',
> 'samsung_clocksource_init', 'sysctr_timer_init', 'mxs_timer_init',
> 'sun4i_timer_init', 'at91sam926x_pit_dt_init', 'owl_timer_init',
> 'sun5i_setup_clockevent',
> 'mt7621_clk_init',
> 'samsung_clk_register_mux', 'samsung_clk_register_gate',
> 'samsung_clk_register_fixed_rate', 'clk_boston_setup',
> 'gemini_cc_init', 'aspeed_ast2400_cc', 'aspeed_ast2500_cc',
> 'sun6i_rtc_clk_init', 'phy_init', 'ingenic_ost_register_clock',
> 'meson6_timer_init', 'atcpit100_timer_init',
> 'npcm7xx_clocksource_init', 'clksrc_dbx500_prcmu_init',
> 'rcar_sysc_pd_setup', 'r8a779a0_sysc_pd_setup', 'renesas_soc_init',
> 'rcar_rst_init', 'rmobile_setup_pm_domain', 'mcp_write_pairing_set',
> 'a72_b53_rac_enable_all', 'mcp_a72_b53_set',
> 'brcmstb_soc_device_early_init', 'imx8mq_soc_revision',
> 'imx8mm_soc_uid', 'imx8mm_soc_revision', 'qe_init',
> 'exynos5x_clk_init', 'exynos5250_clk_init', 'exynos4_get_xom',
> 'create_one_cmux', 'create_one_pll', 'p2041_init_periph',
> 'p4080_init_periph', 'p5020_init_periph', 'p5040_init_periph',
> 'r9a06g032_clocks_probe', 'r8a73a4_cpg_clocks_init',
> 'sh73a0_cpg_clocks_init', 'cpg_div6_register',
> 'r8a7740_cpg_clocks_init', 'cpg_mssr_register_mod_clk',
> 'cpg_mssr_register_core_clk', 'rcar_gen3_cpg_clk_register',
> 'cpg_sd_clk_register', 'r7s9210_update_clk_table',
> 'rz_cpg_read_mode_pins', 'rz_cpg_clocks_init',
> 'rcar_r8a779a0_cpg_clk_register', 'rcar_gen2_cpg_clk_register',
> 'sun8i_a33_ccu_setup', 'sun8i_a23_ccu_setup', 'sun5i_ccu_init',
> 'suniv_f1c100s_ccu_setup', 'sun6i_a31_ccu_setup',
> 'sun8i_v3_v3s_ccu_init', 'sun50i_h616_ccu_setup',
> 'sunxi_h3_h5_ccu_init', 'sun4i_ccu_init', 'kona_ccu_init',
> 'ns2_genpll_scr_clk_init', 'ns2_genpll_sw_clk_init',
> 'ns2_lcpll_ddr_clk_init', 'ns2_lcpll_ports_clk_init',
> 'nsp_genpll_clk_init', 'nsp_lcpll0_clk_init',
> 'cygnus_genpll_clk_init', 'cygnus_lcpll0_clk_init',
> 'cygnus_mipipll_clk_init', 'cygnus_audiopll_clk_init',
> 'of_fixed_mmio_clk_setup',
> 'arm_v7s_do_selftests', 'arm_lpae_run_tests', 'init_iommu_one',

ARM based drivers are initialized on x86 in which way?

> 'hv_init_tsc_clocksource', 'hv_init_clocksource',

HyperV. See XEN

> 'skx_init',
> 'i10nm_init', 'sbridge_init', 'i82975x_init', 'i3000_init',
> 'x38_init', 'ie31200_init', 'i3200_init', 'amd64_edac_init',
> 'pnd2_init', 'edac_init', 'adummy_init',

EDAC has already hypervisor checks

> 'init_acpi_pm_clocksource',

Requires ACPI table entry or command line override

> 'intel_rng_mod_init',

Has an old style PCI table which is searched via pci_get_device(). Could
do with a cleanup which converts it to proper PCI probing.



So I stop here, because it would be way simpler to have the file names
but so far I could identify all of it off the top of my head.

So what are you trying to tell me? That you found tons of ioremaps in
__init functions which are completely irrelevant.

Please stop making arguments based on completely nonsensical data. It
took me less than 5 minutes to eliminate more than 50% of that list and
I'm pretty sure that I could have eliminated the bulk of the rest as
well.

The fact that a large part of this is ARM only, the 

[GIT PULL] virtio,vdpa: fixes

2021-10-17 Thread Michael S. Tsirkin
The following changes since commit be9c6bad9b46451ba5bb8d366c51e2475f374981:

  vdpa: potential uninitialized return in vhost_vdpa_va_map() (2021-09-14 
18:10:43 -0400)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus

for you to fetch changes up to bcef9356fc2e1302daf373c83c826aa27954d128:

  vhost-vdpa: Fix the wrong input in config_cb (2021-10-13 08:42:07 -0400)


virtio,vdpa: fixes

Fixes up some issues in rc5.

Signed-off-by: Michael S. Tsirkin 


Cindy Lu (1):
  vhost-vdpa: Fix the wrong input in config_cb

Halil Pasic (1):
  virtio: write back F_VERSION_1 before validate

Michael S. Tsirkin (1):
  Revert "virtio-blk: Add validation for block size in config space"

Randy Dunlap (1):
  VDUSE: fix documentation underline warning

Wu Zongyong (1):
  vhost_vdpa: unset vq irq before freeing irq

 Documentation/userspace-api/vduse.rst |  2 +-
 drivers/block/virtio_blk.c| 37 ++-
 drivers/vhost/vdpa.c  | 10 +-
 drivers/virtio/virtio.c   | 11 +++
 4 files changed, 23 insertions(+), 37 deletions(-)

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization