[PATCH 11/11] dax: move bdev_dax_pgoff to fs/dax.c
No functional change, but this will allow for a tighter integration with the iomap code, including possibly passing the partition offset in the iomap in the future. For now it mostly avoids growing more callers outside of fs/dax.c. Signed-off-by: Christoph Hellwig --- drivers/dax/super.c | 14 -- fs/dax.c| 13 + include/linux/dax.h | 1 - 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 803942586d1b6..c0910687fbcb2 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -67,20 +67,6 @@ void dax_remove_host(struct gendisk *disk) } EXPORT_SYMBOL_GPL(dax_remove_host); -int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, - pgoff_t *pgoff) -{ - sector_t start_sect = bdev ? get_start_sect(bdev) : 0; - phys_addr_t phys_off = (start_sect + sector) * 512; - - if (pgoff) - *pgoff = PHYS_PFN(phys_off); - if (phys_off % PAGE_SIZE || size % PAGE_SIZE) - return -EINVAL; - return 0; -} -EXPORT_SYMBOL(bdev_dax_pgoff); - /** * dax_get_by_host() - temporary lookup mechanism for filesystem-dax * @bdev: block device to find a dax_device for diff --git a/fs/dax.c b/fs/dax.c index 4e3e5a283a916..eb715363fd667 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -709,6 +709,19 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, return __dax_invalidate_entry(mapping, index, false); } +static int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, + pgoff_t *pgoff) +{ + sector_t start_sect = bdev ? 
get_start_sect(bdev) : 0; + phys_addr_t phys_off = (start_sect + sector) * 512; + + if (pgoff) + *pgoff = PHYS_PFN(phys_off); + if (phys_off % PAGE_SIZE || size % PAGE_SIZE) + return -EINVAL; + return 0; +} + static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev, sector_t sector, struct page *to, unsigned long vaddr) { diff --git a/include/linux/dax.h b/include/linux/dax.h index 439c3c70e347b..324363b798ecd 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -107,7 +107,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma, #endif struct writeback_control; -int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); void dax_remove_host(struct gendisk *disk); -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 10/11] dm-stripe: add a stripe_dax_pgoff helper
Add a helper to perform the entire remapping for DAX accesses. This helper open codes bdev_dax_pgoff given that the alignment checks have already been done by the submitting file system and don't need to be repeated. Signed-off-by: Christoph Hellwig --- drivers/md/dm-stripe.c | 63 ++ 1 file changed, 15 insertions(+), 48 deletions(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index f084607220293..50dba3f39274c 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -301,83 +301,50 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) } #if IS_ENABLED(CONFIG_FS_DAX) -static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) +static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff) { - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; struct block_device *bdev; + sector_t dev_sector; uint32_t stripe; - long ret; - stripe_map_sector(sc, sector, , _sector); + stripe_map_sector(sc, *pgoff * PAGE_SECTORS, , _sector); dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; bdev = sc->stripe[stripe].dev->bdev; - ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, ); - if (ret) - return ret; + *pgoff = (get_start_sect(bdev) + dev_sector) >> PAGE_SECTORS_SHIFT; + return sc->stripe[stripe].dev->dax_dev; +} + +static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) +{ + struct dax_device *dax_dev = stripe_dax_pgoff(ti, ); + return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; - struct block_device *bdev; - uint32_t stripe; - - 
stripe_map_sector(sc, sector, , _sector); - dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; - bdev = sc->stripe[stripe].dev->bdev; + struct dax_device *dax_dev = stripe_dax_pgoff(ti, ); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), )) - return 0; return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; - struct block_device *bdev; - uint32_t stripe; - - stripe_map_sector(sc, sector, , _sector); - dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; - bdev = sc->stripe[stripe].dev->bdev; + struct dax_device *dax_dev = stripe_dax_pgoff(ti, ); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), )) - return 0; return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); } static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { - int ret; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - struct stripe_c *sc = ti->private; - struct dax_device *dax_dev; - struct block_device *bdev; - uint32_t stripe; + struct dax_device *dax_dev = stripe_dax_pgoff(ti, ); - stripe_map_sector(sc, sector, , _sector); - dev_sector += sc->stripe[stripe].physical_start; - dax_dev = sc->stripe[stripe].dev->dax_dev; - bdev = sc->stripe[stripe].dev->bdev; - - ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, ); - if (ret) - return ret; return dax_zero_page_range(dax_dev, pgoff, nr_pages); } -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 09/11] dm-log-writes: add a log_writes_dax_pgoff helper
Add a helper to perform the entire remapping for DAX accesses. This helper open codes bdev_dax_pgoff given that the alignment checks have already been done by the submitting file system and don't need to be repeated. Signed-off-by: Christoph Hellwig --- drivers/md/dm-log-writes.c | 42 +++--- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 6d694526881d0..5aac60c1b774c 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -949,17 +949,21 @@ static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, return 0; } +static struct dax_device *log_writes_dax_pgoff(struct dm_target *ti, + pgoff_t *pgoff) +{ + struct log_writes_c *lc = ti->private; + + *pgoff += (get_start_sect(lc->dev->bdev) >> PAGE_SECTORS_SHIFT); + return lc->dev->dax_dev; +} + static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { - struct log_writes_c *lc = ti->private; - sector_t sector = pgoff * PAGE_SECTORS; - int ret; + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, ); - ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, ); - if (ret) - return ret; - return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn); + return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, @@ -968,11 +972,9 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, { struct log_writes_c *lc = ti->private; sector_t sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, ); int err; - if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), )) - return 0; - /* Don't bother doing anything if logging has been disabled */ if (!lc->logging_enabled) goto dax_copy; @@ -983,34 +985,24 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, return 0; } dax_copy: - return 
dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - struct log_writes_c *lc = ti->private; - sector_t sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, ); - if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), )) - return 0; - return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); } static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { - int ret; - struct log_writes_c *lc = ti->private; - sector_t sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, ); - ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT, -); - if (ret) - return ret; - return dax_zero_page_range(lc->dev->dax_dev, pgoff, - nr_pages << PAGE_SHIFT); + return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT); } #else -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 06/11] xfs: factor out a xfs_setup_dax helper
Factor out another DAX setup helper to simplify future changes. Also move the experimental warning after the checks to not clutter the log too much if the setup failed. Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_super.c | 47 +++--- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c4e0cd1c1c8ca..d07020a8eb9e3 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -339,6 +339,32 @@ xfs_buftarg_is_dax( bdev_nr_sectors(bt->bt_bdev)); } +static int +xfs_setup_dax( + struct xfs_mount*mp) +{ + struct super_block *sb = mp->m_super; + + if (!xfs_buftarg_is_dax(sb, mp->m_ddev_targp) && + (!mp->m_rtdev_targp || !xfs_buftarg_is_dax(sb, mp->m_rtdev_targp))) { + xfs_alert(mp, + "DAX unsupported by block device. Turning off DAX."); + goto disable_dax; + } + + if (xfs_has_reflink(mp)) { + xfs_alert(mp, "DAX and reflink cannot be used together!"); + return -EINVAL; + } + + xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + return 0; + +disable_dax: + xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); + return 0; +} + STATIC int xfs_blkdev_get( xfs_mount_t *mp, @@ -1592,26 +1618,9 @@ xfs_fs_fill_super( sb->s_flags |= SB_I_VERSION; if (xfs_has_dax_always(mp)) { - bool rtdev_is_dax = false, datadev_is_dax; - - xfs_warn(mp, - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - - datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp); - if (mp->m_rtdev_targp) - rtdev_is_dax = xfs_buftarg_is_dax(sb, - mp->m_rtdev_targp); - if (!rtdev_is_dax && !datadev_is_dax) { - xfs_alert(mp, - "DAX unsupported by block device. 
Turning off DAX."); - xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); - } - if (xfs_has_reflink(mp)) { - xfs_alert(mp, - "DAX and reflink cannot be used together!"); - error = -EINVAL; + error = xfs_setup_dax(mp); + if (error) goto out_filestream_unmount; - } } if (xfs_has_discard(mp)) { -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 07/11] dax: remove dax_capable
Just open code the block size and dax_dev == NULL checks in the callers. Signed-off-by: Christoph Hellwig --- drivers/dax/super.c | 36 drivers/md/dm-table.c| 22 +++--- drivers/md/dm.c | 21 - drivers/md/dm.h | 4 drivers/nvdimm/pmem.c| 1 - drivers/s390/block/dcssblk.c | 1 - fs/erofs/super.c | 11 +++ fs/ext2/super.c | 6 -- fs/ext4/super.c | 9 ++--- fs/xfs/xfs_super.c | 21 - include/linux/dax.h | 14 -- 11 files changed, 36 insertions(+), 110 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 482fe775324a4..803942586d1b6 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -108,42 +108,6 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) return dax_dev; } EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); - -bool generic_fsdax_supported(struct dax_device *dax_dev, - struct block_device *bdev, int blocksize, sector_t start, - sector_t sectors) -{ - if (blocksize != PAGE_SIZE) { - pr_info("%pg: error: unsupported blocksize for dax\n", bdev); - return false; - } - - if (!dax_dev) { - pr_debug("%pg: error: dax unsupported by block device\n", bdev); - return false; - } - - return true; -} -EXPORT_SYMBOL_GPL(generic_fsdax_supported); - -bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, - int blocksize, sector_t start, sector_t len) -{ - bool ret = false; - int id; - - if (!dax_dev) - return false; - - id = dax_read_lock(); - if (dax_alive(dax_dev) && dax_dev->ops->dax_supported) - ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, - start, len); - dax_read_unlock(id); - return ret; -} -EXPORT_SYMBOL_GPL(dax_supported); #endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ enum dax_device_flags { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 1fa4d5582dca5..4ae671c2168ea 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -807,12 +807,14 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type) EXPORT_SYMBOL_GPL(dm_table_set_type); /* validate the dax capability of the 
target device span */ -int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, +static int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - int blocksize = *(int *) data; + if (dev->dax_dev) + return false; - return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); + pr_debug("%pg: error: dax unsupported by block device\n", dev->bdev); + return true; } /* Check devices support synchronous DAX */ @@ -822,8 +824,8 @@ static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_de return !dev->dax_dev || !dax_synchronous(dev->dax_dev); } -bool dm_table_supports_dax(struct dm_table *t, - iterate_devices_callout_fn iterate_fn, int *blocksize) +static bool dm_table_supports_dax(struct dm_table *t, + iterate_devices_callout_fn iterate_fn) { struct dm_target *ti; unsigned i; @@ -836,7 +838,7 @@ bool dm_table_supports_dax(struct dm_table *t, return false; if (!ti->type->iterate_devices || - ti->type->iterate_devices(ti, iterate_fn, blocksize)) + ti->type->iterate_devices(ti, iterate_fn, NULL)) return false; } @@ -863,7 +865,6 @@ static int dm_table_determine_type(struct dm_table *t) struct dm_target *tgt; struct list_head *devices = dm_table_get_devices(t); enum dm_queue_mode live_md_type = dm_get_md_type(t->md); - int page_size = PAGE_SIZE; if (t->type != DM_TYPE_NONE) { /* target already set the table's type */ @@ -907,7 +908,7 @@ static int dm_table_determine_type(struct dm_table *t) verify_bio_based: /* We must use this table as bio-based */ t->type = DM_TYPE_BIO_BASED; - if (dm_table_supports_dax(t, device_not_dax_capable, _size) || + if (dm_table_supports_dax(t, device_not_dax_capable) || (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { t->type = DM_TYPE_DAX_BIO_BASED; } @@ -1981,7 +1982,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { bool wc = false, fua = false; - int
[PATCH 08/11] dm-linear: add a linear_dax_pgoff helper
Add a helper to perform the entire remapping for DAX accesses. This helper open codes bdev_dax_pgoff given that the alignment checks have already been done by the submitting file system and don't need to be repeated. Signed-off-by: Christoph Hellwig --- drivers/md/dm-linear.c | 49 +- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 32fbab11bf90c..bf03f73fd0f36 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -164,63 +164,44 @@ static int linear_iterate_devices(struct dm_target *ti, } #if IS_ENABLED(CONFIG_FS_DAX) +static struct dax_device *linear_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff) +{ + struct linear_c *lc = ti->private; + sector_t sector = linear_map_sector(ti, *pgoff << PAGE_SECTORS_SHIFT); + + *pgoff = (get_start_sect(lc->dev->bdev) + sector) >> PAGE_SECTORS_SHIFT; + return lc->dev->dax_dev; +} + static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { - long ret; - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - - dev_sector = linear_map_sector(ti, sector); - ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, ); - if (ret) - return ret; + struct dax_device *dax_dev = linear_dax_pgoff(ti, ); + return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = linear_dax_pgoff(ti, ); - dev_sector = linear_map_sector(ti, sector); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), )) - return 0; return 
dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct dax_device *dax_dev = linear_dax_pgoff(ti, ); - dev_sector = linear_map_sector(ti, sector); - if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), )) - return 0; return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); } static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages) { - int ret; - struct linear_c *lc = ti->private; - struct block_device *bdev = lc->dev->bdev; - struct dax_device *dax_dev = lc->dev->dax_dev; - sector_t dev_sector, sector = pgoff * PAGE_SECTORS; - - dev_sector = linear_map_sector(ti, sector); - ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, ); - if (ret) - return ret; + struct dax_device *dax_dev = linear_dax_pgoff(ti, ); + return dax_zero_page_range(dax_dev, pgoff, nr_pages); } -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 05/11] dax: move the partition alignment check into fs_dax_get_by_bdev
fs_dax_get_by_bdev is the primary interface to find a dax device for a block device, so move the partition alignment check there instead of wiring it up through ->dax_supported. Signed-off-by: Christoph Hellwig --- drivers/dax/super.c | 23 ++- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 04fc680542e8d..482fe775324a4 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -93,6 +93,12 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) if (!blk_queue_dax(bdev->bd_disk->queue)) return NULL; + if ((get_start_sect(bdev) * SECTOR_SIZE) % PAGE_SIZE || + (bdev_nr_sectors(bdev) * SECTOR_SIZE) % PAGE_SIZE) { + pr_info("%pg: error: unaligned partition for dax\n", bdev); + return NULL; + } + id = dax_read_lock(); dax_dev = xa_load(_hosts, (unsigned long)bdev->bd_disk); if (!dax_dev || !dax_alive(dax_dev) || !igrab(_dev->inode)) @@ -107,10 +113,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors) { - pgoff_t pgoff, pgoff_end; - sector_t last_page; - int err; - if (blocksize != PAGE_SIZE) { pr_info("%pg: error: unsupported blocksize for dax\n", bdev); return false; @@ -121,19 +123,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, return false; } - err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, ); - if (err) { - pr_info("%pg: error: unaligned partition for dax\n", bdev); - return false; - } - - last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512; - err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, _end); - if (err) { - pr_info("%pg: error: unaligned partition for dax\n", bdev); - return false; - } - return true; } EXPORT_SYMBOL_GPL(generic_fsdax_supported); -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 01/11] dm: make the DAX support dependent on CONFIG_FS_DAX
The device mapper DAX support is all hanging off a block device and thus can't be used with device dax. Make it depend on CONFIG_FS_DAX instead of CONFIG_DAX_DRIVER. This also means that bdev_dax_pgoff only needs to be built under CONFIG_FS_DAX now. Signed-off-by: Christoph Hellwig --- drivers/dax/super.c| 6 ++ drivers/md/dm-linear.c | 2 +- drivers/md/dm-log-writes.c | 2 +- drivers/md/dm-stripe.c | 2 +- drivers/md/dm-writecache.c | 2 +- drivers/md/dm.c| 2 +- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index b882cf8106ea3..e20d0cef10a18 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -63,7 +63,7 @@ static int dax_host_hash(const char *host) return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; } -#ifdef CONFIG_BLOCK +#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) #include int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, @@ -80,7 +80,6 @@ int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, } EXPORT_SYMBOL(bdev_dax_pgoff); -#if IS_ENABLED(CONFIG_FS_DAX) /** * dax_get_by_host() - temporary lookup mechanism for filesystem-dax * @host: alternate name for the device registered by a dax driver @@ -219,8 +218,7 @@ bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev, return ret; } EXPORT_SYMBOL_GPL(dax_supported); -#endif /* CONFIG_FS_DAX */ -#endif /* CONFIG_BLOCK */ +#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */ enum dax_device_flags { /* !alive + rcu grace period == no new operations / mappings */ diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 679b4c0a2eea1..32fbab11bf90c 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -163,7 +163,7 @@ static int linear_iterate_devices(struct dm_target *ti, return fn(ti, lc->dev, lc->start, ti->len, data); } -#if IS_ENABLED(CONFIG_DAX_DRIVER) +#if IS_ENABLED(CONFIG_FS_DAX) static long linear_dax_direct_access(struct dm_target *ti, pgoff_t 
pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index d93a4db235124..6d694526881d0 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -903,7 +903,7 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit limits->io_min = limits->physical_block_size; } -#if IS_ENABLED(CONFIG_DAX_DRIVER) +#if IS_ENABLED(CONFIG_FS_DAX) static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, struct iov_iter *i) { diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 6660b6b53d5bf..f084607220293 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -300,7 +300,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } -#if IS_ENABLED(CONFIG_DAX_DRIVER) +#if IS_ENABLED(CONFIG_FS_DAX) static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 18320444fb0a9..4c3a6e33604d3 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -38,7 +38,7 @@ #define BITMAP_GRANULARITY PAGE_SIZE #endif -#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_DAX_DRIVER) +#if IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && IS_ENABLED(CONFIG_FS_DAX) #define DM_WRITECACHE_HAS_PMEM #endif diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7870e6460633f..79737aee516b1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1783,7 +1783,7 @@ static struct mapped_device *alloc_dev(int minor) md->disk->private_data = md; sprintf(md->disk->disk_name, "dm-%d", minor); - if (IS_ENABLED(CONFIG_DAX_DRIVER)) { + if (IS_ENABLED(CONFIG_FS_DAX)) { md->dax_dev = alloc_dax(md, md->disk->disk_name, _dax_ops, 0); if (IS_ERR(md->dax_dev)) -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org 
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 04/11] dax: remove the pgmap sanity checks in generic_fsdax_supported
Drivers that register a dax_dev should make sure it works, no need to double check from the file system. Signed-off-by: Christoph Hellwig --- drivers/dax/super.c | 49 + 1 file changed, 1 insertion(+), 48 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 9383c11b21853..04fc680542e8d 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -107,13 +107,9 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, struct block_device *bdev, int blocksize, sector_t start, sector_t sectors) { - bool dax_enabled = false; pgoff_t pgoff, pgoff_end; - void *kaddr, *end_kaddr; - pfn_t pfn, end_pfn; sector_t last_page; - long len, len2; - int err, id; + int err; if (blocksize != PAGE_SIZE) { pr_info("%pg: error: unsupported blocksize for dax\n", bdev); @@ -138,49 +134,6 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, return false; } - id = dax_read_lock(); - len = dax_direct_access(dax_dev, pgoff, 1, , ); - len2 = dax_direct_access(dax_dev, pgoff_end, 1, _kaddr, _pfn); - - if (len < 1 || len2 < 1) { - pr_info("%pg: error: dax access failed (%ld)\n", - bdev, len < 1 ? len : len2); - dax_read_unlock(id); - return false; - } - - if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) { - /* -* An arch that has enabled the pmem api should also -* have its drivers support pfn_t_devmap() -* -* This is a developer warning and should not trigger in -* production. dax_flush() will crash since it depends -* on being able to do (page_address(pfn_to_page())). 
-*/ - WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)); - dax_enabled = true; - } else if (pfn_t_devmap(pfn) && pfn_t_devmap(end_pfn)) { - struct dev_pagemap *pgmap, *end_pgmap; - - pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL); - end_pgmap = get_dev_pagemap(pfn_t_to_pfn(end_pfn), NULL); - if (pgmap && pgmap == end_pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX - && pfn_t_to_page(pfn)->pgmap == pgmap - && pfn_t_to_page(end_pfn)->pgmap == pgmap - && pfn_t_to_pfn(pfn) == PHYS_PFN(__pa(kaddr)) - && pfn_t_to_pfn(end_pfn) == PHYS_PFN(__pa(end_kaddr))) - dax_enabled = true; - put_dev_pagemap(pgmap); - put_dev_pagemap(end_pgmap); - - } - dax_read_unlock(id); - - if (!dax_enabled) { - pr_info("%pg: error: dax support not enabled\n", bdev); - return false; - } return true; } EXPORT_SYMBOL_GPL(generic_fsdax_supported); -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 02/11] dax: remove CONFIG_DAX_DRIVER
CONFIG_DAX_DRIVER only selects CONFIG_DAX now, so remove it. Signed-off-by: Christoph Hellwig --- drivers/dax/Kconfig| 4 drivers/nvdimm/Kconfig | 2 +- drivers/s390/block/Kconfig | 2 +- fs/fuse/Kconfig| 2 +- 4 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index d2834c2cfa10d..954ab14ba7778 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -1,8 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -config DAX_DRIVER - select DAX - bool - menuconfig DAX tristate "DAX: direct access to differentiated memory" select SRCU diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index b7d1eb38b27d4..347fe7afa5830 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -22,7 +22,7 @@ if LIBNVDIMM config BLK_DEV_PMEM tristate "PMEM: Persistent memory block device support" default LIBNVDIMM - select DAX_DRIVER + select DAX select ND_BTT if BTT select ND_PFN if NVDIMM_PFN help diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index d0416dbd0cd81..e3710a762abae 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -5,7 +5,7 @@ comment "S/390 block device drivers" config DCSSBLK def_tristate m select FS_DAX_LIMITED - select DAX_DRIVER + select DAX prompt "DCSSBLK support" depends on S390 && BLOCK help diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 40ce9a1c12e5d..038ed0b9aaa5d 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -45,7 +45,7 @@ config FUSE_DAX select INTERVAL_TREE depends on VIRTIO_FS depends on FS_DAX - depends on DAX_DRIVER + depends on DAX help This allows bypassing guest page cache and allows mapping host page cache directly in guest address space. -- 2.30.2 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
further decouple DAX from block devices
Hi Dan, this series cleans up and simplifies the association between DAX and block devices in preparation of allowing to mount file systems directly on DAX devices without a detour through block devices. Diffstat: drivers/dax/Kconfig |4 drivers/dax/bus.c|2 drivers/dax/super.c | 220 +-- drivers/md/dm-linear.c | 51 +++-- drivers/md/dm-log-writes.c | 44 +++- drivers/md/dm-stripe.c | 65 +++- drivers/md/dm-table.c| 22 ++-- drivers/md/dm-writecache.c |2 drivers/md/dm.c | 29 - drivers/md/dm.h |4 drivers/nvdimm/Kconfig |2 drivers/nvdimm/pmem.c|9 - drivers/s390/block/Kconfig |2 drivers/s390/block/dcssblk.c | 12 +- fs/dax.c | 13 ++ fs/erofs/super.c | 11 +- fs/ext2/super.c |6 - fs/ext4/super.c |9 + fs/fuse/Kconfig |2 fs/fuse/virtio_fs.c |2 fs/xfs/xfs_super.c | 54 +- include/linux/dax.h | 30 ++--- 22 files changed, 185 insertions(+), 410 deletions(-) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH 03/11] dax: simplify the dax_device <-> gendisk association
Replace the dax_host_hash with an xarray indexed by the pointer value of the gendisk, and require explicit calls from the block drivers that want to associate their gendisk with a dax_device. Signed-off-by: Christoph Hellwig --- drivers/dax/bus.c| 2 +- drivers/dax/super.c | 106 +-- drivers/md/dm.c | 6 +- drivers/nvdimm/pmem.c| 8 ++- drivers/s390/block/dcssblk.c | 11 +++- fs/fuse/virtio_fs.c | 2 +- include/linux/dax.h | 19 +-- 7 files changed, 60 insertions(+), 94 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 6cc4da4c713d9..6d91b0186e3be 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -1326,7 +1326,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) * No 'host' or dax_operations since there is no access to this * device outside of mmap of the resulting character device. */ - dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC); + dax_dev = alloc_dax(dev_dax, NULL, DAXDEV_F_SYNC); if (IS_ERR(dax_dev)) { rc = PTR_ERR(dax_dev); goto err_alloc_dax; diff --git a/drivers/dax/super.c b/drivers/dax/super.c index e20d0cef10a18..9383c11b21853 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -7,10 +7,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -26,10 +24,8 @@ * @flags: state and boolean properties */ struct dax_device { - struct hlist_node list; struct inode inode; struct cdev cdev; - const char *host; void *private; unsigned long flags; const struct dax_operations *ops; @@ -42,10 +38,6 @@ static DEFINE_IDA(dax_minor_ida); static struct kmem_cache *dax_cache __read_mostly; static struct super_block *dax_superblock __read_mostly; -#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head)) -static struct hlist_head dax_host_list[DAX_HASH_SIZE]; -static DEFINE_SPINLOCK(dax_host_lock); - int dax_read_lock(void) { return srcu_read_lock(_srcu); @@ -58,13 +50,22 @@ void dax_read_unlock(int id) } EXPORT_SYMBOL_GPL(dax_read_unlock); -static int 
dax_host_hash(const char *host) +#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) +#include + +static DEFINE_XARRAY(dax_hosts); + +int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk) { - return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; + return xa_insert(_hosts, (unsigned long)disk, dax_dev, GFP_KERNEL); } +EXPORT_SYMBOL_GPL(dax_add_host); -#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) -#include +void dax_remove_host(struct gendisk *disk) +{ + xa_erase(_hosts, (unsigned long)disk); +} +EXPORT_SYMBOL_GPL(dax_remove_host); int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size, pgoff_t *pgoff) @@ -82,40 +83,23 @@ EXPORT_SYMBOL(bdev_dax_pgoff); /** * dax_get_by_host() - temporary lookup mechanism for filesystem-dax - * @host: alternate name for the device registered by a dax driver + * @bdev: block device to find a dax_device for */ -static struct dax_device *dax_get_by_host(const char *host) +struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) { - struct dax_device *dax_dev, *found = NULL; - int hash, id; + struct dax_device *dax_dev; + int id; - if (!host) + if (!blk_queue_dax(bdev->bd_disk->queue)) return NULL; - hash = dax_host_hash(host); - id = dax_read_lock(); - spin_lock(_host_lock); - hlist_for_each_entry(dax_dev, _host_list[hash], list) { - if (!dax_alive(dax_dev) - || strcmp(host, dax_dev->host) != 0) - continue; - - if (igrab(_dev->inode)) - found = dax_dev; - break; - } - spin_unlock(_host_lock); + dax_dev = xa_load(_hosts, (unsigned long)bdev->bd_disk); + if (!dax_dev || !dax_alive(dax_dev) || !igrab(_dev->inode)) + dax_dev = NULL; dax_read_unlock(id); - return found; -} - -struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) -{ - if (!blk_queue_dax(bdev->bd_disk->queue)) - return NULL; - return dax_get_by_host(bdev->bd_disk->disk_name); + return dax_dev; } EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev); @@ -361,12 +345,7 @@ void kill_dax(struct dax_device *dax_dev) return; 
clear_bit(DAXDEV_ALIVE, _dev->flags); - synchronize_srcu(_srcu); - - spin_lock(_host_lock); - hlist_del_init(_dev->list); - spin_unlock(_host_lock); } EXPORT_SYMBOL_GPL(kill_dax); @@ -398,8 +377,6 @@ static struct dax_device *to_dax_dev(struct inode *inode) static void dax_free_inode(struct inode *inode) { struct dax_device
Re: [PATCH v5 12/16] PCI: Add pci_iomap_host_shared(), pci_iomap_host_shared_range()
On Mon, Oct 18 2021 at 02:55, Thomas Gleixner wrote: > On Sun, Oct 10 2021 at 15:11, Andi Kleen wrote: >> The 5.15 tree has something like ~2.4k IO accesses (including MMIO and >> others) in init functions that also register drivers (thanks Elena for >> the number) > > These numbers are completely useless simply because they are based on > nonsensical criteria. See: > > https://lore.kernel.org/r/87r1cj2uad.ffs@tglx > >> My point is just that the ecosystem of devices that Linux supports is >> messy enough that there are legitimate exceptions from the "First IO >> only in probe call only" rule. > > Your point is based on your outright refusal to actually do a proper > analysis and your outright refusal to help fixing the real problems. > > All you have provided so far is handwaving based on a completely useless > analysis. > > Sure, your goal is to get this TDX problem solved, but it's not going to > be solved by: > > 1) Providing a nonsensical analysis > > 2) Using #1 as an argument to hack some half-baked interfaces into the > kernel which allow you to tick off your checkbox and then leave the > resulting mess for others to clean up. > > Try again when you have factual data to back up your claims and factual > arguments which prove that the problem can't be fixed otherwise. > > I might be repeating myself, but kernel development works this way: > > 1) Hack your private POC - Yay! > > 2) Sit down and think hard about the problems you identified in step > #1. Do a thorough analysis. > > 3) Come up with a sensible integration plan. > > 4) Do the necessary grunt work of cleanups all over the place > > 5) Add sensible infrastructure which is understandable for the bulk > of kernel/driver developers > > 6) Let your feature fall in place > > and not in the way you are insisting on: > > 1) Hack your private POC - Yay! > > 2) Define that this is the only way to do it and try to shove it down > the throat of everyone.
> > 3) Getting told that this is not the way it works > > 4) Insist on it forever and blame the grumpy maintainers who are just > not understanding the great value of your approach. > > 5) Go back to #2 > > You should know that already, but I have no problem to give that lecture > to you over and over again. I probably should create a form letter. > > And no, you can bitch about me as much as you want. These are not my > personal rules and personal pet peeves. These are rules Linus cares > about very much and aside of that they just reflect common sense. > > The kernel is a common good and not the dumping ground for your personal > brain waste. > > The kernel does not serve Intel. Quite the contrary, Intel depends on > the kernel to work nicely with its hardware. Ergo, Intel should have > a vested interest to serve the kernel and take responsibility for it > as a whole. And so should you as an Intel employee. > > Just dumping your next half-baked workaround does not cut it especially > not when it is not backed up by sensible arguments. > > Please try again, but not before you have something substantial to back > up your claims. That said, I can't resist the urge to say a few words to the responsible senior and management people at Intel in this context: I surely know that a lot of Intel people claim that their lack of progress is _only_ because Thomas is hard to work with and Thomas wants unreasonable changes to their code, which I could perceive as an abuse of myself for the purpose of self-deception. TBH, I don't give a damn. Let me ask a few questions instead: - Is it unreasonable to expect that argumentations are based on facts and proper analysis? - Is it unreasonable to expect a proper integration of a new feature? - Does it take unreasonable effort to do a proper design? - Is it unreasonable to ask that the necessary cleanups are done upfront?
If anyone of the responsible people at Intel thinks so, then they should speak up now and tell me in public and into my face what's so unreasonable about that. Thanks, Thomas ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v5 12/16] PCI: Add pci_iomap_host_shared(), pci_iomap_host_shared_range()
Andi, On Sun, Oct 10 2021 at 15:11, Andi Kleen wrote: > On 10/9/2021 1:39 PM, Dan Williams wrote: >> I agree with you and Greg here. If a driver is accessing hardware >> resources outside of the bind lifetime of one of the devices it >> supports, and in a way that neither modprobe-policy nor >> device-authorization-policy infrastructure can block, that sounds >> like a bug report. > > The 5.15 tree has something like ~2.4k IO accesses (including MMIO and > others) in init functions that also register drivers (thanks Elena for > the number) These numbers are completely useless simply because they are based on nonsensical criteria. See: https://lore.kernel.org/r/87r1cj2uad.ffs@tglx > My point is just that the ecosystem of devices that Linux supports is > messy enough that there are legitimate exceptions from the "First IO > only in probe call only" rule. Your point is based on your outright refusal to actually do a proper analysis and your outright refusal to help fixing the real problems. All you have provided so far is handwaving based on a completely useless analysis. Sure, your goal is to get this TDX problem solved, but it's not going to be solved by: 1) Providing a nonsensical analysis 2) Using #1 as an argument to hack some half-baked interfaces into the kernel which allow you to tick off your checkbox and then leave the resulting mess for others to clean up. Try again when you have factual data to back up your claims and factual arguments which prove that the problem can't be fixed otherwise. I might be repeating myself, but kernel development works this way: 1) Hack your private POC - Yay! 2) Sit down and think hard about the problems you identified in step #1. Do a thorough analysis. 3) Come up with a sensible integration plan.
4) Do the necessary grunt work of cleanups all over the place 5) Add sensible infrastructure which is understandable for the bulk of kernel/driver developers 6) Let your feature fall in place and not in the way you are insisting on: 1) Hack your private POC - Yay! 2) Define that this is the only way to do it and try to shove it down the throat of everyone. 3) Getting told that this is not the way it works 4) Insist on it forever and blame the grumpy maintainers who are just not understanding the great value of your approach. 5) Go back to #2 You should know that already, but I have no problem to give that lecture to you over and over again. I probably should create a form letter. And no, you can bitch about me as much as you want. These are not my personal rules and personal pet peeves. These are rules Linus cares about very much and aside of that they just reflect common sense. The kernel is a common good and not the dumping ground for your personal brain waste. The kernel does not serve Intel. Quite the contrary, Intel depends on the kernel to work nicely with its hardware. Ergo, Intel should have a vested interest to serve the kernel and take responsibility for it as a whole. And so should you as an Intel employee. Just dumping your next half-baked workaround does not cut it especially not when it is not backed up by sensible arguments. Please try again, but not before you have something substantial to back up your claims. Thanks, Thomas ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v5 12/16] PCI: Add pci_iomap_host_shared(), pci_iomap_host_shared_range()
On Thu, Oct 14, 2021 at 12:33:49PM +, Reshetova, Elena wrote: > > On Thu, Oct 14, 2021 at 07:27:42AM +, Reshetova, Elena wrote: > > > > On Thu, Oct 14, 2021 at 06:32:32AM +, Reshetova, Elena wrote: > > > > > > On Tue, Oct 12, 2021 at 06:36:16PM +, Reshetova, Elena wrote: > > > > > > > > The 5.15 tree has something like ~2.4k IO accesses (including > > > > > > > > MMIO and > > > > > > > > others) in init functions that also register drivers (thanks > > > > > > > > Elena for > > > > > > > > the number) > > > > > > > > > > > > > > To provide more numbers on this. What I can see so far from a > > > > > > > smatch- > > based > > > > > > > analysis, we have 409 __init style functions (.probe & > > > > > > > builtin/module_ > > > > > > > _platform_driver_probe excluded) for 5.15 with allyesconfig. > > > > > > > > > > > > I don't think we care about allyesconfig at all though. > > > > > > Just don't do that. > > > > > > How about allmodconfig? This is closer to what distros actually do. > > > > > > > > > > It does not make any difference really for the content of the > > > > > /drivers/*: > > > > > gives 408 __init style functions doing IO (.probe & builtin/module_ > > > > > > > _platform_driver_probe excluded) for 5.15 with allmodconfig: > > > > > > > > > > ['doc200x_ident_chip', > > > > > 'doc_probe', 'doc2001_init', 'mtd_speedtest_init', > > > > > 'mtd_nandbiterrs_init', 'mtd_oobtest_init', 'mtd_pagetest_init', > > > > > 'tort_init', 'mtd_subpagetest_init', 'fixup_pmc551', > > > > > 'doc_set_driver_info', 'init_amd76xrom', 'init_l440gx', > > > > > 'init_sc520cdp', 'init_ichxrom', 'init_ck804xrom', 'init_esb2rom', > > > > > 'probe_acpi_namespace_devices', 'amd_iommu_init_pci', 'state_next', > > > > > 'arm_v7s_do_selftests', 'arm_lpae_run_tests', 'init_iommu_one', > > > > > > > > Um. ARM? Which architecture is this build for? 
> > > > > > The list of smatch IO findings is built for x86, but the smatch cross > > > function > > > database covers all archs, so when queried for all potential function > > > callers, > > > it would show non x86 arch call chains also. > > > > > > Here is the original x86 finding and call chain for the > > > 'arm_v7s_do_selftests': > > > > > > Detected low-level IO from arm_v7s_do_selftests in fun > > > __iommu_queue_command_sync > > > > > > drivers/iommu/amd/iommu.c:1025 __iommu_queue_command_sync() error: > > > {15002074744551330002} > > > 'check_host_input' read from the host using function 'readl' to a > > > member of the structure 'iommu->cmd_buf_head'; > > > > > > __iommu_queue_command_sync() > > > iommu_completion_wait() > > > amd_iommu_domain_flush_complete() > > > iommu_v1_map_page() > > > arm_v7s_do_selftests() > > > > > > So, the results can be further filtered if you want a specified arch. > > > > So what is it just for x86? Could you tell? > > I can probably figure out how to do additional filtering here, but does > it really matter for the case that is being discussed here? Andi's point was > that there quite many existing places in the kernel when low-level IO > happens before the probe stage. So I brought these numbers here. > What do you plan to do with the pure x86 results? If the list is short - just suggest securing that ;) > And here are the full results for allyesconfig, if anyone is interested (just > got permission to create > the repository today): > https://github.com/intel/ccc-linux-guest-hardening/tree/master/audit/sample_output/5.15-rc1 > We will be pushing to this repo all the scripts and fuzzing setups we use as > part of > our Linux guest hardening effort for confidential cloud computing, but it is > going to take > some time to get all the approvals collected. > > Best Regards, > Elena. ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
RE: [PATCH v5 12/16] PCI: Add pci_iomap_host_shared(), pci_iomap_host_shared_range()
Elena, On Thu, Oct 14 2021 at 06:32, Elena Reshetova wrote: >> On Tue, Oct 12, 2021 at 06:36:16PM +, Reshetova, Elena wrote: > It does not make any difference really for the content of the /drivers/*: > gives 408 __init style functions doing IO (.probe & builtin/module_ >> > _platform_driver_probe excluded) for 5.15 with allmodconfig: > > ['doc200x_ident_chip', > 'doc_probe', 'doc2001_init', 'mtd_speedtest_init', > 'mtd_nandbiterrs_init', 'mtd_oobtest_init', 'mtd_pagetest_init', > 'tort_init', 'mtd_subpagetest_init', 'fixup_pmc551', > 'doc_set_driver_info', 'init_amd76xrom', 'init_l440gx', > 'init_sc520cdp', 'init_ichxrom', 'init_ck804xrom', 'init_esb2rom', > 'ubi_gluebi_init', 'ubiblock_init' > 'ubi_init', 'mtd_stresstest_init', All of this is MTD and can just be disabled wholesale. Aside of that, most of these depend on either platform devices or device tree enumerations which are not ever available on X86. > 'probe_acpi_namespace_devices', > 'amd_iommu_init_pci', 'state_next', > 'init_dmars', 'iommu_init_pci', 'early_amd_iommu_init', > 'late_iommu_features_init', 'detect_ivrs', > 'intel_prepare_irq_remapping', 'intel_enable_irq_remapping', > 'intel_cleanup_irq_remapping', 'detect_intel_iommu', > 'parse_ioapics_under_ir', 'si_domain_init', > 'intel_iommu_init', 'dmar_table_init', > 'enable_drhd_fault_handling', > 'check_tylersburg_isoch', None of this is reachable because the initial detection which is ACPI table based will fail for TDX. If not, it's a guest firmware problem. > 'fb_console_init', 'xenbus_probe_backend_init', > 'xenbus_probe_frontend_init', 'setup_vcpu_hotplug_event', > 'balloon_init', XEN, that's relevant because magically the TDX guest will assume that it is a XEN instance? 
> 'ostm_init_clksrc', 'ftm_clockevent_init', 'ftm_clocksource_init', > 'kona_timer_init', 'mtk_gpt_init', 'samsung_clockevent_init', > 'samsung_clocksource_init', 'sysctr_timer_init', 'mxs_timer_init', > 'sun4i_timer_init', 'at91sam926x_pit_dt_init', 'owl_timer_init', > 'sun5i_setup_clockevent', > 'mt7621_clk_init', > 'samsung_clk_register_mux', 'samsung_clk_register_gate', > 'samsung_clk_register_fixed_rate', 'clk_boston_setup', > 'gemini_cc_init', 'aspeed_ast2400_cc', 'aspeed_ast2500_cc', > 'sun6i_rtc_clk_init', 'phy_init', 'ingenic_ost_register_clock', > 'meson6_timer_init', 'atcpit100_timer_init', > 'npcm7xx_clocksource_init', 'clksrc_dbx500_prcmu_init', > 'rcar_sysc_pd_setup', 'r8a779a0_sysc_pd_setup', 'renesas_soc_init', > 'rcar_rst_init', 'rmobile_setup_pm_domain', 'mcp_write_pairing_set', > 'a72_b53_rac_enable_all', 'mcp_a72_b53_set', > 'brcmstb_soc_device_early_init', 'imx8mq_soc_revision', > 'imx8mm_soc_uid', 'imx8mm_soc_revision', 'qe_init', > 'exynos5x_clk_init', 'exynos5250_clk_init', 'exynos4_get_xom', > 'create_one_cmux', 'create_one_pll', 'p2041_init_periph', > 'p4080_init_periph', 'p5020_init_periph', 'p5040_init_periph', > 'r9a06g032_clocks_probe', 'r8a73a4_cpg_clocks_init', > 'sh73a0_cpg_clocks_init', 'cpg_div6_register', > 'r8a7740_cpg_clocks_init', 'cpg_mssr_register_mod_clk', > 'cpg_mssr_register_core_clk', 'rcar_gen3_cpg_clk_register', > 'cpg_sd_clk_register', 'r7s9210_update_clk_table', > 'rz_cpg_read_mode_pins', 'rz_cpg_clocks_init', > 'rcar_r8a779a0_cpg_clk_register', 'rcar_gen2_cpg_clk_register', > 'sun8i_a33_ccu_setup', 'sun8i_a23_ccu_setup', 'sun5i_ccu_init', > 'suniv_f1c100s_ccu_setup', 'sun6i_a31_ccu_setup', > 'sun8i_v3_v3s_ccu_init', 'sun50i_h616_ccu_setup', > 'sunxi_h3_h5_ccu_init', 'sun4i_ccu_init', 'kona_ccu_init', > 'ns2_genpll_scr_clk_init', 'ns2_genpll_sw_clk_init', > 'ns2_lcpll_ddr_clk_init', 'ns2_lcpll_ports_clk_init', > 'nsp_genpll_clk_init', 'nsp_lcpll0_clk_init', > 'cygnus_genpll_clk_init', 'cygnus_lcpll0_clk_init', > 
'cygnus_mipipll_clk_init', 'cygnus_audiopll_clk_init', > 'of_fixed_mmio_clk_setup', > 'arm_v7s_do_selftests', 'arm_lpae_run_tests', 'init_iommu_one', ARM based drivers are initialized on x86 in which way? > 'hv_init_tsc_clocksource', 'hv_init_clocksource', HyperV. See XEN > 'skx_init', > 'i10nm_init', 'sbridge_init', 'i82975x_init', 'i3000_init', > 'x38_init', 'ie31200_init', 'i3200_init', 'amd64_edac_init', > 'pnd2_init', 'edac_init', 'adummy_init', EDAC already has hypervisor checks > 'init_acpi_pm_clocksource', Requires ACPI table entry or command line override > 'intel_rng_mod_init', Has an old style PCI table which is searched via pci_get_device(). Could do with a cleanup which converts it to proper PCI probing. So I stop here, because it would be way simpler to have the file names but so far I could identify all of it off the top of my head. So what are you trying to tell me? That you found tons of ioremaps in __init functions which are completely irrelevant. Please stop making arguments based on completely nonsensical data. It took me less than 5 minutes to eliminate more than 50% of that list and I'm pretty sure that I could have eliminated the bulk of the rest as well. The fact that a large part of this is ARM only, the
[GIT PULL] virtio,vdpa: fixes
The following changes since commit be9c6bad9b46451ba5bb8d366c51e2475f374981: vdpa: potential uninitialized return in vhost_vdpa_va_map() (2021-09-14 18:10:43 -0400) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus for you to fetch changes up to bcef9356fc2e1302daf373c83c826aa27954d128: vhost-vdpa: Fix the wrong input in config_cb (2021-10-13 08:42:07 -0400) virtio,vdpa: fixes Fixes up some issues in rc5. Signed-off-by: Michael S. Tsirkin Cindy Lu (1): vhost-vdpa: Fix the wrong input in config_cb Halil Pasic (1): virtio: write back F_VERSION_1 before validate Michael S. Tsirkin (1): Revert "virtio-blk: Add validation for block size in config space" Randy Dunlap (1): VDUSE: fix documentation underline warning Wu Zongyong (1): vhost_vdpa: unset vq irq before freeing irq Documentation/userspace-api/vduse.rst | 2 +- drivers/block/virtio_blk.c| 37 ++- drivers/vhost/vdpa.c | 10 +- drivers/virtio/virtio.c | 11 +++ 4 files changed, 23 insertions(+), 37 deletions(-) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization