Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-19 Thread Kari Argillander
On Mon, Oct 18, 2021 at 11:53:08AM -0600, Jens Axboe wrote:

snip..

> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index 7b0326661a1e..a967b3fb3c71 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -236,14 +236,14 @@ static inline sector_t get_start_sect(struct 
> block_device *bdev)
>   return bdev->bd_start_sect;
>  }
>  
> -static inline loff_t bdev_nr_bytes(struct block_device *bdev)
> +static inline sector_t bdev_nr_sectors(struct block_device *bdev)
>  {
> - return i_size_read(bdev->bd_inode);
> + return bdev->bd_nr_sectors;
>  }
>  
> -static inline sector_t bdev_nr_sectors(struct block_device *bdev)
> +static inline loff_t bdev_nr_bytes(struct block_device *bdev)
>  {
> - return bdev_nr_bytes(bdev) >> SECTOR_SHIFT;
> + return bdev_nr_setors(bdev) << SECTOR_SHIFT;

setors -> sectors

>  }
>  
>  static inline sector_t get_capacity(struct gendisk *disk)
> 
> -- 
> Jens Axboe
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Jens Axboe
On 10/18/21 7:04 PM, Kari Argillander wrote:
> On Mon, Oct 18, 2021 at 11:53:08AM -0600, Jens Axboe wrote:
> 
> snip..
> 
>> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
>> index 7b0326661a1e..a967b3fb3c71 100644
>> --- a/include/linux/genhd.h
>> +++ b/include/linux/genhd.h
>> @@ -236,14 +236,14 @@ static inline sector_t get_start_sect(struct 
>> block_device *bdev)
>>  return bdev->bd_start_sect;
>>  }
>>  
>> -static inline loff_t bdev_nr_bytes(struct block_device *bdev)
>> +static inline sector_t bdev_nr_sectors(struct block_device *bdev)
>>  {
>> -return i_size_read(bdev->bd_inode);
>> +return bdev->bd_nr_sectors;
>>  }
>>  
>> -static inline sector_t bdev_nr_sectors(struct block_device *bdev)
>> +static inline loff_t bdev_nr_bytes(struct block_device *bdev)
>>  {
>> -return bdev_nr_bytes(bdev) >> SECTOR_SHIFT;
>> +return bdev_nr_setors(bdev) << SECTOR_SHIFT;
> 
> setors -> sectors

Yep, did catch that prior.

-- 
Jens Axboe

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Christoph Hellwig
Looks good,

Reviewed-by: Christoph Hellwig 

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Jens Axboe
On 10/18/21 11:49 AM, Christoph Hellwig wrote:
> On Mon, Oct 18, 2021 at 11:40:51AM -0600, Jens Axboe wrote:
>>  static inline loff_t bdev_nr_bytes(struct block_device *bdev)
>>  {
>> -return i_size_read(bdev->bd_inode);
>> +return bdev->bd_nr_sectors;
> 
> This hunk needs to go into bdev_nr_sectors, and the bdev_nr_bytes
> probably wants to call bdev_nr_sectors and do the shifting.

Makes sense.

commit dd018a580d0037f65d7dd801cbf3e053f36283de
Author: Jens Axboe 
Date:   Mon Oct 18 11:39:45 2021 -0600

block: cache inode size in bdev

Reading the inode size brings in a new cacheline for IO submit, and
it's in the hot path being checked for every single IO. When doing
millions of IOs per core per second, this is noticeable overhead.

Cache the nr_sectors in the bdev itself.

Signed-off-by: Jens Axboe 

diff --git a/block/genhd.c b/block/genhd.c
index 759bc06810f8..53495e3391e3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -58,6 +58,7 @@ void set_capacity(struct gendisk *disk, sector_t sectors)
 
spin_lock(>bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+   bdev->bd_nr_sectors = sectors;
spin_unlock(>bd_size_lock);
 }
 EXPORT_SYMBOL(set_capacity);
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 9dbddc355b40..66ef9bc6d6a1 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -91,6 +91,7 @@ static void bdev_set_nr_sectors(struct block_device *bdev, 
sector_t sectors)
 {
spin_lock(>bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+   bdev->bd_nr_sectors = sectors;
spin_unlock(>bd_size_lock);
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 472e55e0e94f..fe065c394fff 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -39,6 +39,7 @@ struct bio_crypt_ctx;
 
 struct block_device {
sector_tbd_start_sect;
+   sector_tbd_nr_sectors;
struct disk_stats __percpu *bd_stats;
unsigned long   bd_stamp;
boolbd_read_only;   /* read-only policy */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7b0326661a1e..a967b3fb3c71 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -236,14 +236,14 @@ static inline sector_t get_start_sect(struct block_device 
*bdev)
return bdev->bd_start_sect;
 }
 
-static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+static inline sector_t bdev_nr_sectors(struct block_device *bdev)
 {
-   return i_size_read(bdev->bd_inode);
+   return bdev->bd_nr_sectors;
 }
 
-static inline sector_t bdev_nr_sectors(struct block_device *bdev)
+static inline loff_t bdev_nr_bytes(struct block_device *bdev)
 {
-   return bdev_nr_bytes(bdev) >> SECTOR_SHIFT;
+   return bdev_nr_setors(bdev) << SECTOR_SHIFT;
 }
 
 static inline sector_t get_capacity(struct gendisk *disk)

-- 
Jens Axboe

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Christoph Hellwig
On Mon, Oct 18, 2021 at 11:40:51AM -0600, Jens Axboe wrote:
>  static inline loff_t bdev_nr_bytes(struct block_device *bdev)
>  {
> - return i_size_read(bdev->bd_inode);
> + return bdev->bd_nr_sectors;

This hunk needs to go into bdev_nr_sectors, and the bdev_nr_bytes
probably wants to call bdev_nr_sectors and do the shifting.

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Jens Axboe
On Mon, 18 Oct 2021 12:11:00 +0200, Christoph Hellwig wrote:
> various drivers currently poke directy at the block device inode, which
> is a bit of a mess.  This series cleans up the places that read the
> block device size to use the proper helpers.  I have separate patches
> for many of the other bd_inode uses, but this series is already big
> enough as-is,
> 
> Changes since v2:
>  - bdev_nr_bytes should return loff_t
>  - fix a commit message typo
>  - drop a redundant note in a commit message
> 
> [...]

Applied, thanks!

[01/30] block: move the SECTOR_SIZE related definitions to blk_types.h
commit: ac076a376d4c1fa7f01bedad76bab96a981b7464
[02/30] block: add a bdev_nr_bytes helper
commit: 449c780f68d9adbab2373c996d4341e61c088685
[03/30] bcache: remove bdev_sectors
commit: 519070e1b8411c93b483fb50511c9d5d7932f62a
[04/30] drbd: use bdev_nr_sectors instead of open coding it
commit: eee1958b9a7b912fff33319e5737d861703c3a47
[05/30] dm: use bdev_nr_sectors and bdev_nr_bytes instead of open coding them
commit: 34d7526093779e26c1a281992c7e91662f3afa85
[06/30] md: use bdev_nr_sectors instead of open coding it
commit: 1a70a0364bbbf29eab22c9fa4b3d71087df940a5
[07/30] nvmet: use bdev_nr_bytes instead of open coding it
commit: d61ec9eeaa161c6e385f4adebc5d671bc5290687
[08/30] target/iblock: use bdev_nr_bytes instead of open coding it
commit: 30de91d3df67291093736890b7496620d5025df9
[09/30] fs: use bdev_nr_bytes instead of open coding it in blkdev_max_block
commit: 011bb9476ef8f9867330e2bce22cf124d034cd33
[10/30] fs: simplify init_page_buffers
commit: 957c50dd8af9945fde3a3fb6c8baf5d638ef3177
[11/30] affs: use bdev_nr_sectors instead of open coding it
commit: ec003894a9db3858165dd61fb4cabf9a402aabe0
[12/30] btrfs: use bdev_nr_bytes instead of open coding it
commit: 167a1c754eae512e45de682e2cb4ea05f080fda5
[13/30] cramfs: use bdev_nr_bytes instead of open coding it
commit: cdf881e14aa127c7602110d208b3412b1412c1ab
[14/30] fat: use bdev_nr_sectors instead of open coding it
commit: 4513b8c903782c4f3963172d81414e08f48a0317
[15/30] hfs: use bdev_nr_sectors instead of open coding it
commit: 311b610de54a52c199e2a129da2c26ad5953edb3
[16/30] hfsplus: use bdev_nr_sectors instead of open coding it
commit: 03b67c1de5d3b085360f3d6dcf37560f44e8cb4b
[17/30] jfs: use bdev_nr_bytes instead of open coding it
commit: c1e80b87c3acd52817bea278310900ad2825686c
[18/30] nfs/blocklayout: use bdev_nr_bytes instead of open coding it
commit: 6b1b53cf606d70dc6dd375b42558cfe7e945
[19/30] nilfs2: use bdev_nr_bytes instead of open coding it
commit: a24d8bcfd590de5dc4a9e806c9e76558676c2eef
[20/30] ntfs3: use bdev_nr_bytes instead of open coding it
commit: 9242c8b0b4432b6929b030c729a1edd9d9116d4c
[21/30] pstore/blk: use bdev_nr_bytes instead of open coding it
commit: 989ab34bd83f075efdae2cf6026cec0507374696
[22/30] reiserfs: use bdev_nr_bytes instead of open coding it
commit: 8d147b3353c7fd853313521c4f66430d38d66391
[23/30] squashfs: use bdev_nr_bytes instead of open coding it
commit: 8538360bb42955166d0073ffb6dff6a4b0caa4ec
[24/30] block: use bdev_nr_bytes instead of open coding it in blkdev_fallocate
commit: 7ad94c3008a3f5e0ff8af1e3ff1c7061955ccec4
[25/30] block: add a sb_bdev_nr_blocks helper
commit: 5793a4ebc76566fd24d7afdbcefb3311355fd077
[26/30] ext4: use sb_bdev_nr_blocks
commit: 3a10af74c8f1d390857cf87648573bc4f157e4ca
[27/30] jfs: use sb_bdev_nr_blocks
commit: cd8ac55f93923c65e18204c99b08a8c4cba3d187
[28/30] ntfs: use sb_bdev_nr_blocks
commit: 8e2c901e6d1c97bf862514901beaae3e248655d8
[29/30] reiserfs: use sb_bdev_nr_blocks
commit: 93361ef44a8931d281583ea9c608247fe8127528
[30/30] udf: use sb_bdev_nr_blocks
commit: ea8befeb35c47cf95012032850fe3f0ec80e5cde

Best regards,
-- 
Jens Axboe


--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Jens Axboe
On 10/18/21 11:18 AM, Christoph Hellwig wrote:
> On Mon, Oct 18, 2021 at 11:16:08AM -0600, Jens Axboe wrote:
>> This looks good to me. Followup question, as it's related - I've got a
>> hacky patch that caches the inode size in the bdev:
>>
>> https://git.kernel.dk/cgit/linux-block/commit/?h=perf-wip=c754951eb7193258c35a574bd1b7c4946ee4
>>
>> so we don't have to dip into the inode itself for the fast path. While
>> it's obviously not something being proposed for inclusion right now, is
>> there a world in which we can make something like that work?
> 
> There's just two places that update i_size for block devices:
> set_capacity and bdev_set_nr_sectors.  So you just need to update
> bd_nr_sectors there and you're done.

This on top of your patches should do the trick, then.


commit eebb7c5048163985fb21d6cb740ebac78cb46051
Author: Jens Axboe 
Date:   Mon Oct 18 11:39:45 2021 -0600

block: cache inode size in bdev

Reading the inode size brings in a new cacheline for IO submit, and
it's in the hot path being checked for every single IO. When doing
millions of IOs per core per second, this is noticeable overhead.

Cache the nr_sectors in the bdev itself.

Signed-off-by: Jens Axboe 

diff --git a/block/genhd.c b/block/genhd.c
index 759bc06810f8..53495e3391e3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -58,6 +58,7 @@ void set_capacity(struct gendisk *disk, sector_t sectors)
 
spin_lock(>bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+   bdev->bd_nr_sectors = sectors;
spin_unlock(>bd_size_lock);
 }
 EXPORT_SYMBOL(set_capacity);
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 9dbddc355b40..66ef9bc6d6a1 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -91,6 +91,7 @@ static void bdev_set_nr_sectors(struct block_device *bdev, 
sector_t sectors)
 {
spin_lock(>bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+   bdev->bd_nr_sectors = sectors;
spin_unlock(>bd_size_lock);
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 472e55e0e94f..fe065c394fff 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -39,6 +39,7 @@ struct bio_crypt_ctx;
 
 struct block_device {
sector_tbd_start_sect;
+   sector_tbd_nr_sectors;
struct disk_stats __percpu *bd_stats;
unsigned long   bd_stamp;
boolbd_read_only;   /* read-only policy */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7b0326661a1e..001f617f82da 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -238,7 +238,7 @@ static inline sector_t get_start_sect(struct block_device 
*bdev)
 
 static inline loff_t bdev_nr_bytes(struct block_device *bdev)
 {
-   return i_size_read(bdev->bd_inode);
+   return bdev->bd_nr_sectors;
 }
 
 static inline sector_t bdev_nr_sectors(struct block_device *bdev)

-- 
Jens Axboe

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Christoph Hellwig
On Mon, Oct 18, 2021 at 11:16:08AM -0600, Jens Axboe wrote:
> This looks good to me. Followup question, as it's related - I've got a
> hacky patch that caches the inode size in the bdev:
> 
> https://git.kernel.dk/cgit/linux-block/commit/?h=perf-wip=c754951eb7193258c35a574bd1b7c4946ee4
> 
> so we don't have to dip into the inode itself for the fast path. While
> it's obviously not something being proposed for inclusion right now, is
> there a world in which we can make something like that work?

There's just two places that update i_size for block devices:
set_capacity and bdev_set_nr_sectors.  So you just need to update
bd_nr_sectors there and you're done.

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Jens Axboe
On 10/18/21 4:11 AM, Christoph Hellwig wrote:
> Hi Jens,
> 
> various drivers currently poke directy at the block device inode, which
> is a bit of a mess.  This series cleans up the places that read the
> block device size to use the proper helpers.  I have separate patches
> for many of the other bd_inode uses, but this series is already big
> enough as-is,

This looks good to me. Followup question, as it's related - I've got a
hacky patch that caches the inode size in the bdev:

https://git.kernel.dk/cgit/linux-block/commit/?h=perf-wip=c754951eb7193258c35a574bd1b7c4946ee4

so we don't have to dip into the inode itself for the fast path. While
it's obviously not something being proposed for inclusion right now, is
there a world in which we can make something like that work?

-- 
Jens Axboe

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



[dm-devel] don't use ->bd_inode to access the block device size v3

2021-10-18 Thread Christoph Hellwig
Hi Jens,

various drivers currently poke directy at the block device inode, which
is a bit of a mess.  This series cleans up the places that read the
block device size to use the proper helpers.  I have separate patches
for many of the other bd_inode uses, but this series is already big
enough as-is,

Changes since v2:
 - bdev_nr_bytes should return loff_t
 - fix a commit message typo
 - drop a redundant note in a commit message

Changes since v1:
 - move SECTOR_SIZE & co
 - use SECTOR_SHIFT in sb_bdev_nr_blocks
 - add a bdev_nr_bytes helper
 - reuse a variable in the SCSI target code
 - drop the block2mtd patch, a bigger rewrite for that code is pending

Diffstat:
 block/fops.c|2 +-
 drivers/block/drbd/drbd_int.h   |3 +--
 drivers/md/bcache/super.c   |2 +-
 drivers/md/bcache/util.h|4 
 drivers/md/bcache/writeback.c   |2 +-
 drivers/md/dm-bufio.c   |2 +-
 drivers/md/dm-cache-metadata.c  |2 +-
 drivers/md/dm-cache-target.c|2 +-
 drivers/md/dm-clone-target.c|2 +-
 drivers/md/dm-dust.c|5 ++---
 drivers/md/dm-ebs-target.c  |2 +-
 drivers/md/dm-era-target.c  |2 +-
 drivers/md/dm-exception-store.h |2 +-
 drivers/md/dm-flakey.c  |3 +--
 drivers/md/dm-integrity.c   |6 +++---
 drivers/md/dm-linear.c  |3 +--
 drivers/md/dm-log-writes.c  |4 ++--
 drivers/md/dm-log.c |2 +-
 drivers/md/dm-mpath.c   |2 +-
 drivers/md/dm-raid.c|6 +++---
 drivers/md/dm-switch.c  |2 +-
 drivers/md/dm-table.c   |3 +--
 drivers/md/dm-thin-metadata.c   |2 +-
 drivers/md/dm-thin.c|2 +-
 drivers/md/dm-verity-target.c   |3 +--
 drivers/md/dm-writecache.c  |2 +-
 drivers/md/dm-zoned-target.c|2 +-
 drivers/md/md.c |   26 +++---
 drivers/nvme/target/io-cmd-bdev.c   |4 ++--
 drivers/target/target_core_iblock.c |4 ++--
 fs/affs/super.c |2 +-
 fs/btrfs/dev-replace.c  |3 +--
 fs/btrfs/disk-io.c  |2 +-
 fs/btrfs/ioctl.c|4 ++--
 fs/btrfs/volumes.c  |8 
 fs/buffer.c |4 ++--
 fs/cramfs/inode.c   |2 +-
 fs/ext4/super.c |2 +-
 fs/fat/inode.c  |5 +
 fs/hfs/mdb.c|2 +-
 fs/hfsplus/wrapper.c|2 +-
 fs/jfs/resize.c |5 ++---
 fs/jfs/super.c  |5 ++---
 fs/nfs/blocklayout/dev.c|4 ++--
 fs/nilfs2/ioctl.c   |2 +-
 fs/nilfs2/super.c   |2 +-
 fs/nilfs2/the_nilfs.c   |2 +-
 fs/ntfs/super.c |8 +++-
 fs/ntfs3/super.c|3 +--
 fs/pstore/blk.c |8 +++-
 fs/reiserfs/super.c |8 ++--
 fs/squashfs/super.c |5 +++--
 fs/udf/lowlevel.c   |5 ++---
 fs/udf/super.c  |9 +++--
 include/linux/blk_types.h   |   17 +
 include/linux/blkdev.h  |   17 -
 include/linux/genhd.h   |   13 -
 57 files changed, 118 insertions(+), 139 deletions(-)

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel