Re: [PATCH RFC v3 for-6.8/block 04/17] mtd: block2mtd: use bdev apis

2024-01-05 Thread Yu Kuai

Hi,

在 2024/01/05 14:10, Christoph Hellwig 写道:

On Thu, Jan 04, 2024 at 12:28:55PM +0100, Jan Kara wrote:

What do you think? Because when we are working with the folios it is rather
natural to use their mapping for dirty balancing?


The real problem is that block2mtd pokes way too deep into block
internals.

I think the saviour here is Christian's series to replace the bdev handle
with a struct file, which will allow us to use the normal file write path
here and get rid of the entire layering violation.


Yes, it looks like lots of patches from this set are not needed anymore.
I'll stop sending v4 and just send some patches that are not related to
'bd_inode' separately.

Thanks,
Kuai



.






Re: [PATCH RFC v3 for-6.8/block 11/17] erofs: use bdev api

2024-01-04 Thread Yu Kuai

Hi, Jan!

在 2024/01/04 20:02, Jan Kara 写道:

On Thu 21-12-23 16:58:26, Yu Kuai wrote:

From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode
from block_device.

Signed-off-by: Yu Kuai 


I'm not erofs maintainer but IMO this is quite ugly and grows erofs_buf
unnecessarily. I'd rather store 'sb' pointer in erofs_buf and then do the
right thing in erofs_bread() which is the only place that seems to care
about the erofs_is_fscache_mode() distinction... Also blkszbits is then
trivially sb->s_blocksize_bits so it would all seem much more
straightforward.


Thanks for your suggestion, I'll follow this unless Gao Xiang has other
suggestions.

Kuai


Honza


---
  fs/erofs/data.c | 18 --
  fs/erofs/internal.h |  2 ++
  2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index c98aeda8abb2..bbe2fe199bf3 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -32,8 +32,8 @@ void erofs_put_metabuf(struct erofs_buf *buf)
  void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
  enum erofs_kmap_type type)
  {
-   struct inode *inode = buf->inode;
-   erofs_off_t offset = (erofs_off_t)blkaddr << inode->i_blkbits;
+   u8 blkszbits = buf->inode ? buf->inode->i_blkbits : buf->blkszbits;
+   erofs_off_t offset = (erofs_off_t)blkaddr << blkszbits;
pgoff_t index = offset >> PAGE_SHIFT;
struct page *page = buf->page;
struct folio *folio;
@@ -43,7 +43,9 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
erofs_put_metabuf(buf);
  
  		nofs_flag = memalloc_nofs_save();

-   folio = read_cache_folio(inode->i_mapping, index, NULL, NULL);
+   folio = buf->inode ?
+   read_mapping_folio(buf->inode->i_mapping, index, NULL) :
+   bdev_read_folio(buf->bdev, offset);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(folio))
return folio;
@@ -67,10 +69,14 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t 
blkaddr,
  
  void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)

  {
-   if (erofs_is_fscache_mode(sb))
+   if (erofs_is_fscache_mode(sb)) {
buf->inode = EROFS_SB(sb)->s_fscache->inode;
-   else
-   buf->inode = sb->s_bdev->bd_inode;
+   buf->bdev = NULL;
+   } else {
+   buf->inode = NULL;
+   buf->bdev = sb->s_bdev;
+   buf->blkszbits = EROFS_SB(sb)->blkszbits;
+   }
  }
  
  void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b0409badb017..c9206351b485 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -224,8 +224,10 @@ enum erofs_kmap_type {
  
  struct erofs_buf {

struct inode *inode;
+   struct block_device *bdev;
struct page *page;
void *base;
+   u8 blkszbits;
enum erofs_kmap_type kmap_type;
  };
  #define __EROFS_BUF_INITIALIZER   ((struct erofs_buf){ .page = NULL })
--
2.39.2






Re: [PATCH RFC v3 for-6.8/block 04/17] mtd: block2mtd: use bdev apis

2024-01-04 Thread Yu Kuai

Hi,

在 2024/01/04 19:28, Jan Kara 写道:

On Thu 21-12-23 16:56:59, Yu Kuai wrote:

From: Yu Kuai 

On the one hand, convert to using folios while reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 

...

+   for (p = folio_address(folio); p < max; p++)
if (*p != -1UL) {
-   lock_page(page);
-   memset(page_address(page), 0xff, PAGE_SIZE);
-   set_page_dirty(page);
-   unlock_page(page);
-   balance_dirty_pages_ratelimited(mapping);
+   folio_lock(folio);
+   memset(folio_address(folio), 0xff,
+  folio_size(folio));
+   folio_mark_dirty(folio);
+   folio_unlock(folio);
+   bdev_balance_dirty_pages_ratelimited(bdev);


Rather than creating this bdev_balance_dirty_pages_ratelimited() just for
MTD perhaps we can have here (and in other functions):

...
mapping = folio_mapping(folio);
folio_unlock(folio);
if (mapping)

balance_dirty_pages_ratelimited(mapping);

What do you think? Because when we are working with the folios it is rather
natural to use their mapping for dirty balancing?


I think this is a great idea! And bdev_balance_dirty_pages_ratelimited()
can be removed as well.

Thanks,
Kuai



Honza






Re: [PATCH RFC v3 for-6.8/block 02/17] xen/blkback: use bdev api in xen_update_blkif_status()

2024-01-04 Thread Yu Kuai

Hi, Jan!

在 2024/01/04 19:06, Jan Kara 写道:

On Thu 21-12-23 16:56:57, Yu Kuai wrote:

From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode
from block_device.

Signed-off-by: Yu Kuai 
---
  drivers/block/xen-blkback/xenbus.c | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index e34219ea2b05..e645afa4af57 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -104,8 +104,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
xenbus_dev_error(blkif->be->dev, err, "block flush");
return;
}
-   invalidate_inode_pages2(
-   blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);
+   invalidate_bdev(blkif->vbd.bdev_handle->bdev);


This function uses invalidate_inode_pages2() while invalidate_bdev() ends
up using mapping_try_invalidate() and there are subtle behavioral
differences between these two (for example invalidate_inode_pages2() tries
to clean dirty pages using the ->launder_folio method). So I think you'll
need helper like invalidate_bdev2() for this.


Thanks for reviewing this patch. I know the difference between them;
what I don't understand is why invalidate_inode_pages2() is used
here. sync_blockdev() has just been called and returned 0, so I think in this
case it's safe to call invalidate_bdev() directly, or am I missing
something else?

Thanks,
Kuai



Honza






[PATCH RFC v3 for-6.8/block 17/17] ext4: use bdev apis

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode
from block_device.

Signed-off-by: Yu Kuai 
Reviewed-by: Jan Kara 
---
 fs/ext4/dir.c   | 6 ++
 fs/ext4/ext4_jbd2.c | 6 +++---
 fs/ext4/super.c | 3 +--
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3985f8c33f95..64e35eb6a324 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -191,10 +191,8 @@ static int ext4_readdir(struct file *file, struct 
dir_context *ctx)
pgoff_t index = map.m_pblk >>
(PAGE_SHIFT - inode->i_blkbits);
if (!ra_has_index(>f_ra, index))
-   page_cache_sync_readahead(
-   sb->s_bdev->bd_inode->i_mapping,
-   >f_ra, file,
-   index, 1);
+   bdev_sync_readahead(sb->s_bdev, >f_ra,
+   file, index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
if (IS_ERR(bh)) {
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d1a2e6624401..c1bf3a00fad9 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -206,7 +206,6 @@ static void ext4_journal_abort_handle(const char *caller, 
unsigned int line,
 
 static void ext4_check_bdev_write_error(struct super_block *sb)
 {
-   struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err;
 
@@ -216,9 +215,10 @@ static void ext4_check_bdev_write_error(struct super_block 
*sb)
 * we could read old data from disk and write it out again, which
 * may lead to on-disk filesystem inconsistency.
 */
-   if (errseq_check(>wb_err, READ_ONCE(sbi->s_bdev_wb_err))) {
+   if (bdev_wb_err_check(sb->s_bdev, READ_ONCE(sbi->s_bdev_wb_err))) {
spin_lock(>s_bdev_wb_lock);
-   err = errseq_check_and_advance(>wb_err, 
>s_bdev_wb_err);
+   err = bdev_wb_err_check_and_advance(sb->s_bdev,
+   >s_bdev_wb_err);
spin_unlock(>s_bdev_wb_lock);
if (err)
ext4_error_err(sb, -err,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a7935edbd7b1..25c3d2ac8559 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5544,8 +5544,7 @@ static int __ext4_fill_super(struct fs_context *fc, 
struct super_block *sb)
 * used to detect the metadata async write error.
 */
spin_lock_init(>s_bdev_wb_lock);
-   errseq_check_and_advance(>s_bdev->bd_inode->i_mapping->wb_err,
->s_bdev_wb_err);
+   bdev_wb_err_check_and_advance(sb->s_bdev, >s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 16/17] ext4: remove block_device_ejected()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

block_device_ejected() was added by commit bdfe0cbd746a ("Revert
"ext4: remove block_device_ejected"") in 2015. At that time 'bdi->wb'
was destroyed synchronously from del_gendisk(), hence if ext4 was still
mounted, mark_buffer_dirty() would reference the destroyed 'wb'.
However, this problem doesn't exist anymore:

- commit d03f6cdc1fc4 ("block: Dynamically allocate and refcount
backing_dev_info") switched bdi to use refcounting;
- commit 13eec2363ef0 ("fs: Get proper reference for s_bdi") grabs an
additional reference to the bdi while mounting, so that 'bdi->wb' will not
be destroyed until generic_shutdown_super().

Hence remove this dead function block_device_ejected().

Signed-off-by: Yu Kuai 
Reviewed-by: Jan Kara 
Reviewed-by: Christoph Hellwig 
---
 fs/ext4/super.c | 18 --
 1 file changed, 18 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3f07eaa2..a7935edbd7b1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -467,22 +467,6 @@ static void ext4_maybe_update_superblock(struct 
super_block *sb)
schedule_work(_SB(sb)->s_sb_upd_work);
 }
 
-/*
- * The del_gendisk() function uninitializes the disk-specific data
- * structures, including the bdi structure, without telling anyone
- * else.  Once this happens, any attempt to call mark_buffer_dirty()
- * (for example, by ext4_commit_super), will cause a kernel OOPS.
- * This is a kludge to prevent these oops until we can put in a proper
- * hook in del_gendisk() to inform the VFS and file system layers.
- */
-static int block_device_ejected(struct super_block *sb)
-{
-   struct inode *bd_inode = sb->s_bdev->bd_inode;
-   struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
-
-   return bdi->dev == NULL;
-}
-
 static void ext4_journal_commit_callback(journal_t *journal, transaction_t 
*txn)
 {
struct super_block  *sb = journal->j_private;
@@ -6162,8 +6146,6 @@ static int ext4_commit_super(struct super_block *sb)
 
if (!sbh)
return -EINVAL;
-   if (block_device_ejected(sb))
-   return -ENODEV;
 
ext4_update_super(sb);
 
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 14/17] buffer: add a new helper to read sb block

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Unlike __bread_gfp(), ext4 has special handling while reading sb block:

1) __GFP_NOFAIL is not set, and memory allocation can fail;
2) If buffer write failed before, set buffer uptodate and don't read
   block from disk;
3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
4) If failed, return error ptr instead of NULL;

This patch adds a new helper __bread_gfp2() that matches 2 and 3 above
(unlike 1, __GFP_NOFAIL will still be set, and 4 will still be encapsulated
by ext4), in preparation for no longer calling mapping_gfp_constraint()
directly on bd_inode->i_mapping in ext4.

Signed-off-by: Yu Kuai 
---
 fs/buffer.c | 68 ++---
 include/linux/buffer_head.h | 18 +-
 2 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 967f34b70aa8..188bd36c9fea 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1255,16 +1255,19 @@ void __bforget(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(__bforget);
 
-static struct buffer_head *__bread_slow(struct buffer_head *bh)
+static struct buffer_head *__bread_slow(struct buffer_head *bh,
+   blk_opf_t op_flags,
+   bool check_write_error)
 {
lock_buffer(bh);
-   if (buffer_uptodate(bh)) {
+   if (buffer_uptodate(bh) ||
+   (check_write_error && buffer_uptodate_or_error(bh))) {
unlock_buffer(bh);
return bh;
} else {
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
-   submit_bh(REQ_OP_READ, bh);
+   submit_bh(REQ_OP_READ | op_flags, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
@@ -1445,6 +1448,31 @@ void __breadahead(struct block_device *bdev, sector_t 
block, unsigned size)
 }
 EXPORT_SYMBOL(__breadahead);
 
+static struct buffer_head *
+bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+ blk_opf_t op_flags, gfp_t gfp, bool check_write_error)
+{
+   struct buffer_head *bh;
+
+   gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+
+   /*
+* Prefer looping in the allocator rather than here, at least that
+* code knows what it's doing.
+*/
+   gfp |= __GFP_NOFAIL;
+
+   bh = bdev_getblk(bdev, block, size, gfp);
+   if (unlikely(!bh))
+   return NULL;
+
+   if (buffer_uptodate(bh) ||
+   (check_write_error && buffer_uptodate_or_error(bh)))
+   return bh;
+
+   return __bread_slow(bh, op_flags, check_write_error);
+}
+
 /**
  *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
@@ -1458,27 +1486,27 @@ EXPORT_SYMBOL(__breadahead);
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread_gfp(struct block_device *bdev, sector_t block,
-  unsigned size, gfp_t gfp)
+__bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+   gfp_t gfp)
 {
-   struct buffer_head *bh;
-
-   gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
-
-   /*
-* Prefer looping in the allocator rather than here, at least that
-* code knows what it's doing.
-*/
-   gfp |= __GFP_NOFAIL;
-
-   bh = bdev_getblk(bdev, block, size, gfp);
-
-   if (likely(bh) && !buffer_uptodate(bh))
-   bh = __bread_slow(bh);
-   return bh;
+   return bread_gfp(bdev, block, size, 0, gfp, false);
 }
 EXPORT_SYMBOL(__bread_gfp);
 
+/*
+ * This works like __bread_gfp() except:
+ * 1) If buffer write failed before, set buffer uptodate and don't read
+ * block from disk;
+ * 2) Caller can pass in additional op_flags like REQ_META;
+ */
+struct buffer_head *
+__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
+blk_opf_t op_flags, gfp_t gfp)
+{
+   return bread_gfp(bdev, block, size, op_flags, gfp, true);
+}
+EXPORT_SYMBOL(__bread_gfp2);
+
 static void __invalidate_bh_lrus(struct bh_lru *b)
 {
int i;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5f23ee599889..751b2744b4ae 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -171,6 +171,18 @@ static __always_inline int buffer_uptodate(const struct 
buffer_head *bh)
return test_bit_acquire(BH_Uptodate, >b_state);
 }
 
+static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh)
+{
+   /*
+* If the buffer has the write error flag, data was failed to write
+* out in the block. In this case, set buffer uptodate to prevent
+* reading old data.
+*/
+   if (buffer_write_io_error(bh))
+   set_buffer_uptodate(bh);
+   return buffer_uptodate(bh);
+}
+
 static inline unsigned long bh_offset(co

[PATCH RFC v3 for-6.8/block 15/17] ext4: use new helper to read sb block

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Remove __ext4_sb_bread_gfp() and ext4_buffer_uptodate(), which are defined
by ext4, and convert to use the common helpers __bread_gfp2() and
buffer_uptodate_or_error().

Signed-off-by: Yu Kuai 
Reviewed-by: Jan Kara 
---
 fs/ext4/ext4.h| 13 -
 fs/ext4/inode.c   |  8 
 fs/ext4/super.c   | 45 ++---
 fs/ext4/symlink.c |  2 +-
 4 files changed, 15 insertions(+), 53 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a5d784872303..8377f6c5264f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3824,19 +3824,6 @@ extern const struct iomap_ops ext4_iomap_ops;
 extern const struct iomap_ops ext4_iomap_overwrite_ops;
 extern const struct iomap_ops ext4_iomap_report_ops;
 
-static inline int ext4_buffer_uptodate(struct buffer_head *bh)
-{
-   /*
-* If the buffer has the write error flag, we have failed
-* to write out data in the block.  In this  case, we don't
-* have to read the block because we may read the old data
-* successfully.
-*/
-   if (buffer_write_io_error(bh))
-   set_buffer_uptodate(bh);
-   return buffer_uptodate(bh);
-}
-
 #endif /* __KERNEL__ */
 
 #define EFSBADCRC  EBADMSG /* Bad CRC detected */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 61277f7f8722..efb0af6f02f7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -887,7 +887,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct 
inode *inode,
bh = ext4_getblk(handle, inode, block, map_flags);
if (IS_ERR(bh))
return bh;
-   if (!bh || ext4_buffer_uptodate(bh))
+   if (!bh || buffer_uptodate_or_error(bh))
return bh;
 
ret = ext4_read_bh_lock(bh, REQ_META | REQ_PRIO, true);
@@ -915,7 +915,7 @@ int ext4_bread_batch(struct inode *inode, ext4_lblk_t 
block, int bh_count,
 
for (i = 0; i < bh_count; i++)
/* Note that NULL bhs[i] is valid because of holes. */
-   if (bhs[i] && !ext4_buffer_uptodate(bhs[i]))
+   if (bhs[i] && !buffer_uptodate_or_error(bhs[i]))
ext4_read_bh_lock(bhs[i], REQ_META | REQ_PRIO, false);
 
if (!wait)
@@ -4392,11 +4392,11 @@ static int __ext4_get_inode_loc(struct super_block *sb, 
unsigned long ino,
bh = sb_getblk(sb, block);
if (unlikely(!bh))
return -ENOMEM;
-   if (ext4_buffer_uptodate(bh))
+   if (buffer_uptodate_or_error(bh))
goto has_buffer;
 
lock_buffer(bh);
-   if (ext4_buffer_uptodate(bh)) {
+   if (buffer_uptodate_or_error(bh)) {
/* Someone brought it uptodate while we waited */
unlock_buffer(bh);
goto has_buffer;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c5fcf377ab1f..3f07eaa2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -180,7 +180,7 @@ void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t 
op_flags,
 {
BUG_ON(!buffer_locked(bh));
 
-   if (ext4_buffer_uptodate(bh)) {
+   if (buffer_uptodate_or_error(bh)) {
unlock_buffer(bh);
return;
}
@@ -191,7 +191,7 @@ int ext4_read_bh(struct buffer_head *bh, blk_opf_t 
op_flags, bh_end_io_t *end_io
 {
BUG_ON(!buffer_locked(bh));
 
-   if (ext4_buffer_uptodate(bh)) {
+   if (buffer_uptodate_or_error(bh)) {
unlock_buffer(bh);
return 0;
}
@@ -214,49 +214,24 @@ int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t 
op_flags, bool wait)
return ext4_read_bh(bh, op_flags, NULL);
 }
 
-/*
- * This works like __bread_gfp() except it uses ERR_PTR for error
- * returns.  Currently with sb_bread it's impossible to distinguish
- * between ENOMEM and EIO situations (since both result in a NULL
- * return.
- */
-static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
-  sector_t block,
-  blk_opf_t op_flags, gfp_t gfp)
-{
-   struct buffer_head *bh;
-   int ret;
-
-   bh = sb_getblk_gfp(sb, block, gfp);
-   if (bh == NULL)
-   return ERR_PTR(-ENOMEM);
-   if (ext4_buffer_uptodate(bh))
-   return bh;
-
-   ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
-   if (ret) {
-   put_bh(bh);
-   return ERR_PTR(ret);
-   }
-   return bh;
-}
-
 struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
   blk_opf_t op_flags)
 {
-   gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
-   ~__GFP_FS) | __GFP_MOVABLE;
+   struct buffer_head *bh = __bread_gfp2(sb->s_bdev, block,
+ sb->s_blocksize,
+  

[PATCH RFC v3 for-6.8/block 13/17] jbd2: use bdev apis

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode
from block_device.

Signed-off-by: Yu Kuai 
---
 fs/jbd2/journal.c  | 3 +--
 fs/jbd2/recovery.c | 6 ++
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ed53188472f9..f1b5ffeaf02a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2003,8 +2003,7 @@ static int __jbd2_journal_erase(journal_t *journal, 
unsigned int flags)
byte_count = (block_stop - block_start + 1) *
journal->j_blocksize;
 
-   truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
-   byte_start, byte_stop);
+   truncate_bdev_range(journal->j_dev, 0, byte_start, byte_stop);
 
if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
err = blkdev_issue_discard(journal->j_dev,
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 01f744cb97a4..6b6a2c4585fa 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -290,7 +290,6 @@ int jbd2_journal_recover(journal_t *journal)
 
struct recovery_infoinfo;
errseq_twb_err;
-   struct address_space*mapping;
 
memset(, 0, sizeof(info));
sb = journal->j_superblock;
@@ -309,8 +308,7 @@ int jbd2_journal_recover(journal_t *journal)
}
 
wb_err = 0;
-   mapping = journal->j_fs_dev->bd_inode->i_mapping;
-   errseq_check_and_advance(>wb_err, _err);
+   bdev_wb_err_check_and_advance(journal->j_fs_dev, _err);
err = do_one_pass(journal, , PASS_SCAN);
if (!err)
err = do_one_pass(journal, , PASS_REVOKE);
@@ -334,7 +332,7 @@ int jbd2_journal_recover(journal_t *journal)
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
-   err2 = errseq_check_and_advance(>wb_err, _err);
+   err2 = bdev_wb_err_check_and_advance(journal->j_fs_dev, _err);
if (!err)
err = err2;
/* Make sure all replayed data is on permanent storage */
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 12/17] nilfs2: use bdev api in nilfs_attach_log_writer()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode
from block_device.

Signed-off-by: Yu Kuai 
---
 fs/nilfs2/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55e31cc903d1..a1130e384937 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2823,7 +2823,7 @@ int nilfs_attach_log_writer(struct super_block *sb, 
struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
 
-   inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+   bdev_attach_wb(nilfs->ns_bdev);
 
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (unlikely(err))
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 11/17] erofs: use bdev api

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode
from block_device.

Signed-off-by: Yu Kuai 
---
 fs/erofs/data.c | 18 --
 fs/erofs/internal.h |  2 ++
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index c98aeda8abb2..bbe2fe199bf3 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -32,8 +32,8 @@ void erofs_put_metabuf(struct erofs_buf *buf)
 void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
  enum erofs_kmap_type type)
 {
-   struct inode *inode = buf->inode;
-   erofs_off_t offset = (erofs_off_t)blkaddr << inode->i_blkbits;
+   u8 blkszbits = buf->inode ? buf->inode->i_blkbits : buf->blkszbits;
+   erofs_off_t offset = (erofs_off_t)blkaddr << blkszbits;
pgoff_t index = offset >> PAGE_SHIFT;
struct page *page = buf->page;
struct folio *folio;
@@ -43,7 +43,9 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
erofs_put_metabuf(buf);
 
nofs_flag = memalloc_nofs_save();
-   folio = read_cache_folio(inode->i_mapping, index, NULL, NULL);
+   folio = buf->inode ?
+   read_mapping_folio(buf->inode->i_mapping, index, NULL) :
+   bdev_read_folio(buf->bdev, offset);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(folio))
return folio;
@@ -67,10 +69,14 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t 
blkaddr,
 
 void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
 {
-   if (erofs_is_fscache_mode(sb))
+   if (erofs_is_fscache_mode(sb)) {
buf->inode = EROFS_SB(sb)->s_fscache->inode;
-   else
-   buf->inode = sb->s_bdev->bd_inode;
+   buf->bdev = NULL;
+   } else {
+   buf->inode = NULL;
+   buf->bdev = sb->s_bdev;
+   buf->blkszbits = EROFS_SB(sb)->blkszbits;
+   }
 }
 
 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b0409badb017..c9206351b485 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -224,8 +224,10 @@ enum erofs_kmap_type {
 
 struct erofs_buf {
struct inode *inode;
+   struct block_device *bdev;
struct page *page;
void *base;
+   u8 blkszbits;
enum erofs_kmap_type kmap_type;
 };
 #define __EROFS_BUF_INITIALIZER((struct erofs_buf){ .page = NULL })
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 09/17] btrfs: use bdev apis

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios while reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 fs/btrfs/disk-io.c | 71 +-
 fs/btrfs/volumes.c | 17 ++-
 fs/btrfs/zoned.c   | 15 +-
 3 files changed, 48 insertions(+), 55 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 401ea09ae4b8..a1cfdee99a81 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3620,28 +3620,24 @@ ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
 static void btrfs_end_super_write(struct bio *bio)
 {
struct btrfs_device *device = bio->bi_private;
-   struct bio_vec *bvec;
-   struct bvec_iter_all iter_all;
-   struct page *page;
-
-   bio_for_each_segment_all(bvec, bio, iter_all) {
-   page = bvec->bv_page;
+   struct folio_iter fi;
 
+   bio_for_each_folio_all(fi, bio) {
if (bio->bi_status) {
btrfs_warn_rl_in_rcu(device->fs_info,
"lost page write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
-   ClearPageUptodate(page);
-   SetPageError(page);
+   folio_clear_uptodate(fi.folio);
+   folio_set_error(fi.folio);
btrfs_dev_stat_inc_and_print(device,
 BTRFS_DEV_STAT_WRITE_ERRS);
} else {
-   SetPageUptodate(page);
+   folio_mark_uptodate(fi.folio);
}
 
-   put_page(page);
-   unlock_page(page);
+   folio_put(fi.folio);
+   folio_unlock(fi.folio);
}
 
bio_put(bio);
@@ -3651,9 +3647,9 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct 
block_device *bdev,
   int copy_num, bool 
drop_cache)
 {
struct btrfs_super_block *super;
-   struct page *page;
+   struct folio *folio;
u64 bytenr, bytenr_orig;
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
+   unsigned int nofs_flag;
int ret;
 
bytenr_orig = btrfs_sb_offset(copy_num);
@@ -3674,16 +3670,17 @@ struct btrfs_super_block 
*btrfs_read_dev_one_super(struct block_device *bdev,
 * Drop the page of the primary superblock, so later read will
 * always read from the device.
 */
-   invalidate_inode_pages2_range(mapping,
-   bytenr >> PAGE_SHIFT,
+   invalidate_bdev_range(bdev, bytenr >> PAGE_SHIFT,
(bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
}
 
-   page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
-   if (IS_ERR(page))
-   return ERR_CAST(page);
+   nofs_flag = memalloc_nofs_save();
+   folio = bdev_read_folio(bdev, bytenr);
+   memalloc_nofs_restore(nofs_flag);
+   if (IS_ERR(folio))
+   return ERR_CAST(folio);
 
-   super = page_address(page);
+   super = folio_address(folio);
if (btrfs_super_magic(super) != BTRFS_MAGIC) {
btrfs_release_disk_super(super);
return ERR_PTR(-ENODATA);
@@ -3740,7 +3737,6 @@ static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
 {
struct btrfs_fs_info *fs_info = device->fs_info;
-   struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
@@ -3753,7 +3749,7 @@ static int write_dev_supers(struct btrfs_device *device,
shash->tfm = fs_info->csum_shash;
 
for (i = 0; i < max_mirrors; i++) {
-   struct page *page;
+   struct folio *folio;
struct bio *bio;
struct btrfs_super_block *disk_super;
 
@@ -3778,9 +3774,10 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
 
-   page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
-  GFP_NOFS);
-   if (!page) {
+   folio = bdev_get_folio(device->bdev, bytenr,
+  FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
+  GFP_NOFS);
+   if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
"couldn't get super block page for bytenr %

[PATCH RFC v3 for-6.8/block 08/17] bio: export bio_add_folio_nofail()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Currently btrfs is using __bio_add_page() in write_dev_supers(). In order
to convert to use folio for bdev in btrfs, export bio_add_folio_nofail()
so that it can replace __bio_add_page().

Signed-off-by: Yu Kuai 
---
 block/bio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/bio.c b/block/bio.c
index b9642a41f286..c7459839ca40 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1122,6 +1122,7 @@ void bio_add_folio_nofail(struct bio *bio, struct folio 
*folio, size_t len,
WARN_ON_ONCE(off > UINT_MAX);
__bio_add_page(bio, >page, len, off);
 }
+EXPORT_SYMBOL_GPL(bio_add_folio_nofail);
 
 /**
  * bio_add_folio - Attempt to add part of a folio to a bio.
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 06/17] scsicam: use bdev api in scsi_bios_ptable()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode
from block_device.

Signed-off-by: Yu Kuai 
---
 drivers/scsi/scsicam.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index e2c7d8ef205f..9617d70c0ed1 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -32,11 +32,9 @@
  */
 unsigned char *scsi_bios_ptable(struct block_device *dev)
 {
-   struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping;
unsigned char *res = NULL;
-   struct folio *folio;
+   struct folio *folio = bdev_read_folio(bdev_whole(dev), 0);
 
-   folio = read_mapping_folio(mapping, 0, NULL);
if (IS_ERR(folio))
return NULL;
 
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 03/17] bcache: use bdev api in read_super()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios while reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 drivers/md/bcache/super.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bfe1685dbae5..23892b32c582 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -168,14 +168,13 @@ static const char *read_super(struct cache_sb *sb, struct 
block_device *bdev,
 {
const char *err;
struct cache_sb_disk *s;
-   struct page *page;
+   struct folio *folio;
unsigned int i;
 
-   page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
-  SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
-   if (IS_ERR(page))
+   folio = bdev_read_folio(bdev, SB_OFFSET);
+   if (IS_ERR(folio))
return "IO error";
-   s = page_address(page) + offset_in_page(SB_OFFSET);
+   s = folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
 
sb->offset  = le64_to_cpu(s->offset);
sb->version = le64_to_cpu(s->version);
@@ -272,7 +271,7 @@ static const char *read_super(struct cache_sb *sb, struct 
block_device *bdev,
*res = s;
return NULL;
 err:
-   put_page(page);
+   folio_put(folio);
return err;
 }
 
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 01/17] block: add some bdev apis

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

These APIs will be used by other modules, so that bd_inode won't be
accessed directly from other modules.

Signed-off-by: Yu Kuai 
---
 block/bdev.c   | 148 +
 block/blk.h|   2 -
 include/linux/blkdev.h |  17 +
 3 files changed, 165 insertions(+), 2 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 750aec178b6a..6204621c6db6 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -89,6 +89,25 @@ void invalidate_bdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
+/**
+ * invalidate_bdev_range - Invalidate clean unused buffers and pagecache.
+ * @bdev: the block device which holds the cache to invalidate
+ * @start: the offset 'from' which to invalidate
+ * @end: the offset 'to' which to invalidate (inclusive)
+ *
+ * This function removes pages that are clean, unmapped and unlocked,
+ * as well as shadow entries. It will not block on IO activity.
+ *
+ * If you want to remove all the pages of one block device, regardless of
+ * their use and writeback state, use truncate_bdev_range().
+ */
+void invalidate_bdev_range(struct block_device *bdev, pgoff_t start,
+  pgoff_t end)
+{
+   invalidate_mapping_pages(bdev->bd_inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(invalidate_bdev_range);
+
 /*
  * Drop all buffers & page cache for given bdev range. This function bails
  * with error if bdev has other exclusive owner (such as filesystem).
@@ -121,6 +140,7 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
 lstart >> PAGE_SHIFT,
 lend >> PAGE_SHIFT);
 }
+EXPORT_SYMBOL_GPL(truncate_bdev_range);
 
 static void set_init_blocksize(struct block_device *bdev)
 {
@@ -1102,3 +1122,131 @@ void bdev_statx_dioalign(struct inode *inode, struct 
kstat *stat)
 
blkdev_put_no_open(bdev);
 }
+
+/**
+ * bdev_read_folio - Read into block device page cache.
+ * @bdev: the block device which holds the cache to read.
+ * @pos: the offset that allocated folio will contain.
+ *
+ * Read one page into the block device page cache. If it succeeds, the folio
+ * returned will contain @pos;
+ *
+ * Return: Uptodate folio on success, ERR_PTR() on failure.
+ */
+struct folio *bdev_read_folio(struct block_device *bdev, loff_t pos)
+{
+   return mapping_read_folio_gfp(bdev->bd_inode->i_mapping,
+ pos >> PAGE_SHIFT, GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(bdev_read_folio);
+
+/**
+ * bdev_get_folio - Find and get a reference to a folio.
+ * @bdev: the block device which holds the address_space to search.
+ * @pos: the offset the returned folio will contain.
+ * @fgp_flags: %FGP flags modify how the folio is returned.
+ * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
+ *
+ * Looks up the page cache entry at @bdev->bd_inode->i_mapping from @pos. If
+ * this function returns a folio, it is returned with an increased refcount.
+ *
+ * Return: The found folio or an ERR_PTR() otherwise.
+ */
+struct folio *bdev_get_folio(struct block_device *bdev, loff_t pos,
+fgf_t fgp_flags, gfp_t gfp)
+{
+   return __filemap_get_folio(bdev->bd_inode->i_mapping, pos >> PAGE_SHIFT,
+  fgp_flags, gfp);
+}
+EXPORT_SYMBOL_GPL(bdev_get_folio);
+
+/**
+ * bdev_wb_err_check - Has block device writeback error occurred?
+ * @bdev: the block device to check.
+ * @since: Previously-sampled @bdev->bd_inode->i_mapping->wb_err.
+ *
+ * Grab @bdev->bd_inode->i_mapping->wb_err, and see if it has changed @since
+ * the given value was sampled.
+ *
+ * Return: The latest error or 0 if it hasn't changed.
+ */
+int bdev_wb_err_check(struct block_device *bdev, errseq_t since)
+{
+   return errseq_check(&bdev->bd_inode->i_mapping->wb_err, since);
+}
+EXPORT_SYMBOL_GPL(bdev_wb_err_check);
+
+/**
+ * bdev_wb_err_check_and_advance() - Check block device writeback error and
+ * advance to current value.
+ * @bdev: the block device to check;
+ * @since: Pointer to previously-sampled @bdev->bd_inode->i_mapping->wb_err to
+ * check against and advance.
+ *
+ * Grab @bdev->bd_inode->i_mapping->wb_err, and see whether it matches the
+ * value that @since points to. If it does, then just return 0; If it doesn't,
+ * then the value has changed. Set the "seen" flag, and try to swap it into
+ * place as the new eseq value. Then, set that value as the new @since value,
+ * and return whatever the error portion is set to.
+ *
+ * Return: Negative errno if one has been stored, or 0 if no new error has
+ * occurred.
+ */
+int bdev_wb_err_check_and_advance(struct block_device *bdev, errseq_t *since)
+{
+   return errseq_check_and_advance(&bdev->bd_inode->i_mapping->wb_err,
+   since);
+}
+EXPO

[PATCH RFC v3 for-6.8/block 10/17] cramfs: use bdev apis in cramfs_blkdev_read()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Also do some cleanup that there is no need for two for loop, and remove
local array pages.

Signed-off-by: Yu Kuai 
---
 fs/cramfs/inode.c | 36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 60dbfa0f8805..fad95d683d97 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -183,9 +183,6 @@ static int next_buffer;
 static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset,
unsigned int len)
 {
-   struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
-   struct file_ra_state ra = {};
-   struct page *pages[BLKS_PER_BUF];
unsigned i, blocknr, buffer;
unsigned long devsize;
char *data;
@@ -214,37 +211,30 @@ static void *cramfs_blkdev_read(struct super_block *sb, 
unsigned int offset,
devsize = bdev_nr_bytes(sb->s_bdev) >> PAGE_SHIFT;
 
/* Ok, read in BLKS_PER_BUF pages completely first. */
-   file_ra_state_init(&ra, mapping);
-   page_cache_sync_readahead(mapping, &ra, NULL, blocknr, BLKS_PER_BUF);
-
-   for (i = 0; i < BLKS_PER_BUF; i++) {
-   struct page *page = NULL;
-
-   if (blocknr + i < devsize) {
-   page = read_mapping_page(mapping, blocknr + i, NULL);
-   /* synchronous error? */
-   if (IS_ERR(page))
-   page = NULL;
-   }
-   pages[i] = page;
-   }
+   bdev_sync_readahead(sb->s_bdev, NULL, NULL, blocknr, BLKS_PER_BUF);
 
buffer = next_buffer;
next_buffer = NEXT_BUFFER(buffer);
buffer_blocknr[buffer] = blocknr;
buffer_dev[buffer] = sb;
-
data = read_buffers[buffer];
+
for (i = 0; i < BLKS_PER_BUF; i++) {
-   struct page *page = pages[i];
+   struct folio *folio = NULL;
+
+   if (blocknr + i < devsize)
+   folio = bdev_read_folio(sb->s_bdev,
+   (blocknr + i) << PAGE_SHIFT);
 
-   if (page) {
-   memcpy_from_page(data, page, 0, PAGE_SIZE);
-   put_page(page);
-   } else
+   if (IS_ERR_OR_NULL(folio)) {
memset(data, 0, PAGE_SIZE);
+   } else {
+   memcpy_from_folio(data, folio, 0, PAGE_SIZE);
+   folio_put(folio);
+   }
data += PAGE_SIZE;
}
+
return read_buffers[buffer] + offset;
 }
 
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 05/17] s390/dasd: use bdev api in dasd_format()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 drivers/s390/block/dasd_ioctl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 61b9675e2a67..bbfb958237e6 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -221,8 +221,9 @@ dasd_format(struct dasd_block *block, struct format_data_t 
*fdata)
 * enabling the device later.
 */
if (fdata->start_unit == 0) {
-   block->gdp->part0->bd_inode->i_blkbits =
-   blksize_bits(fdata->blksize);
+   rc = set_blocksize(block->gdp->part0, fdata->blksize);
+   if (rc)
+   return rc;
}
 
rc = base->discipline->format_device(base, fdata, 1);
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 07/17] bcachefs: remove dead function bdev_sectors()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

bdev_sectors() is not used hence remove it.

Signed-off-by: Yu Kuai 
---
 fs/bcachefs/util.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2984b57b2958..22a0acc1704f 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -516,11 +516,6 @@ static inline unsigned fract_exp_two(unsigned x, unsigned 
fract_bits)
 void bch2_bio_map(struct bio *bio, void *base, size_t);
 int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
 
-static inline sector_t bdev_sectors(struct block_device *bdev)
-{
-   return bdev->bd_inode->i_size >> 9;
-}
-
 #define closure_bio_submit(bio, cl)\
 do {   \
closure_get(cl);\
-- 
2.39.2




[PATCH RFC v3 for-6.8/block 00/17] block: don't access bd_inode directly from other modules

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Changes in v3:
 - remove bdev_associated_mapping() and patch 12 from v1;
 - add kerneldoc comments for new bdev apis;
 - rename __bdev_get_folio() to bdev_get_folio;
 - fix a problem in erofs that erofs_init_metabuf() is not always
 called.
 - add reviewed-by tag for patch 15-17;
Changes in v2:
 - remove some bdev apis that is not necessary;
 - pass in offset for bdev_read_folio() and __bdev_get_folio();
 - remove bdev_gfp_constraint() and add a new helper in fs/buffer.c to
 avoid accessing bd_inode directly via mapping_gfp_constraint() in
 ext4 (patch 15, 16);
 - remove block_device_ejected() from ext4.


Patch 1 add some bdev apis, then follow up patches will use these apis
to avoid access bd_inode directly, and hopefully the field bd_inode can
be removed eventually(after figure out a way for fs/buffer.c).

Yu Kuai (17):
  block: add some bdev apis
  xen/blkback: use bdev api in xen_update_blkif_status()
  bcache: use bdev api in read_super()
  mtd: block2mtd: use bdev apis
  s390/dasd: use bdev api in dasd_format()
  scsicam: use bdev api in scsi_bios_ptable()
  bcachefs: remove dead function bdev_sectors()
  bio: export bio_add_folio_nofail()
  btrfs: use bdev apis
  cramfs: use bdev apis in cramfs_blkdev_read()
  erofs: use bdev api
  nilfs2: use bdev api in nilfs_attach_log_writer()
  jbd2: use bdev apis
  buffer: add a new helper to read sb block
  ext4: use new helper to read sb block
  ext4: remove block_device_ejected()
  ext4: use bdev apis

 block/bdev.c   | 148 +
 block/bio.c|   1 +
 block/blk.h|   2 -
 drivers/block/xen-blkback/xenbus.c |   3 +-
 drivers/md/bcache/super.c  |  11 +--
 drivers/mtd/devices/block2mtd.c|  81 +++-
 drivers/s390/block/dasd_ioctl.c|   5 +-
 drivers/scsi/scsicam.c |   4 +-
 fs/bcachefs/util.h |   5 -
 fs/btrfs/disk-io.c |  71 +++---
 fs/btrfs/volumes.c |  17 ++--
 fs/btrfs/zoned.c   |  15 +--
 fs/buffer.c|  68 +
 fs/cramfs/inode.c  |  36 +++
 fs/erofs/data.c|  18 ++--
 fs/erofs/internal.h|   2 +
 fs/ext4/dir.c  |   6 +-
 fs/ext4/ext4.h |  13 ---
 fs/ext4/ext4_jbd2.c|   6 +-
 fs/ext4/inode.c|   8 +-
 fs/ext4/super.c|  66 +++--
 fs/ext4/symlink.c  |   2 +-
 fs/jbd2/journal.c  |   3 +-
 fs/jbd2/recovery.c |   6 +-
 fs/nilfs2/segment.c|   2 +-
 include/linux/blkdev.h |  17 
 include/linux/buffer_head.h|  18 +++-
 27 files changed, 377 insertions(+), 257 deletions(-)

-- 
2.39.2




[PATCH RFC v3 for-6.8/block 04/17] mtd: block2mtd: use bdev apis

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 drivers/mtd/devices/block2mtd.c | 81 +++--
 1 file changed, 36 insertions(+), 45 deletions(-)

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index aa44a23ec045..cf201bf73184 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -46,40 +46,34 @@ struct block2mtd_dev {
 /* Static info about the MTD, used in cleanup_module */
 static LIST_HEAD(blkmtd_device_list);
 
-
-static struct page *page_read(struct address_space *mapping, pgoff_t index)
-{
-   return read_mapping_page(mapping, index, NULL);
-}
-
 /* erase a specified part of the device */
 static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len)
 {
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
-   struct page *page;
+   struct block_device *bdev = dev->bdev_handle->bdev;
+   struct folio *folio;
pgoff_t index = to >> PAGE_SHIFT;   // page index
int pages = len >> PAGE_SHIFT;
u_long *p;
u_long *max;
 
while (pages) {
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(bdev, index << PAGE_SHIFT);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   max = page_address(page) + PAGE_SIZE;
-   for (p=page_address(page); ppriv;
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
-   struct page *page;
+   struct folio *folio;
pgoff_t index = from >> PAGE_SHIFT;
int offset = from & (PAGE_SIZE-1);
int cpylen;
@@ -120,12 +112,13 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t 
from, size_t len,
cpylen = len;   // this page
len = len - cpylen;
 
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(dev->bdev_handle->bdev,
+   index << PAGE_SHIFT);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   memcpy(buf, page_address(page) + offset, cpylen);
-   put_page(page);
+   memcpy(buf, folio_address(folio) + offset, cpylen);
+   folio_put(folio);
 
if (retlen)
*retlen += cpylen;
@@ -141,9 +134,8 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t 
from, size_t len,
 static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf,
loff_t to, size_t len, size_t *retlen)
 {
-   struct page *page;
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
+   struct block_device *bdev = dev->bdev_handle->bdev;
+   struct folio *folio;
pgoff_t index = to >> PAGE_SHIFT;   // page index
int offset = to & ~PAGE_MASK;   // page offset
int cpylen;
@@ -155,18 +147,18 @@ static int _block2mtd_write(struct block2mtd_dev *dev, 
const u_char *buf,
cpylen = len;   // this page
len = len - cpylen;
 
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(bdev, index << PAGE_SHIFT);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   if (memcmp(page_address(page)+offset, buf, cpylen)) {
-   lock_page(page);
-   memcpy(page_address(page) + offset, buf, cpylen);
-   set_page_dirty(page);
-   unlock_page(page);
-   balance_dirty_pages_ratelimited(mapping);
+   if (memcmp(folio_address(folio) + offset, buf, cpylen)) {
+   folio_lock(folio);
+   memcpy(folio_address(folio) + offset, buf, cpylen);
+   folio_mark_dirty(folio);
+   folio_unlock(folio);
+   bdev_balance_dirty_pages_ratelimited(bdev);
}
-   put_page(page);
+   folio_put(folio);
 
if (retlen)
*retlen += cpylen;
@@ -211,8 +203,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
kfree(dev->mtd.name);
 
if (dev->bdev_handle) {
-   invalidate_mapping_pages(
- 

[PATCH RFC v3 for-6.8/block 02/17] xen/blkback: use bdev api in xen_update_blkif_status()

2023-12-21 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 drivers/block/xen-blkback/xenbus.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index e34219ea2b05..e645afa4af57 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -104,8 +104,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
xenbus_dev_error(blkif->be->dev, err, "block flush");
return;
}
-   invalidate_inode_pages2(
-   blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);
+   invalidate_bdev(blkif->vbd.bdev_handle->bdev);
 
for (i = 0; i < blkif->nr_rings; i++) {
ring = &blkif->rings[i];
-- 
2.39.2




Re: [PATCH RFC v2 for-6.8/block 01/18] block: add some bdev apis

2023-12-12 Thread Yu Kuai

Hi,

在 2023/12/12 21:14, Christoph Hellwig 写道:

On Mon, Dec 11, 2023 at 05:52:17PM +0100, Jan Kara wrote:

+void bdev_associated_mapping(struct block_device *bdev,
+struct address_space *mapping)
+{
+   mapping->host = bdev->bd_inode;
+}


Here I'm not sure - is the helper really a win? It seems a bit obscure to
me. This initialization of another mapping for a bdev looks really special.


If we want to hide bd_inode we'll something like this helper even if
I don't particularly like it either.

But it might be a good idea to move out of this series and into the
follow on removing bd_inode, as it's rather pointless without that
context.


Yes, this sounds good, I'll remove this from v3.

Thanks,
Kuai


.






Re: [PATCH RFC v2 for-6.8/block 01/18] block: add some bdev apis

2023-12-12 Thread Yu Kuai

Hi,

在 2023/12/12 21:16, Christoph Hellwig 写道:

+void invalidate_bdev_range(struct block_device *bdev, pgoff_t start,
+  pgoff_t end)
+{
+   invalidate_mapping_pages(bdev->bd_inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(invalidate_bdev_range);


Can we have kerneldoc comments for the new helpers please?


Of course, will definitely do this in v3.



+struct folio *__bdev_get_folio(struct block_device *bdev, loff_t pos,
+  fgf_t fgp_flags, gfp_t gfp)
+{
+   return __filemap_get_folio(bdev->bd_inode->i_mapping, pos >> PAGE_SHIFT,
+  fgp_flags, gfp);
+}
+EXPORT_SYMBOL_GPL(__bdev_get_folio);


It's a bit silly to have a __-prefixed API without a version that
doesn't have the prefix, so I'd prefer to drop it.  Unless willy has
a good argument for keeping it the same as the filemap API.


Ok, I'll drop it if willy doesn't object to this.

Thanks,
Kuai


.






Re: [PATCH RFC v2 for-6.8/block 11/18] erofs: use bdev api

2023-12-11 Thread Yu Kuai

Hi,

在 2023/12/12 14:35, Gao Xiang 写道:



On 2023/12/11 22:07, Yu Kuai wrote:

From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
  fs/erofs/data.c | 18 --
  fs/erofs/internal.h |  2 ++
  2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index c98aeda8abb2..8cf3618190ab 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -32,8 +32,7 @@ void erofs_put_metabuf(struct erofs_buf *buf)
  void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
    enum erofs_kmap_type type)
  {
-    struct inode *inode = buf->inode;
-    erofs_off_t offset = (erofs_off_t)blkaddr << inode->i_blkbits;
+    erofs_off_t offset = (erofs_off_t)blkaddr << buf->blkszbits;

I'd suggest that use `buf->blkszbits` only for bdev_read_folio() since
erofs_init_metabuf() is not always called before erofs_bread() is used.

For example, buf->inode can be one of directory inodes other than
initialized by erofs_init_metabuf().


Thanks for the notice, and you're right, I'll update code in v3:

u8 blkszbits = buf->inode ? inode->i_blkbits : buf->blkszbits;
erofs_off_t offset = (erofs_off_t)blkaddr << blkszbits;

Kuai


Thanks,
Gao Xiang



  pgoff_t index = offset >> PAGE_SHIFT;
  struct page *page = buf->page;
  struct folio *folio;
@@ -43,7 +42,9 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t 
blkaddr,

  erofs_put_metabuf(buf);
  nofs_flag = memalloc_nofs_save();
-    folio = read_cache_folio(inode->i_mapping, index, NULL, NULL);
+    folio = buf->inode ?
+    read_mapping_folio(buf->inode->i_mapping, index, NULL) :
+    bdev_read_folio(buf->bdev, offset);
  memalloc_nofs_restore(nofs_flag);
  if (IS_ERR(folio))
  return folio;
@@ -67,10 +68,15 @@ void *erofs_bread(struct erofs_buf *buf, 
erofs_blk_t blkaddr,

  void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
  {
-    if (erofs_is_fscache_mode(sb))
+    if (erofs_is_fscache_mode(sb)) {
  buf->inode = EROFS_SB(sb)->s_fscache->inode;
-    else
-    buf->inode = sb->s_bdev->bd_inode;
+    buf->bdev = NULL;
+    buf->blkszbits = buf->inode->i_blkbits;
+    } else {
+    buf->inode = NULL;
+    buf->bdev = sb->s_bdev;
+    buf->blkszbits = EROFS_SB(sb)->blkszbits;
+    }
  }
  void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b0409badb017..c9206351b485 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -224,8 +224,10 @@ enum erofs_kmap_type {
  struct erofs_buf {
  struct inode *inode;
+    struct block_device *bdev;
  struct page *page;
  void *base;
+    u8 blkszbits;
  enum erofs_kmap_type kmap_type;
  };
  #define __EROFS_BUF_INITIALIZER    ((struct erofs_buf){ .page = NULL })

.






Re: [PATCH RFC v2 for-6.8/block 15/18] buffer: add a new helper to read sb block

2023-12-11 Thread Yu Kuai

Hi,

在 2023/12/12 1:27, Jan Kara 写道:

On Mon 11-12-23 22:07:53, Yu Kuai wrote:

From: Yu Kuai 

Unlike __bread_gfp(), ext4 has special handling while reading the sb block:

1) __GFP_NOFAIL is not set, and memory allocation can fail;
2) If buffer write failed before, set buffer uptodate and don't read
block from disk;
3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
4) If failed, return error ptr instead of NULL;

This patch add a new helper __bread_gfp2() that will match above 2 and 3(
1 will be used, and 4 will still be encapsulated by ext4), and prepare to
prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping
in ext4.

Signed-off-by: Yu Kuai 

...

+/*
+ * This works like __bread_gfp() except:
+ * 1) If buffer write failed before, set buffer uptodate and don't read
+ * block from disk;
+ * 2) Caller can pass in additional op_flags like REQ_META;
+ */
+struct buffer_head *
+__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
+blk_opf_t op_flags, gfp_t gfp)
+{
+   return bread_gfp(bdev, block, size, op_flags, gfp, true);
+}
+EXPORT_SYMBOL(__bread_gfp2);


__bread_gfp2() is not a great name, why not just using bread_gfp()
directly? I'm not a huge fan of boolean arguments but three different flags
arguments would be too much for my taste ;) so I guess I can live with
that.


I agree that __bread_gfp2 is not a great name; if possible, I'll try to
figure out a better name for v3.

Thanks for reviewing this patchset!
Kuai


Honza






Re: [PATCH RFC v2 for-6.8/block 01/18] block: add some bdev apis

2023-12-11 Thread Yu Kuai

Hi,

在 2023/12/12 0:52, Jan Kara 写道:

On Mon 11-12-23 22:05:35, Yu Kuai wrote:

From: Yu Kuai 

Those apis will be used for other modules, so that bd_inode won't be
accessed directly from other modules.

Signed-off-by: Yu Kuai 


...


+void bdev_associated_mapping(struct block_device *bdev,
+struct address_space *mapping)
+{
+   mapping->host = bdev->bd_inode;
+}


Here I'm not sure - is the helper really a win? It seems a bit obscure to
me. This initialization of another mapping for a bdev looks really special.


Yes, I don't like this helper at all, but gfs2 uses bd_inode this way, and I
need this helper to remove 'bd_inode' from block_device later. I'm not
familiar with gfs2 at all, but perhaps it is worth digging deeper to figure
out a proper way for gfs2.

Thanks,
Kuai


Honza






[PATCH RFC v2 for-6.8/block 18/18] ext4: use bdev apis

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/ext4/dir.c   | 6 ++
 fs/ext4/ext4_jbd2.c | 6 +++---
 fs/ext4/super.c | 3 +--
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3985f8c33f95..64e35eb6a324 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -191,10 +191,8 @@ static int ext4_readdir(struct file *file, struct 
dir_context *ctx)
pgoff_t index = map.m_pblk >>
(PAGE_SHIFT - inode->i_blkbits);
if (!ra_has_index(&file->f_ra, index))
-   page_cache_sync_readahead(
-   sb->s_bdev->bd_inode->i_mapping,
-   &file->f_ra, file,
-   index, 1);
+   bdev_sync_readahead(sb->s_bdev, &file->f_ra,
+   file, index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
if (IS_ERR(bh)) {
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d1a2e6624401..c1bf3a00fad9 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -206,7 +206,6 @@ static void ext4_journal_abort_handle(const char *caller, 
unsigned int line,
 
 static void ext4_check_bdev_write_error(struct super_block *sb)
 {
-   struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err;
 
@@ -216,9 +215,10 @@ static void ext4_check_bdev_write_error(struct super_block 
*sb)
 * we could read old data from disk and write it out again, which
 * may lead to on-disk filesystem inconsistency.
 */
-   if (errseq_check(&mapping->wb_err, READ_ONCE(sbi->s_bdev_wb_err))) {
+   if (bdev_wb_err_check(sb->s_bdev, READ_ONCE(sbi->s_bdev_wb_err))) {
spin_lock(&sbi->s_bdev_wb_lock);
-   err = errseq_check_and_advance(&mapping->wb_err, 
&sbi->s_bdev_wb_err);
+   err = bdev_wb_err_check_and_advance(sb->s_bdev,
+   &sbi->s_bdev_wb_err);
spin_unlock(&sbi->s_bdev_wb_lock);
if (err)
ext4_error_err(sb, -err,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3b5e2b557488..96724cae622a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5544,8 +5544,7 @@ static int __ext4_fill_super(struct fs_context *fc, 
struct super_block *sb)
 * used to detect the metadata async write error.
 */
spin_lock_init(&sbi->s_bdev_wb_lock);
-   errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
-   &sbi->s_bdev_wb_err);
+   bdev_wb_err_check_and_advance(sb->s_bdev, &sbi->s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 17/18] ext4: remove block_device_ejected()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

block_device_ejected() is added by commit bdfe0cbd746a ("Revert
"ext4: remove block_device_ejected"") in 2015. At that time 'bdi->wb'
is destroyed synchronized from del_gendisk(), hence if ext4 is still
mounted, and then mark_buffer_dirty() will reference destroyed 'wb'.
However, such problem doesn't exist anymore:

- commit d03f6cdc1fc4 ("block: Dynamically allocate and refcount
backing_dev_info") switch bdi to use refcounting;
- commit 13eec2363ef0 ("fs: Get proper reference for s_bdi"), will grab
additional reference of bdi while mounting, so that 'bdi->wb' will not
be destroyed until generic_shutdown_super().

Hence remove this dead function block_device_ejected().

Signed-off-by: Yu Kuai 
---
 fs/ext4/super.c | 18 --
 1 file changed, 18 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ae41204f52d4..3b5e2b557488 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -467,22 +467,6 @@ static void ext4_maybe_update_superblock(struct 
super_block *sb)
schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
 }
 
-/*
- * The del_gendisk() function uninitializes the disk-specific data
- * structures, including the bdi structure, without telling anyone
- * else.  Once this happens, any attempt to call mark_buffer_dirty()
- * (for example, by ext4_commit_super), will cause a kernel OOPS.
- * This is a kludge to prevent these oops until we can put in a proper
- * hook in del_gendisk() to inform the VFS and file system layers.
- */
-static int block_device_ejected(struct super_block *sb)
-{
-   struct inode *bd_inode = sb->s_bdev->bd_inode;
-   struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
-
-   return bdi->dev == NULL;
-}
-
 static void ext4_journal_commit_callback(journal_t *journal, transaction_t 
*txn)
 {
struct super_block  *sb = journal->j_private;
@@ -6162,8 +6146,6 @@ static int ext4_commit_super(struct super_block *sb)
 
if (!sbh)
return -EINVAL;
-   if (block_device_ejected(sb))
-   return -ENODEV;
 
ext4_update_super(sb);
 
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 16/18] ext4: use new helper to read sb block

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Remove __ext4_sb_bread_gfp() and ext4_buffer_uptodate() that is defined
by ext4, and convert to use common helper __bread_gfp2() and
buffer_uptodate_or_error().

Signed-off-by: Yu Kuai 
---
 fs/ext4/ext4.h| 13 -
 fs/ext4/inode.c   |  8 
 fs/ext4/super.c   | 45 ++---
 fs/ext4/symlink.c |  2 +-
 4 files changed, 15 insertions(+), 53 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a5d784872303..8377f6c5264f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3824,19 +3824,6 @@ extern const struct iomap_ops ext4_iomap_ops;
 extern const struct iomap_ops ext4_iomap_overwrite_ops;
 extern const struct iomap_ops ext4_iomap_report_ops;
 
-static inline int ext4_buffer_uptodate(struct buffer_head *bh)
-{
-   /*
-* If the buffer has the write error flag, we have failed
-* to write out data in the block.  In this  case, we don't
-* have to read the block because we may read the old data
-* successfully.
-*/
-   if (buffer_write_io_error(bh))
-   set_buffer_uptodate(bh);
-   return buffer_uptodate(bh);
-}
-
 #endif /* __KERNEL__ */
 
 #define EFSBADCRC  EBADMSG /* Bad CRC detected */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 61277f7f8722..efb0af6f02f7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -887,7 +887,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct 
inode *inode,
bh = ext4_getblk(handle, inode, block, map_flags);
if (IS_ERR(bh))
return bh;
-   if (!bh || ext4_buffer_uptodate(bh))
+   if (!bh || buffer_uptodate_or_error(bh))
return bh;
 
ret = ext4_read_bh_lock(bh, REQ_META | REQ_PRIO, true);
@@ -915,7 +915,7 @@ int ext4_bread_batch(struct inode *inode, ext4_lblk_t 
block, int bh_count,
 
for (i = 0; i < bh_count; i++)
/* Note that NULL bhs[i] is valid because of holes. */
-   if (bhs[i] && !ext4_buffer_uptodate(bhs[i]))
+   if (bhs[i] && !buffer_uptodate_or_error(bhs[i]))
ext4_read_bh_lock(bhs[i], REQ_META | REQ_PRIO, false);
 
if (!wait)
@@ -4392,11 +4392,11 @@ static int __ext4_get_inode_loc(struct super_block *sb, 
unsigned long ino,
bh = sb_getblk(sb, block);
if (unlikely(!bh))
return -ENOMEM;
-   if (ext4_buffer_uptodate(bh))
+   if (buffer_uptodate_or_error(bh))
goto has_buffer;
 
lock_buffer(bh);
-   if (ext4_buffer_uptodate(bh)) {
+   if (buffer_uptodate_or_error(bh)) {
/* Someone brought it uptodate while we waited */
unlock_buffer(bh);
goto has_buffer;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c5fcf377ab1f..ae41204f52d4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -180,7 +180,7 @@ void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t 
op_flags,
 {
BUG_ON(!buffer_locked(bh));
 
-   if (ext4_buffer_uptodate(bh)) {
+   if (buffer_uptodate_or_error(bh)) {
unlock_buffer(bh);
return;
}
@@ -191,7 +191,7 @@ int ext4_read_bh(struct buffer_head *bh, blk_opf_t 
op_flags, bh_end_io_t *end_io
 {
BUG_ON(!buffer_locked(bh));
 
-   if (ext4_buffer_uptodate(bh)) {
+   if (buffer_uptodate_or_error(bh)) {
unlock_buffer(bh);
return 0;
}
@@ -214,49 +214,24 @@ int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t 
op_flags, bool wait)
return ext4_read_bh(bh, op_flags, NULL);
 }
 
-/*
- * This works like __bread_gfp() except it uses ERR_PTR for error
- * returns.  Currently with sb_bread it's impossible to distinguish
- * between ENOMEM and EIO situations (since both result in a NULL
- * return.
- */
-static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
-  sector_t block,
-  blk_opf_t op_flags, gfp_t gfp)
-{
-   struct buffer_head *bh;
-   int ret;
-
-   bh = sb_getblk_gfp(sb, block, gfp);
-   if (bh == NULL)
-   return ERR_PTR(-ENOMEM);
-   if (ext4_buffer_uptodate(bh))
-   return bh;
-
-   ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
-   if (ret) {
-   put_bh(bh);
-   return ERR_PTR(ret);
-   }
-   return bh;
-}
-
 struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
   blk_opf_t op_flags)
 {
-   gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
-   ~__GFP_FS) | __GFP_MOVABLE;
+   struct buffer_head *bh = __bread_gfp2(sb->s_bdev, block,
+ sb->s_blocksize,
+  

[PATCH RFC v2 for-6.8/block 15/18] buffer: add a new helper to read sb block

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Unlike __bread_gfp(), ext4 has special handling while reading the sb block:

1) __GFP_NOFAIL is not set, and memory allocation can fail;
2) If buffer write failed before, set buffer uptodate and don't read
   block from disk;
3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr;
4) If failed, return error ptr instead of NULL;

This patch add a new helper __bread_gfp2() that will match above 2 and 3(
1 will be used, and 4 will still be encapsulated by ext4), and prepare to
prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping
in ext4.

Signed-off-by: Yu Kuai 
---
 fs/buffer.c | 68 ++---
 include/linux/buffer_head.h | 18 +-
 2 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 967f34b70aa8..188bd36c9fea 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1255,16 +1255,19 @@ void __bforget(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(__bforget);
 
-static struct buffer_head *__bread_slow(struct buffer_head *bh)
+static struct buffer_head *__bread_slow(struct buffer_head *bh,
+   blk_opf_t op_flags,
+   bool check_write_error)
 {
lock_buffer(bh);
-   if (buffer_uptodate(bh)) {
+   if (buffer_uptodate(bh) ||
+   (check_write_error && buffer_uptodate_or_error(bh))) {
unlock_buffer(bh);
return bh;
} else {
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
-   submit_bh(REQ_OP_READ, bh);
+   submit_bh(REQ_OP_READ | op_flags, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
@@ -1445,6 +1448,31 @@ void __breadahead(struct block_device *bdev, sector_t 
block, unsigned size)
 }
 EXPORT_SYMBOL(__breadahead);
 
+static struct buffer_head *
+bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+ blk_opf_t op_flags, gfp_t gfp, bool check_write_error)
+{
+   struct buffer_head *bh;
+
+   gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+
+   /*
+* Prefer looping in the allocator rather than here, at least that
+* code knows what it's doing.
+*/
+   gfp |= __GFP_NOFAIL;
+
+   bh = bdev_getblk(bdev, block, size, gfp);
+   if (unlikely(!bh))
+   return NULL;
+
+   if (buffer_uptodate(bh) ||
+   (check_write_error && buffer_uptodate_or_error(bh)))
+   return bh;
+
+   return __bread_slow(bh, op_flags, check_write_error);
+}
+
 /**
  *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
@@ -1458,27 +1486,27 @@ EXPORT_SYMBOL(__breadahead);
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread_gfp(struct block_device *bdev, sector_t block,
-  unsigned size, gfp_t gfp)
+__bread_gfp(struct block_device *bdev, sector_t block, unsigned int size,
+   gfp_t gfp)
 {
-   struct buffer_head *bh;
-
-   gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
-
-   /*
-* Prefer looping in the allocator rather than here, at least that
-* code knows what it's doing.
-*/
-   gfp |= __GFP_NOFAIL;
-
-   bh = bdev_getblk(bdev, block, size, gfp);
-
-   if (likely(bh) && !buffer_uptodate(bh))
-   bh = __bread_slow(bh);
-   return bh;
+   return bread_gfp(bdev, block, size, 0, gfp, false);
 }
 EXPORT_SYMBOL(__bread_gfp);
 
+/*
+ * This works like __bread_gfp() except:
+ * 1) If buffer write failed before, set buffer uptodate and don't read
+ * block from disk;
+ * 2) Caller can pass in additional op_flags like REQ_META;
+ */
+struct buffer_head *
+__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size,
+blk_opf_t op_flags, gfp_t gfp)
+{
+   return bread_gfp(bdev, block, size, op_flags, gfp, true);
+}
+EXPORT_SYMBOL(__bread_gfp2);
+
 static void __invalidate_bh_lrus(struct bh_lru *b)
 {
int i;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5f23ee599889..751b2744b4ae 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -171,6 +171,18 @@ static __always_inline int buffer_uptodate(const struct 
buffer_head *bh)
return test_bit_acquire(BH_Uptodate, &bh->b_state);
 }
 
+static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh)
+{
+   /*
+* If the buffer has the write error flag, data was failed to write
+* out in the block. In this case, set buffer uptodate to prevent
+* reading old data.
+*/
+   if (buffer_write_io_error(bh))
+   set_buffer_uptodate(bh);
+   return buffer_uptodate(bh);
+}
+
 static inline unsigned long bh_offset(co

[PATCH RFC v2 for-6.8/block 14/18] jbd2: use bdev apis

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, to prepare for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/jbd2/journal.c  | 3 +--
 fs/jbd2/recovery.c | 6 ++
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ed53188472f9..f1b5ffeaf02a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2003,8 +2003,7 @@ static int __jbd2_journal_erase(journal_t *journal, 
unsigned int flags)
byte_count = (block_stop - block_start + 1) *
journal->j_blocksize;
 
-   truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
-   byte_start, byte_stop);
+   truncate_bdev_range(journal->j_dev, 0, byte_start, byte_stop);
 
if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
err = blkdev_issue_discard(journal->j_dev,
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 01f744cb97a4..6b6a2c4585fa 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -290,7 +290,6 @@ int jbd2_journal_recover(journal_t *journal)
 
struct recovery_infoinfo;
errseq_twb_err;
-   struct address_space*mapping;
 
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;
@@ -309,8 +308,7 @@ int jbd2_journal_recover(journal_t *journal)
}
 
wb_err = 0;
-   mapping = journal->j_fs_dev->bd_inode->i_mapping;
-   errseq_check_and_advance(&mapping->wb_err, &wb_err);
+   bdev_wb_err_check_and_advance(journal->j_fs_dev, &wb_err);
err = do_one_pass(journal, &info, PASS_SCAN);
if (!err)
err = do_one_pass(journal, &info, PASS_REVOKE);
@@ -334,7 +332,7 @@ int jbd2_journal_recover(journal_t *journal)
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
-   err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
+   err2 = bdev_wb_err_check_and_advance(journal->j_fs_dev, &wb_err);
if (!err)
err = err2;
/* Make sure all replayed data is on permanent storage */
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 12/18] gfs2: use bdev api

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, to prepare for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/gfs2/glock.c  | 2 +-
 fs/gfs2/ops_fstype.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d6bf1f8c25dc..4128a5bc4bb6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1210,7 +1210,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping = gfs2_glock2aspace(gl);
if (mapping) {
 mapping->a_ops = &gfs2_meta_aops;
-   mapping->host = s->s_bdev->bd_inode;
+   bdev_associated_mapping(s->s_bdev, mapping);
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->private_data = NULL;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b108c5d26839..56ae8959b55a 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -114,7 +114,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
address_space_init_once(mapping);
mapping->a_ops = &gfs2_rgrp_aops;
-   mapping->host = sb->s_bdev->bd_inode;
+   bdev_associated_mapping(sb->s_bdev, mapping);
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->private_data = NULL;
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 13/18] nilfs2: use bdev api in nilfs_attach_log_writer()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, to prepare for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/nilfs2/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55e31cc903d1..a1130e384937 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2823,7 +2823,7 @@ int nilfs_attach_log_writer(struct super_block *sb, 
struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
 
-   inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+   bdev_attach_wb(nilfs->ns_bdev);
 
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (unlikely(err))
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 11/18] erofs: use bdev api

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, to prepare for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/erofs/data.c | 18 --
 fs/erofs/internal.h |  2 ++
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index c98aeda8abb2..8cf3618190ab 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -32,8 +32,7 @@ void erofs_put_metabuf(struct erofs_buf *buf)
 void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
  enum erofs_kmap_type type)
 {
-   struct inode *inode = buf->inode;
-   erofs_off_t offset = (erofs_off_t)blkaddr << inode->i_blkbits;
+   erofs_off_t offset = (erofs_off_t)blkaddr << buf->blkszbits;
pgoff_t index = offset >> PAGE_SHIFT;
struct page *page = buf->page;
struct folio *folio;
@@ -43,7 +42,9 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
erofs_put_metabuf(buf);
 
nofs_flag = memalloc_nofs_save();
-   folio = read_cache_folio(inode->i_mapping, index, NULL, NULL);
+   folio = buf->inode ?
+   read_mapping_folio(buf->inode->i_mapping, index, NULL) :
+   bdev_read_folio(buf->bdev, offset);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(folio))
return folio;
@@ -67,10 +68,15 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t 
blkaddr,
 
 void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
 {
-   if (erofs_is_fscache_mode(sb))
+   if (erofs_is_fscache_mode(sb)) {
buf->inode = EROFS_SB(sb)->s_fscache->inode;
-   else
-   buf->inode = sb->s_bdev->bd_inode;
+   buf->bdev = NULL;
+   buf->blkszbits = buf->inode->i_blkbits;
+   } else {
+   buf->inode = NULL;
+   buf->bdev = sb->s_bdev;
+   buf->blkszbits = EROFS_SB(sb)->blkszbits;
+   }
 }
 
 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b0409badb017..c9206351b485 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -224,8 +224,10 @@ enum erofs_kmap_type {
 
 struct erofs_buf {
struct inode *inode;
+   struct block_device *bdev;
struct page *page;
void *base;
+   u8 blkszbits;
enum erofs_kmap_type kmap_type;
 };
 #define __EROFS_BUF_INITIALIZER((struct erofs_buf){ .page = NULL })
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 09/18] btrfs: use bdev apis

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on
the other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 fs/btrfs/disk-io.c | 71 +-
 fs/btrfs/volumes.c | 17 ++-
 fs/btrfs/zoned.c   | 15 +-
 3 files changed, 48 insertions(+), 55 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 401ea09ae4b8..c373806dc793 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3620,28 +3620,24 @@ ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
 static void btrfs_end_super_write(struct bio *bio)
 {
struct btrfs_device *device = bio->bi_private;
-   struct bio_vec *bvec;
-   struct bvec_iter_all iter_all;
-   struct page *page;
-
-   bio_for_each_segment_all(bvec, bio, iter_all) {
-   page = bvec->bv_page;
+   struct folio_iter fi;
 
+   bio_for_each_folio_all(fi, bio) {
if (bio->bi_status) {
btrfs_warn_rl_in_rcu(device->fs_info,
"lost page write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
-   ClearPageUptodate(page);
-   SetPageError(page);
+   folio_clear_uptodate(fi.folio);
+   folio_set_error(fi.folio);
btrfs_dev_stat_inc_and_print(device,
 BTRFS_DEV_STAT_WRITE_ERRS);
} else {
-   SetPageUptodate(page);
+   folio_mark_uptodate(fi.folio);
}
 
-   put_page(page);
-   unlock_page(page);
+   folio_put(fi.folio);
+   folio_unlock(fi.folio);
}
 
bio_put(bio);
@@ -3651,9 +3647,9 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct 
block_device *bdev,
   int copy_num, bool 
drop_cache)
 {
struct btrfs_super_block *super;
-   struct page *page;
+   struct folio *folio;
u64 bytenr, bytenr_orig;
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
+   unsigned int nofs_flag;
int ret;
 
bytenr_orig = btrfs_sb_offset(copy_num);
@@ -3674,16 +3670,17 @@ struct btrfs_super_block 
*btrfs_read_dev_one_super(struct block_device *bdev,
 * Drop the page of the primary superblock, so later read will
 * always read from the device.
 */
-   invalidate_inode_pages2_range(mapping,
-   bytenr >> PAGE_SHIFT,
+   invalidate_bdev_range(bdev, bytenr >> PAGE_SHIFT,
(bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
}
 
-   page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
-   if (IS_ERR(page))
-   return ERR_CAST(page);
+   nofs_flag = memalloc_nofs_save();
+   folio = bdev_read_folio(bdev, bytenr);
+   memalloc_nofs_restore(nofs_flag);
+   if (IS_ERR(folio))
+   return ERR_CAST(folio);
 
-   super = page_address(page);
+   super = folio_address(folio);
if (btrfs_super_magic(super) != BTRFS_MAGIC) {
btrfs_release_disk_super(super);
return ERR_PTR(-ENODATA);
@@ -3740,7 +3737,6 @@ static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
 {
struct btrfs_fs_info *fs_info = device->fs_info;
-   struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
@@ -3753,7 +3749,7 @@ static int write_dev_supers(struct btrfs_device *device,
shash->tfm = fs_info->csum_shash;
 
for (i = 0; i < max_mirrors; i++) {
-   struct page *page;
+   struct folio *folio;
struct bio *bio;
struct btrfs_super_block *disk_super;
 
@@ -3778,9 +3774,10 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
 
-   page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
-  GFP_NOFS);
-   if (!page) {
+   folio = __bdev_get_folio(device->bdev, bytenr,
+FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
+GFP_NOFS);
+   if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
"couldn't get super block page for b

[PATCH RFC v2 for-6.8/block 03/18] bcache: use bdev api in read_super()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on
the other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 drivers/md/bcache/super.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bfe1685dbae5..23892b32c582 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -168,14 +168,13 @@ static const char *read_super(struct cache_sb *sb, struct 
block_device *bdev,
 {
const char *err;
struct cache_sb_disk *s;
-   struct page *page;
+   struct folio *folio;
unsigned int i;
 
-   page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
-  SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
-   if (IS_ERR(page))
+   folio = bdev_read_folio(bdev, SB_OFFSET);
+   if (IS_ERR(folio))
return "IO error";
-   s = page_address(page) + offset_in_page(SB_OFFSET);
+   s = folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
 
sb->offset  = le64_to_cpu(s->offset);
sb->version = le64_to_cpu(s->version);
@@ -272,7 +271,7 @@ static const char *read_super(struct cache_sb *sb, struct 
block_device *bdev,
*res = s;
return NULL;
 err:
-   put_page(page);
+   folio_put(folio);
return err;
 }
 
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 05/18] s390/dasd: use bdev api in dasd_format()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, to prepare for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 drivers/s390/block/dasd_ioctl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 61b9675e2a67..bbfb958237e6 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -221,8 +221,9 @@ dasd_format(struct dasd_block *block, struct format_data_t 
*fdata)
 * enabling the device later.
 */
if (fdata->start_unit == 0) {
-   block->gdp->part0->bd_inode->i_blkbits =
-   blksize_bits(fdata->blksize);
+   rc = set_blocksize(block->gdp->part0, fdata->blksize);
+   if (rc)
+   return rc;
}
 
rc = base->discipline->format_device(base, fdata, 1);
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 10/18] cramfs: use bdev apis in cramfs_blkdev_read()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on
the other hand, avoid accessing bd_inode directly.

Also do some cleanup: there is no need for two for loops, so merge them
and remove the local array pages.

Signed-off-by: Yu Kuai 
---
 fs/cramfs/inode.c | 36 +---
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 60dbfa0f8805..fad95d683d97 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -183,9 +183,6 @@ static int next_buffer;
 static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset,
unsigned int len)
 {
-   struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
-   struct file_ra_state ra = {};
-   struct page *pages[BLKS_PER_BUF];
unsigned i, blocknr, buffer;
unsigned long devsize;
char *data;
@@ -214,37 +211,30 @@ static void *cramfs_blkdev_read(struct super_block *sb, 
unsigned int offset,
devsize = bdev_nr_bytes(sb->s_bdev) >> PAGE_SHIFT;
 
/* Ok, read in BLKS_PER_BUF pages completely first. */
-   file_ra_state_init(, mapping);
-   page_cache_sync_readahead(mapping, , NULL, blocknr, BLKS_PER_BUF);
-
-   for (i = 0; i < BLKS_PER_BUF; i++) {
-   struct page *page = NULL;
-
-   if (blocknr + i < devsize) {
-   page = read_mapping_page(mapping, blocknr + i, NULL);
-   /* synchronous error? */
-   if (IS_ERR(page))
-   page = NULL;
-   }
-   pages[i] = page;
-   }
+   bdev_sync_readahead(sb->s_bdev, NULL, NULL, blocknr, BLKS_PER_BUF);
 
buffer = next_buffer;
next_buffer = NEXT_BUFFER(buffer);
buffer_blocknr[buffer] = blocknr;
buffer_dev[buffer] = sb;
-
data = read_buffers[buffer];
+
for (i = 0; i < BLKS_PER_BUF; i++) {
-   struct page *page = pages[i];
+   struct folio *folio = NULL;
+
+   if (blocknr + i < devsize)
+   folio = bdev_read_folio(sb->s_bdev,
+   (blocknr + i) << PAGE_SHIFT);
 
-   if (page) {
-   memcpy_from_page(data, page, 0, PAGE_SIZE);
-   put_page(page);
-   } else
+   if (IS_ERR_OR_NULL(folio)) {
memset(data, 0, PAGE_SIZE);
+   } else {
+   memcpy_from_folio(data, folio, 0, PAGE_SIZE);
+   folio_put(folio);
+   }
data += PAGE_SIZE;
}
+
return read_buffers[buffer] + offset;
 }
 
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 02/18] xen/blkback: use bdev api in xen_update_blkif_status()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, to prepare for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 drivers/block/xen-blkback/xenbus.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index e34219ea2b05..e645afa4af57 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -104,8 +104,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
xenbus_dev_error(blkif->be->dev, err, "block flush");
return;
}
-   invalidate_inode_pages2(
-   blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);
+   invalidate_bdev(blkif->vbd.bdev_handle->bdev);
 
for (i = 0; i < blkif->nr_rings; i++) {
ring = &blkif->rings[i];
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 06/18] scsicam: use bdev api in scsi_bios_ptable()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, to prepare for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 drivers/scsi/scsicam.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index e2c7d8ef205f..9617d70c0ed1 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -32,11 +32,9 @@
  */
 unsigned char *scsi_bios_ptable(struct block_device *dev)
 {
-   struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping;
unsigned char *res = NULL;
-   struct folio *folio;
+   struct folio *folio = bdev_read_folio(bdev_whole(dev), 0);
 
-   folio = read_mapping_folio(mapping, 0, NULL);
if (IS_ERR(folio))
return NULL;
 
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 08/18] bio: export bio_add_folio_nofail()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Currently btrfs is using __bio_add_page() in write_dev_supers(). In order
to convert to use folio for bdev in btrfs, export bio_add_folio_nofail()
so that it can replace __bio_add_page().

Signed-off-by: Yu Kuai 
---
 block/bio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/bio.c b/block/bio.c
index 5eba53ca953b..a7b2bbb210ee 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1119,6 +1119,7 @@ void bio_add_folio_nofail(struct bio *bio, struct folio 
*folio, size_t len,
WARN_ON_ONCE(off > UINT_MAX);
__bio_add_page(bio, &folio->page, len, off);
 }
+EXPORT_SYMBOL_GPL(bio_add_folio_nofail);
 
 /**
  * bio_add_folio - Attempt to add part of a folio to a bio.
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 01/18] block: add some bdev apis

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

These APIs will be used by other modules, so that bd_inode won't be
accessed directly from other modules.

Signed-off-by: Yu Kuai 
---
 block/bdev.c   | 70 ++
 block/blk.h|  2 --
 include/linux/blkdev.h | 17 ++
 3 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 750aec178b6a..9a469753eb4b 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -89,6 +89,13 @@ void invalidate_bdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
+void invalidate_bdev_range(struct block_device *bdev, pgoff_t start,
+  pgoff_t end)
+{
+   invalidate_mapping_pages(bdev->bd_inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(invalidate_bdev_range);
+
 /*
  * Drop all buffers & page cache for given bdev range. This function bails
  * with error if bdev has other exclusive owner (such as filesystem).
@@ -121,6 +128,7 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
 lstart >> PAGE_SHIFT,
 lend >> PAGE_SHIFT);
 }
+EXPORT_SYMBOL_GPL(truncate_bdev_range);
 
 static void set_init_blocksize(struct block_device *bdev)
 {
@@ -1102,3 +1110,65 @@ void bdev_statx_dioalign(struct inode *inode, struct 
kstat *stat)
 
blkdev_put_no_open(bdev);
 }
+
+struct folio *bdev_read_folio(struct block_device *bdev, loff_t pos)
+{
+   return mapping_read_folio_gfp(bdev->bd_inode->i_mapping,
+ pos >> PAGE_SHIFT, GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(bdev_read_folio);
+
+struct folio *__bdev_get_folio(struct block_device *bdev, loff_t pos,
+  fgf_t fgp_flags, gfp_t gfp)
+{
+   return __filemap_get_folio(bdev->bd_inode->i_mapping, pos >> PAGE_SHIFT,
+  fgp_flags, gfp);
+}
+EXPORT_SYMBOL_GPL(__bdev_get_folio);
+
+int bdev_wb_err_check(struct block_device *bdev, errseq_t since)
+{
+   return errseq_check(&bdev->bd_inode->i_mapping->wb_err, since);
+}
+EXPORT_SYMBOL_GPL(bdev_wb_err_check);
+
+int bdev_wb_err_check_and_advance(struct block_device *bdev, errseq_t *since)
+{
+   return errseq_check_and_advance(&bdev->bd_inode->i_mapping->wb_err,
+   since);
+}
+EXPORT_SYMBOL_GPL(bdev_wb_err_check_and_advance);
+
+void bdev_balance_dirty_pages_ratelimited(struct block_device *bdev)
+{
+   return balance_dirty_pages_ratelimited(bdev->bd_inode->i_mapping);
+}
+EXPORT_SYMBOL_GPL(bdev_balance_dirty_pages_ratelimited);
+
+void bdev_sync_readahead(struct block_device *bdev, struct file_ra_state *ra,
+struct file *file, pgoff_t index,
+unsigned long req_count)
+{
+   struct file_ra_state tmp_ra = {};
+
+   if (!ra) {
+   ra = &tmp_ra;
+   file_ra_state_init(ra, bdev->bd_inode->i_mapping);
+   }
+   page_cache_sync_readahead(bdev->bd_inode->i_mapping, ra, file, index,
+ req_count);
+}
+EXPORT_SYMBOL_GPL(bdev_sync_readahead);
+
+void bdev_attach_wb(struct block_device *bdev)
+{
+   inode_attach_wb(bdev->bd_inode, NULL);
+}
+EXPORT_SYMBOL_GPL(bdev_attach_wb);
+
+void bdev_associated_mapping(struct block_device *bdev,
+struct address_space *mapping)
+{
+   mapping->host = bdev->bd_inode;
+}
+EXPORT_SYMBOL_GPL(bdev_associated_mapping);
diff --git a/block/blk.h b/block/blk.h
index 08a358bc0919..da4becd4f7e9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -467,8 +467,6 @@ extern struct device_attribute dev_attr_events_poll_msecs;
 extern struct attribute_group blk_trace_attr_group;
 
 blk_mode_t file_to_blk_mode(struct file *file);
-int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
-   loff_t lstart, loff_t lend);
 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 17c0a7d0d319..d2453424a9eb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct module;
 struct request_queue;
@@ -1502,6 +1503,22 @@ struct block_device *blkdev_get_no_open(dev_t dev);
 void blkdev_put_no_open(struct block_device *bdev);
 
 struct block_device *I_BDEV(struct inode *inode);
+void invalidate_bdev_range(struct block_device *bdev, pgoff_t start,
+  pgoff_t end);
+int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
+   loff_t lstart, loff_t lend);
+struct folio *bdev_read_folio(struct block_device *bdev, loff_t pos);
+struct folio *__bdev_get_folio(struct block_device *bdev, loff_t pos,
+  fg

[PATCH RFC v2 for-6.8/block 07/18] bcachefs: remove dead function bdev_sectors()

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

bdev_sectors() is not used hence remove it.

Signed-off-by: Yu Kuai 
---
 fs/bcachefs/util.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2984b57b2958..22a0acc1704f 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -516,11 +516,6 @@ static inline unsigned fract_exp_two(unsigned x, unsigned 
fract_bits)
 void bch2_bio_map(struct bio *bio, void *base, size_t);
 int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
 
-static inline sector_t bdev_sectors(struct block_device *bdev)
-{
-   return bdev->bd_inode->i_size >> 9;
-}
-
 #define closure_bio_submit(bio, cl)\
 do {   \
closure_get(cl);\
-- 
2.39.2




[PATCH RFC v2 for-6.8/block 00/18] block: don't access bd_inode directly from other modules

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

Changes in v2:
 - remove some bdev apis that are not necessary;
 - pass in offset for bdev_read_folio() and __bdev_get_folio();
 - remove bdev_gfp_constraint() and add a new helper in fs/buffer.c to
 prevent accessing bd_inode directly from mapping_gfp_constraint() in
 ext4 (patches 15, 16);
 - remove block_device_ejected() from ext4.

Note that the following has not changed since v1:
- Christoph suggested removing invalidate_inode_pages2() from
xen_update_blkif_status(), however, this sync_bdev() + invalidate_bdev()
is used from many modules, and I'll leave this for later if we want to
kill all of them.
- Matthew suggested that pass in valid file_ra_state for cramfs,
however, I don't see an easy way to do this for cramfs_lookup() and
cramfs_read_super().

Patch 1 adds some bdev APIs; the follow-up patches then use these APIs
to avoid accessing bd_inode directly, and hopefully the field bd_inode can
be removed eventually (after figuring out a way for fs/buffer.c).

Yu Kuai (18):
  block: add some bdev apis
  xen/blkback: use bdev api in xen_update_blkif_status()
  bcache: use bdev api in read_super()
  mtd: block2mtd: use bdev apis
  s390/dasd: use bdev api in dasd_format()
  scsicam: use bdev api in scsi_bios_ptable()
  bcachefs: remove dead function bdev_sectors()
  bio: export bio_add_folio_nofail()
  btrfs: use bdev apis
  cramfs: use bdev apis in cramfs_blkdev_read()
  erofs: use bdev api
  gfs2: use bdev api
  nilfs2: use bdev api in nilfs_attach_log_writer()
  jbd2: use bdev apis
  buffer: add a new helper to read sb block
  ext4: use new helper to read sb block
  ext4: remove block_device_ejected()
  ext4: use bdev apis

 block/bdev.c   | 70 ++
 block/bio.c|  1 +
 block/blk.h|  2 -
 drivers/block/xen-blkback/xenbus.c |  3 +-
 drivers/md/bcache/super.c  | 11 ++--
 drivers/mtd/devices/block2mtd.c| 81 +-
 drivers/s390/block/dasd_ioctl.c|  5 +-
 drivers/scsi/scsicam.c |  4 +-
 fs/bcachefs/util.h |  5 --
 fs/btrfs/disk-io.c | 71 --
 fs/btrfs/volumes.c | 17 +++
 fs/btrfs/zoned.c   | 15 +++---
 fs/buffer.c| 68 +
 fs/cramfs/inode.c  | 36 +
 fs/erofs/data.c| 18 ---
 fs/erofs/internal.h|  2 +
 fs/ext4/dir.c  |  6 +--
 fs/ext4/ext4.h | 13 -
 fs/ext4/ext4_jbd2.c|  6 +--
 fs/ext4/inode.c|  8 +--
 fs/ext4/super.c| 66 
 fs/ext4/symlink.c  |  2 +-
 fs/gfs2/glock.c|  2 +-
 fs/gfs2/ops_fstype.c   |  2 +-
 fs/jbd2/journal.c  |  3 +-
 fs/jbd2/recovery.c |  6 +--
 fs/nilfs2/segment.c|  2 +-
 include/linux/blkdev.h | 17 +++
 include/linux/buffer_head.h| 18 ++-
 29 files changed, 301 insertions(+), 259 deletions(-)

-- 
2.39.2




[PATCH RFC v2 for-6.8/block 04/18] mtd: block2mtd: use bdev apis

2023-12-11 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on
the other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 drivers/mtd/devices/block2mtd.c | 81 +++--
 1 file changed, 36 insertions(+), 45 deletions(-)

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index aa44a23ec045..cf201bf73184 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -46,40 +46,34 @@ struct block2mtd_dev {
 /* Static info about the MTD, used in cleanup_module */
 static LIST_HEAD(blkmtd_device_list);
 
-
-static struct page *page_read(struct address_space *mapping, pgoff_t index)
-{
-   return read_mapping_page(mapping, index, NULL);
-}
-
 /* erase a specified part of the device */
 static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len)
 {
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
-   struct page *page;
+   struct block_device *bdev = dev->bdev_handle->bdev;
+   struct folio *folio;
pgoff_t index = to >> PAGE_SHIFT;   // page index
int pages = len >> PAGE_SHIFT;
u_long *p;
u_long *max;
 
while (pages) {
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(bdev, index << PAGE_SHIFT);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   max = page_address(page) + PAGE_SIZE;
-   for (p=page_address(page); ppriv;
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
-   struct page *page;
+   struct folio *folio;
pgoff_t index = from >> PAGE_SHIFT;
int offset = from & (PAGE_SIZE-1);
int cpylen;
@@ -120,12 +112,13 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t 
from, size_t len,
cpylen = len;   // this page
len = len - cpylen;
 
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(dev->bdev_handle->bdev,
+   index << PAGE_SHIFT);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   memcpy(buf, page_address(page) + offset, cpylen);
-   put_page(page);
+   memcpy(buf, folio_address(folio) + offset, cpylen);
+   folio_put(folio);
 
if (retlen)
*retlen += cpylen;
@@ -141,9 +134,8 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t 
from, size_t len,
 static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf,
loff_t to, size_t len, size_t *retlen)
 {
-   struct page *page;
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
+   struct block_device *bdev = dev->bdev_handle->bdev;
+   struct folio *folio;
pgoff_t index = to >> PAGE_SHIFT;   // page index
int offset = to & ~PAGE_MASK;   // page offset
int cpylen;
@@ -155,18 +147,18 @@ static int _block2mtd_write(struct block2mtd_dev *dev, 
const u_char *buf,
cpylen = len;   // this page
len = len - cpylen;
 
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(bdev, index << PAGE_SHIFT);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   if (memcmp(page_address(page)+offset, buf, cpylen)) {
-   lock_page(page);
-   memcpy(page_address(page) + offset, buf, cpylen);
-   set_page_dirty(page);
-   unlock_page(page);
-   balance_dirty_pages_ratelimited(mapping);
+   if (memcmp(folio_address(folio) + offset, buf, cpylen)) {
+   folio_lock(folio);
+   memcpy(folio_address(folio) + offset, buf, cpylen);
+   folio_mark_dirty(folio);
+   folio_unlock(folio);
+   bdev_balance_dirty_pages_ratelimited(bdev);
}
-   put_page(page);
+   folio_put(folio);
 
if (retlen)
*retlen += cpylen;
@@ -211,8 +203,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
kfree(dev->mtd.name);
 
if (dev->bdev_handle) {
-   invalidate_mapping_pages(
- 

Re: [PATCH -next RFC 01/14] block: add some bdev apis

2023-12-06 Thread Yu Kuai

Hi,

在 2023/12/06 22:58, Matthew Wilcox 写道:

On Tue, Dec 05, 2023 at 08:37:15PM +0800, Yu Kuai wrote:

+struct folio *bdev_read_folio(struct block_device *bdev, pgoff_t index)
+{
+   return read_mapping_folio(bdev->bd_inode->i_mapping, index, NULL);
+}
+EXPORT_SYMBOL_GPL(bdev_read_folio);


I'm coming to the opinion that 'index' is the wrong parameter here.
Looking through all the callers of bdev_read_folio() in this patchset,
they all have a position in bytes, and they all convert it to
index for this call.  The API should probably be:

struct folio *bdev_read_folio(struct block_device *bdev, loff_t pos)
{
return read_mapping_folio(bdev->bd_inode->i_mapping,
pos / PAGE_SIZE, NULL);
}


Thanks for reviewing this patchset! Okay, I'll convert to pass in "pos"
in v2.


... and at some point, we'll get round to converting read_mapping_folio()
to take its argument in loff_t.

Similarly for these two APIs:


+struct folio *bdev_read_folio_gfp(struct block_device *bdev, pgoff_t index,
+ gfp_t gfp)
+struct folio *bdev_get_folio(struct block_device *bdev, pgoff_t index)



+struct folio *bdev_find_or_create_folio(struct block_device *bdev,
+   pgoff_t index, gfp_t gfp)
+{
+   return __filemap_get_folio(bdev->bd_inode->i_mapping, index,
+  FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+}
+EXPORT_SYMBOL_GPL(bdev_find_or_create_folio);


This one probably shouldn't exist.  I've been converting callers of
find_or_create_page() to call __filemap_get_folio; I suspect we
should expose a __bdev_get_folio and have the callers use the FGP
arguments directly, but I'm open to other opinions here.


If nobody is against this, I will expose a single __bdev_get_folio() to use
in v2.



+void bdev_sync_readahead(struct block_device *bdev, struct file_ra_state *ra,
+struct file *file, pgoff_t index,
+unsigned long req_count)
+{
+   struct file_ra_state tmp_ra = {};
+
+   if (!ra) {
+   ra = &tmp_ra;
+   file_ra_state_init(ra, bdev->bd_inode->i_mapping);
+   }
+   page_cache_sync_readahead(bdev->bd_inode->i_mapping, ra, file, index,
+ req_count);
+}


I think the caller should always be passing in a valid file_ra_state.
It's only cramfs that doesn't have one, and it really should!
Not entirely sure about the arguments here; part of me says "bytes",
but this is weird enough to maybe take arguments in pages.


In fact, bdev_sync_readahead() is only called for cramfs and ext4.

For ext4 it's used in ext4_readdir() so there is valid file_ra_state.

However, for cramfs it's used in cramfs_read(), and cramfs_read() is used
for:

1) cramfs_read_folio
2) cramfs_readdir
3) cramfs_lookup
4) cramfs_read_super

Looks like it's easy to pass in valid file_ra_state() for 1) and 2),
however, I don't see an easy way to do this for 3) and 4).

Thanks,
Kuai



.






Re: [PATCH -next RFC 02/14] xen/blkback: use bdev api in xen_update_blkif_status()

2023-12-05 Thread Yu Kuai

Hi,

在 2023/12/06 13:55, Christoph Hellwig 写道:

On Tue, Dec 05, 2023 at 08:37:16PM +0800, Yu Kuai wrote:

diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index e34219ea2b05..e645afa4af57 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -104,8 +104,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
xenbus_dev_error(blkif->be->dev, err, "block flush");
return;
}
-   invalidate_inode_pages2(
-   blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);
+   invalidate_bdev(blkif->vbd.bdev_handle->bdev);


blkback is a bdev exporter.  I don't think it should ever call
invalidate_inode_pages2, through a wrapper or not.


I'm not sure about this. I'm not familiar with xen/blkback, but I saw
that xen-blkback will open a bdev from xen_vbd_create(), so this
looks like dm/md to me, and it sounds reasonable to sync +
invalidate the opened bdev during initialization. Please kindly correct
me if I'm wrong.

Thanks,
Kuai



.






Re: [PATCH -next RFC 01/14] block: add some bdev apis

2023-12-05 Thread Yu Kuai

Hi,

在 2023/12/06 14:14, Christoph Hellwig 写道:

+void invalidate_bdev_range(struct block_device *bdev, pgoff_t start,
+  pgoff_t end)
+{
+   invalidate_mapping_pages(bdev->bd_inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(invalidate_bdev_range);


All these could probably use kerneldoc comments.


Ok, and thanks for reviewing the patchset!


For this one I really don't like it existing at all, but we'll have to
discuss that in the btrfs patch.


+loff_t bdev_size(struct block_device *bdev)
+{
+   loff_t size;
+
+   spin_lock(&bdev->bd_size_lock);
+   size = i_size_read(bdev->bd_inode);
+   spin_unlock(&bdev->bd_size_lock);
+
+   return size;
+}
+EXPORT_SYMBOL_GPL(bdev_size);


No need for this one.  The callers can simply use bdev_nr_bytes.


Ok, I'll replace it with bdev_nr_bytes.



+struct folio *bdev_read_folio(struct block_device *bdev, pgoff_t index)
+{
+   return read_mapping_folio(bdev->bd_inode->i_mapping, index, NULL);
+}
+EXPORT_SYMBOL_GPL(bdev_read_folio);
+
+struct folio *bdev_read_folio_gfp(struct block_device *bdev, pgoff_t index,
+ gfp_t gfp)
+{
+   return mapping_read_folio_gfp(bdev->bd_inode->i_mapping, index, gfp);
+}
+EXPORT_SYMBOL_GPL(bdev_read_folio_gfp);


I think we can just drop bdev_read_folio_gfp. Half of the callers simply
pass GFP_KERNEL, and the other half passes GFP_NOFS and could just use
memalloc_nofs_save().


I'm a little confused, so there are 3 use cases:
1) use GFP_USER, default gfp from bdev_alloc.
2) use GFP_KERNEL
3) use GFP_NOFS

I understand that you're suggesting memalloc_nofs_save() to distinguish
2 and 3, but how can I distinguish 1?



+void bdev_balance_dirty_pages_ratelimited(struct block_device *bdev)
+{
+   return balance_dirty_pages_ratelimited(bdev->bd_inode->i_mapping);
+}
+EXPORT_SYMBOL_GPL(bdev_balance_dirty_pages_ratelimited);


Hmm, this is just used for block2mtd, and feels a little too low-level
to me, as block2mtd really should be using the normal fileread/write
APIs.  I guess we'll have to live with it for now if we want to expedite
killing off bd_inode.


+void bdev_correlate_mapping(struct block_device *bdev,
+   struct address_space *mapping)
+{
+   mapping->host = bdev->bd_inode;
+}
+EXPORT_SYMBOL_GPL(bdev_correlate_mapping);


Maybe associate instead of correlate?  Either way this basically
fully exposes the bdev inode again :(


+gfp_t bdev_gfp_constraint(struct block_device *bdev, gfp_t gfp)
+{
+   return mapping_gfp_constraint(bdev->bd_inode->i_mapping, gfp);
+}
+EXPORT_SYMBOL_GPL(bdev_gfp_constraint);


The right fix here is to:

  - use memalloc_nofs_save in ext4 instead of using
mapping_gfp_constraint to clear it from the mapping flags
  - remove __ext4_sb_bread_gfp and just have buffer.c helper that does
the right thing (either by changing the calling conventions of an
existing one, or adding a new one).


Thanks for the suggestions, but I'm not sure how to do this yet, I must
read more ext4 code.



+/*
+ * The del_gendisk() function uninitializes the disk-specific data
+ * structures, including the bdi structure, without telling anyone
+ * else.  Once this happens, any attempt to call mark_buffer_dirty()
+ * (for example, by ext4_commit_super), will cause a kernel OOPS.
+ * This is a kludge to prevent these oops until we can put in a proper
+ * hook in del_gendisk() to inform the VFS and file system layers.
+ */
+int bdev_ejected(struct block_device *bdev)
+{
+   struct backing_dev_info *bdi = inode_to_bdi(bdev->bd_inode);
+
+   return bdi->dev == NULL;
+}
+EXPORT_SYMBOL_GPL(bdev_ejected);


And this code in ext4 should just go away entirely.  The bdi should
always be valid for a live bdev for years.

Sounds good, I was confused about this code as well.




--- a/block/bio.c
+++ b/block/bio.c
@@ -1119,6 +1119,7 @@ void bio_add_folio_nofail(struct bio *bio, struct folio 
*folio, size_t len,
WARN_ON_ONCE(off > UINT_MAX);
__bio_add_page(bio, &folio->page, len, off);
  }
+EXPORT_SYMBOL_GPL(bio_add_folio_nofail);


How is this related?  The export is fine, but really should be a
separate, well-documented commit.


This is used to replace __bio_add_page() in btrfs while converting page
to folio, please let me know if I should keep this, if so, I'll split
this into a new commit.


  
+static inline u8 block_bits(struct block_device *bdev)

+{
+   return bdev->bd_inode->i_blkbits;
+}


Not sure we should need this.  i_blkbits comes from the blocksize
the fs set, so it should have other ways to get at it.


Yes, this is now only used for erofs, and erofs does call
sb_set_blocksize() while initializing, so you're right that there is another
way to get blkbits and this helper is not needed.

Thanks,
Kuai


.






Re: [PATCH -next RFC 00/14] block: don't access bd_inode directly from other modules

2023-12-05 Thread Yu Kuai

Hi,

在 2023/12/06 13:54, Christoph Hellwig 写道:

On Tue, Dec 05, 2023 at 08:37:14PM +0800, Yu Kuai wrote:

From: Yu Kuai 

Patch 1 adds some bdev APIs; follow-up patches then use these APIs
to avoid accessing bd_inode directly, and hopefully the bd_inode field can
be removed eventually (after figuring out a way to handle fs/buffer.c).


What tree is this against?  It fails to apply to either Jens'
for-6.8/block or Linus tree in the very first patch.


It was against the linux-next branch, at the tag next-20231201, because I'm
not sure yet if this patchset should be applied to Jens' tree. Please
let me know if I should switch to Jens' tree for v2.

Thanks,
Kuai


.






[PATCH -next RFC 12/14] jbd2: use bdev apis

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/jbd2/journal.c  | 3 +--
 fs/jbd2/recovery.c | 6 ++
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ed53188472f9..f1b5ffeaf02a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2003,8 +2003,7 @@ static int __jbd2_journal_erase(journal_t *journal, 
unsigned int flags)
byte_count = (block_stop - block_start + 1) *
journal->j_blocksize;
 
-   truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
-   byte_start, byte_stop);
+   truncate_bdev_range(journal->j_dev, 0, byte_start, byte_stop);
 
if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
err = blkdev_issue_discard(journal->j_dev,
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 01f744cb97a4..6b6a2c4585fa 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -290,7 +290,6 @@ int jbd2_journal_recover(journal_t *journal)
 
struct recovery_info    info;
errseq_t                wb_err;
-   struct address_space    *mapping;
 
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;
@@ -309,8 +308,7 @@ int jbd2_journal_recover(journal_t *journal)
}
 
wb_err = 0;
-   mapping = journal->j_fs_dev->bd_inode->i_mapping;
-   errseq_check_and_advance(&mapping->wb_err, &wb_err);
+   bdev_wb_err_check_and_advance(journal->j_fs_dev, &wb_err);
err = do_one_pass(journal, &info, PASS_SCAN);
if (!err)
err = do_one_pass(journal, &info, PASS_REVOKE);
@@ -334,7 +332,7 @@ int jbd2_journal_recover(journal_t *journal)
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
-   err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
+   err2 = bdev_wb_err_check_and_advance(journal->j_fs_dev, &wb_err);
if (!err)
err = err2;
/* Make sure all replayed data is on permanent storage */
-- 
2.39.2




[PATCH -next RFC 14/14] nilfs2: use bdev api in nilfs_attach_log_writer()

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/nilfs2/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 52995838f2de..be47a1d21889 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2824,7 +2824,7 @@ int nilfs_attach_log_writer(struct super_block *sb, 
struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
 
-   inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+   bdev_attach_wb(nilfs->ns_bdev);
 
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (unlikely(err))
-- 
2.39.2




[PATCH -next RFC 13/14] gfs2: use bdev api

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/gfs2/glock.c  | 2 +-
 fs/gfs2/ops_fstype.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f28c67181230..c66b0ed07e15 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1227,7 +1227,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping = gfs2_glock2aspace(gl);
if (mapping) {
 mapping->a_ops = &gfs2_meta_aops;
-   mapping->host = s->s_bdev->bd_inode;
+   bdev_correlate_mapping(s->s_bdev, mapping);
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->i_private_data = NULL;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 00ce89bdf32c..3145a56c88cb 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -114,7 +114,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
address_space_init_once(mapping);
mapping->a_ops = &gfs2_rgrp_aops;
-   mapping->host = sb->s_bdev->bd_inode;
+   bdev_correlate_mapping(sb->s_bdev, mapping);
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->i_private_data = NULL;
-- 
2.39.2




[PATCH -next RFC 08/14] btrfs: use bdev apis

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 fs/btrfs/disk-io.c | 68 --
 fs/btrfs/volumes.c | 17 ++--
 fs/btrfs/zoned.c   | 12 
 3 files changed, 42 insertions(+), 55 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9317606017e2..cfe7ea417760 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3597,28 +3597,24 @@ ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
 static void btrfs_end_super_write(struct bio *bio)
 {
struct btrfs_device *device = bio->bi_private;
-   struct bio_vec *bvec;
-   struct bvec_iter_all iter_all;
-   struct page *page;
-
-   bio_for_each_segment_all(bvec, bio, iter_all) {
-   page = bvec->bv_page;
+   struct folio_iter fi;
 
+   bio_for_each_folio_all(fi, bio) {
if (bio->bi_status) {
btrfs_warn_rl_in_rcu(device->fs_info,
"lost page write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
-   ClearPageUptodate(page);
-   SetPageError(page);
+   folio_clear_uptodate(fi.folio);
+   folio_set_error(fi.folio);
btrfs_dev_stat_inc_and_print(device,
 BTRFS_DEV_STAT_WRITE_ERRS);
} else {
-   SetPageUptodate(page);
+   folio_mark_uptodate(fi.folio);
}
 
-   put_page(page);
-   unlock_page(page);
+   folio_put(fi.folio);
+   folio_unlock(fi.folio);
}
 
bio_put(bio);
@@ -3628,9 +3624,8 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct 
block_device *bdev,
   int copy_num, bool 
drop_cache)
 {
struct btrfs_super_block *super;
-   struct page *page;
+   struct folio *folio;
u64 bytenr, bytenr_orig;
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
int ret;
 
bytenr_orig = btrfs_sb_offset(copy_num);
@@ -3651,16 +3646,15 @@ struct btrfs_super_block 
*btrfs_read_dev_one_super(struct block_device *bdev,
 * Drop the page of the primary superblock, so later read will
 * always read from the device.
 */
-   invalidate_inode_pages2_range(mapping,
-   bytenr >> PAGE_SHIFT,
+   invalidate_bdev_range(bdev, bytenr >> PAGE_SHIFT,
(bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
}
 
-   page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
-   if (IS_ERR(page))
-   return ERR_CAST(page);
+   folio = bdev_read_folio_gfp(bdev, bytenr >> PAGE_SHIFT, GFP_NOFS);
+   if (IS_ERR(folio))
+   return ERR_CAST(folio);
 
-   super = page_address(page);
+   super = folio_address(folio);
if (btrfs_super_magic(super) != BTRFS_MAGIC) {
btrfs_release_disk_super(super);
return ERR_PTR(-ENODATA);
@@ -3717,7 +3711,6 @@ static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
 {
struct btrfs_fs_info *fs_info = device->fs_info;
-   struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
@@ -3730,7 +3723,7 @@ static int write_dev_supers(struct btrfs_device *device,
shash->tfm = fs_info->csum_shash;
 
for (i = 0; i < max_mirrors; i++) {
-   struct page *page;
+   struct folio *folio;
struct bio *bio;
struct btrfs_super_block *disk_super;
 
@@ -3755,9 +3748,10 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
 
-   page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
-  GFP_NOFS);
-   if (!page) {
+   folio = bdev_find_or_create_folio(device->bdev,
+ bytenr >> PAGE_SHIFT,
+ GFP_NOFS);
+   if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
"couldn't get super block page for bytenr %llu",
bytenr);
@@ -37

[PATCH -next RFC 06/14] scsicam: use bdev api in scsi_bios_ptable()

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 drivers/scsi/scsicam.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index e2c7d8ef205f..1c99b964a0eb 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -32,11 +32,10 @@
  */
 unsigned char *scsi_bios_ptable(struct block_device *dev)
 {
-   struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping;
unsigned char *res = NULL;
struct folio *folio;
 
-   folio = read_mapping_folio(mapping, 0, NULL);
+   folio = bdev_read_folio(bdev_whole(dev), 0);
if (IS_ERR(folio))
return NULL;
 
-- 
2.39.2




[PATCH -next RFC 00/14] block: don't access bd_inode directly from other modules

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Patch 1 adds some bdev APIs; follow-up patches then use these APIs
to avoid accessing bd_inode directly, and hopefully the bd_inode field can
be removed eventually (after figuring out a way to handle fs/buffer.c).

Yu Kuai (14):
  block: add some bdev apis
  xen/blkback: use bdev api in xen_update_blkif_status()
  bcache: use bdev api in read_super()
  mtd: block2mtd: use bdev apis
  s390/dasd: use bdev api in dasd_format()
  scsicam: use bdev api in scsi_bios_ptable()
  bcachefs: remove dead function bdev_sectors()
  btrfs: use bdev apis
  cramfs: use bdev apis in cramfs_blkdev_read()
  erofs: use bdev api
  ext4: use bdev apis
  jbd2: use bdev apis
  gfs2: use bdev api
  nilfs2: use bdev api in nilfs_attach_log_writer()

 block/bdev.c   | 116 +
 block/bio.c|   1 +
 block/blk.h|   2 -
 drivers/block/xen-blkback/xenbus.c |   3 +-
 drivers/md/bcache/super.c  |  11 ++-
 drivers/mtd/devices/block2mtd.c|  80 +---
 drivers/s390/block/dasd_ioctl.c|   5 +-
 drivers/scsi/scsicam.c |   3 +-
 fs/bcachefs/util.h |   5 --
 fs/btrfs/disk-io.c |  68 -
 fs/btrfs/volumes.c |  17 ++---
 fs/btrfs/zoned.c   |  12 ++-
 fs/cramfs/inode.c  |  35 +++--
 fs/erofs/data.c|  17 +++--
 fs/erofs/internal.h|   1 +
 fs/ext4/dir.c  |   6 +-
 fs/ext4/ext4_jbd2.c|   6 +-
 fs/ext4/super.c|  27 +--
 fs/gfs2/glock.c|   2 +-
 fs/gfs2/ops_fstype.c   |   2 +-
 fs/jbd2/journal.c  |   3 +-
 fs/jbd2/recovery.c |   6 +-
 fs/nilfs2/segment.c|   2 +-
 include/linux/blkdev.h |  27 +++
 include/linux/buffer_head.h|   5 +-
 25 files changed, 273 insertions(+), 189 deletions(-)

-- 
2.39.2




[PATCH -next RFC 04/14] mtd: block2mtd: use bdev apis

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 drivers/mtd/devices/block2mtd.c | 80 +++--
 1 file changed, 35 insertions(+), 45 deletions(-)

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index aa44a23ec045..927fc9cf0856 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -46,40 +46,34 @@ struct block2mtd_dev {
 /* Static info about the MTD, used in cleanup_module */
 static LIST_HEAD(blkmtd_device_list);
 
-
-static struct page *page_read(struct address_space *mapping, pgoff_t index)
-{
-   return read_mapping_page(mapping, index, NULL);
-}
-
 /* erase a specified part of the device */
 static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len)
 {
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
-   struct page *page;
+   struct block_device *bdev = dev->bdev_handle->bdev;
+   struct folio *folio;
pgoff_t index = to >> PAGE_SHIFT;   // page index
int pages = len >> PAGE_SHIFT;
u_long *p;
u_long *max;
 
while (pages) {
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(bdev, index);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   max = page_address(page) + PAGE_SIZE;
-   for (p=page_address(page); ppriv;
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
-   struct page *page;
+   struct folio *folio;
pgoff_t index = from >> PAGE_SHIFT;
int offset = from & (PAGE_SIZE-1);
int cpylen;
@@ -120,12 +112,12 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t 
from, size_t len,
cpylen = len;   // this page
len = len - cpylen;
 
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(dev->bdev_handle->bdev, index);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   memcpy(buf, page_address(page) + offset, cpylen);
-   put_page(page);
+   memcpy(buf, folio_address(folio) + offset, cpylen);
+   folio_put(folio);
 
if (retlen)
*retlen += cpylen;
@@ -141,9 +133,8 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t 
from, size_t len,
 static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf,
loff_t to, size_t len, size_t *retlen)
 {
-   struct page *page;
-   struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
+   struct block_device *bdev = dev->bdev_handle->bdev;
+   struct folio *folio;
pgoff_t index = to >> PAGE_SHIFT;   // page index
int offset = to & ~PAGE_MASK;   // page offset
int cpylen;
@@ -155,18 +146,18 @@ static int _block2mtd_write(struct block2mtd_dev *dev, 
const u_char *buf,
cpylen = len;   // this page
len = len - cpylen;
 
-   page = page_read(mapping, index);
-   if (IS_ERR(page))
-   return PTR_ERR(page);
+   folio = bdev_read_folio(bdev, index);
+   if (IS_ERR(folio))
+   return PTR_ERR(folio);
 
-   if (memcmp(page_address(page)+offset, buf, cpylen)) {
-   lock_page(page);
-   memcpy(page_address(page) + offset, buf, cpylen);
-   set_page_dirty(page);
-   unlock_page(page);
-   balance_dirty_pages_ratelimited(mapping);
+   if (memcmp(folio_address(folio) + offset, buf, cpylen)) {
+   folio_lock(folio);
+   memcpy(folio_address(folio) + offset, buf, cpylen);
+   folio_mark_dirty(folio);
+   folio_unlock(folio);
+   bdev_balance_dirty_pages_ratelimited(bdev);
}
-   put_page(page);
+   folio_put(folio);
 
if (retlen)
*retlen += cpylen;
@@ -211,8 +202,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
kfree(dev->mtd.name);
 
if (dev->bdev_handle) {
-   invalidate_mapping_pages(
-   dev->bdev_handle->bdev->bd_inode->i_mapping, 0, -1)

[PATCH -next RFC 11/14] ext4: use bdev apis

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/ext4/dir.c   |  6 ++
 fs/ext4/ext4_jbd2.c |  6 +++---
 fs/ext4/super.c | 27 ---
 include/linux/buffer_head.h |  5 +++--
 4 files changed, 12 insertions(+), 32 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3985f8c33f95..64e35eb6a324 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -191,10 +191,8 @@ static int ext4_readdir(struct file *file, struct 
dir_context *ctx)
pgoff_t index = map.m_pblk >>
(PAGE_SHIFT - inode->i_blkbits);
if (!ra_has_index(&file->f_ra, index))
-   page_cache_sync_readahead(
-   sb->s_bdev->bd_inode->i_mapping,
-   &file->f_ra, file,
-   index, 1);
+   bdev_sync_readahead(sb->s_bdev, &file->f_ra,
+   file, index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
if (IS_ERR(bh)) {
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d1a2e6624401..c1bf3a00fad9 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -206,7 +206,6 @@ static void ext4_journal_abort_handle(const char *caller, 
unsigned int line,
 
 static void ext4_check_bdev_write_error(struct super_block *sb)
 {
-   struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err;
 
@@ -216,9 +215,10 @@ static void ext4_check_bdev_write_error(struct super_block 
*sb)
 * we could read old data from disk and write it out again, which
 * may lead to on-disk filesystem inconsistency.
 */
-   if (errseq_check(&mapping->wb_err, READ_ONCE(sbi->s_bdev_wb_err))) {
+   if (bdev_wb_err_check(sb->s_bdev, READ_ONCE(sbi->s_bdev_wb_err))) {
spin_lock(&sbi->s_bdev_wb_lock);
-   err = errseq_check_and_advance(&mapping->wb_err, 
&sbi->s_bdev_wb_err);
+   err = bdev_wb_err_check_and_advance(sb->s_bdev,
+   &sbi->s_bdev_wb_err);
spin_unlock(&sbi->s_bdev_wb_lock);
if (err)
ext4_error_err(sb, -err,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0980845c8b8f..243671d86db3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -244,8 +244,7 @@ static struct buffer_head *__ext4_sb_bread_gfp(struct 
super_block *sb,
 struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
   blk_opf_t op_flags)
 {
-   gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
-   ~__GFP_FS) | __GFP_MOVABLE;
+   gfp_t gfp = bdev_gfp_constraint(sb->s_bdev, ~__GFP_FS) | __GFP_MOVABLE;
 
return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
 }
@@ -253,8 +252,7 @@ struct buffer_head *ext4_sb_bread(struct super_block *sb, 
sector_t block,
 struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
sector_t block)
 {
-   gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
-   ~__GFP_FS);
+   gfp_t gfp = bdev_gfp_constraint(sb->s_bdev, ~__GFP_FS);
 
return __ext4_sb_bread_gfp(sb, block, 0, gfp);
 }
@@ -492,22 +490,6 @@ static void ext4_maybe_update_superblock(struct 
super_block *sb)
schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
 }
 
-/*
- * The del_gendisk() function uninitializes the disk-specific data
- * structures, including the bdi structure, without telling anyone
- * else.  Once this happens, any attempt to call mark_buffer_dirty()
- * (for example, by ext4_commit_super), will cause a kernel OOPS.
- * This is a kludge to prevent these oops until we can put in a proper
- * hook in del_gendisk() to inform the VFS and file system layers.
- */
-static int block_device_ejected(struct super_block *sb)
-{
-   struct inode *bd_inode = sb->s_bdev->bd_inode;
-   struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
-
-   return bdi->dev == NULL;
-}
-
 static void ext4_journal_commit_callback(journal_t *journal, transaction_t 
*txn)
 {
struct super_block  *sb = journal->j_private;
@@ -5585,8 +5567,7 @@ static int __ext4_fill_super(struct fs_context *fc, 
struct super_block *sb)
 * used to detect the metadata async write error.
 */
spin_lock_init(&sbi->s_bdev_wb_lock);
-   errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
-  

[PATCH -next RFC 03/14] bcache: use bdev api in read_super()

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Signed-off-by: Yu Kuai 
---
 drivers/md/bcache/super.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 1402096b8076..376b9dc2523f 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -168,14 +168,13 @@ static const char *read_super(struct cache_sb *sb, struct 
block_device *bdev,
 {
const char *err;
struct cache_sb_disk *s;
-   struct page *page;
+   struct folio *folio;
unsigned int i;
 
-   page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
-  SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
-   if (IS_ERR(page))
+   folio = bdev_read_folio_gfp(bdev, SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
+   if (IS_ERR(folio))
return "IO error";
-   s = page_address(page) + offset_in_page(SB_OFFSET);
+   s = folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
 
sb->offset  = le64_to_cpu(s->offset);
sb->version = le64_to_cpu(s->version);
@@ -272,7 +271,7 @@ static const char *read_super(struct cache_sb *sb, struct 
block_device *bdev,
*res = s;
return NULL;
 err:
-   put_page(page);
+   folio_put(folio);
return err;
 }
 
-- 
2.39.2




[PATCH -next RFC 09/14] cramfs: use bdev apis in cramfs_blkdev_read()

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

On the one hand, convert to using folios when reading the bdev inode; on the
other hand, avoid accessing bd_inode directly.

Also do some cleanup: there is no need for two for loops, so merge them and
remove the local array pages.

Signed-off-by: Yu Kuai 
---
 fs/cramfs/inode.c | 35 ---
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 60dbfa0f8805..46ff4e5506fd 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -183,9 +183,6 @@ static int next_buffer;
 static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset,
unsigned int len)
 {
-   struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
-   struct file_ra_state ra = {};
-   struct page *pages[BLKS_PER_BUF];
unsigned i, blocknr, buffer;
unsigned long devsize;
char *data;
@@ -214,37 +211,29 @@ static void *cramfs_blkdev_read(struct super_block *sb, 
unsigned int offset,
devsize = bdev_nr_bytes(sb->s_bdev) >> PAGE_SHIFT;
 
/* Ok, read in BLKS_PER_BUF pages completely first. */
-   file_ra_state_init(&ra, mapping);
-   page_cache_sync_readahead(mapping, &ra, NULL, blocknr, BLKS_PER_BUF);
-
-   for (i = 0; i < BLKS_PER_BUF; i++) {
-   struct page *page = NULL;
-
-   if (blocknr + i < devsize) {
-   page = read_mapping_page(mapping, blocknr + i, NULL);
-   /* synchronous error? */
-   if (IS_ERR(page))
-   page = NULL;
-   }
-   pages[i] = page;
-   }
+   bdev_sync_readahead(sb->s_bdev, NULL, NULL, blocknr, BLKS_PER_BUF);
 
buffer = next_buffer;
next_buffer = NEXT_BUFFER(buffer);
buffer_blocknr[buffer] = blocknr;
buffer_dev[buffer] = sb;
-
data = read_buffers[buffer];
+
for (i = 0; i < BLKS_PER_BUF; i++) {
-   struct page *page = pages[i];
+   struct folio *folio = NULL;
+
+   if (blocknr + i < devsize)
+   folio = bdev_read_folio(sb->s_bdev, blocknr + i);
 
-   if (page) {
-   memcpy_from_page(data, page, 0, PAGE_SIZE);
-   put_page(page);
-   } else
+   if (IS_ERR_OR_NULL(folio)) {
memset(data, 0, PAGE_SIZE);
+   } else {
+   memcpy_from_folio(data, folio, 0, PAGE_SIZE);
+   folio_put(folio);
+   }
data += PAGE_SIZE;
}
+
return read_buffers[buffer] + offset;
 }
 
-- 
2.39.2




[PATCH -next RFC 07/14] bcachefs: remove dead function bdev_sectors()

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

bdev_sectors() is not used hence remove it.

Signed-off-by: Yu Kuai 
---
 fs/bcachefs/util.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index b93d5f481c7e..932ca6f7a37b 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -541,11 +541,6 @@ static inline unsigned fract_exp_two(unsigned x, unsigned 
fract_bits)
 void bch2_bio_map(struct bio *bio, void *base, size_t);
 int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
 
-static inline sector_t bdev_sectors(struct block_device *bdev)
-{
-   return bdev->bd_inode->i_size >> 9;
-}
-
 #define closure_bio_submit(bio, cl)\
 do {   \
closure_get(cl);\
-- 
2.39.2




[PATCH -next RFC 10/14] erofs: use bdev api

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 fs/erofs/data.c | 17 +++--
 fs/erofs/internal.h |  1 +
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index c98aeda8abb2..b9d2c90f9b22 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -32,8 +32,8 @@ void erofs_put_metabuf(struct erofs_buf *buf)
 void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
  enum erofs_kmap_type type)
 {
-   struct inode *inode = buf->inode;
-   erofs_off_t offset = (erofs_off_t)blkaddr << inode->i_blkbits;
+   u8 blkbits = buf->inode ? buf->inode->i_blkbits : block_bits(buf->bdev);
+   erofs_off_t offset = (erofs_off_t)blkaddr << blkbits;
pgoff_t index = offset >> PAGE_SHIFT;
struct page *page = buf->page;
struct folio *folio;
@@ -43,7 +43,9 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
erofs_put_metabuf(buf);
 
nofs_flag = memalloc_nofs_save();
-   folio = read_cache_folio(inode->i_mapping, index, NULL, NULL);
+   folio = buf->inode ?
+   read_mapping_folio(buf->inode->i_mapping, index, NULL) :
+   bdev_read_folio(buf->bdev, index);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(folio))
return folio;
@@ -67,10 +69,13 @@ void *erofs_bread(struct erofs_buf *buf, erofs_blk_t 
blkaddr,
 
 void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
 {
-   if (erofs_is_fscache_mode(sb))
+   if (erofs_is_fscache_mode(sb)) {
buf->inode = EROFS_SB(sb)->s_fscache->inode;
-   else
-   buf->inode = sb->s_bdev->bd_inode;
+   buf->bdev = NULL;
+   } else {
+   buf->inode = NULL;
+   buf->bdev = sb->s_bdev;
+   }
 }
 
 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index b0409badb017..a68b0924c052 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -224,6 +224,7 @@ enum erofs_kmap_type {
 
 struct erofs_buf {
struct inode *inode;
+   struct block_device *bdev;
struct page *page;
void *base;
enum erofs_kmap_type kmap_type;
-- 
2.39.2




[PATCH -next RFC 05/14] s390/dasd: use bdev api in dasd_format()

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 drivers/s390/block/dasd_ioctl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 61b9675e2a67..bbfb958237e6 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -221,8 +221,9 @@ dasd_format(struct dasd_block *block, struct format_data_t 
*fdata)
 * enabling the device later.
 */
if (fdata->start_unit == 0) {
-   block->gdp->part0->bd_inode->i_blkbits =
-   blksize_bits(fdata->blksize);
+   rc = set_blocksize(block->gdp->part0, fdata->blksize);
+   if (rc)
+   return rc;
}
 
rc = base->discipline->format_device(base, fdata, 1);
-- 
2.39.2




[PATCH -next RFC 01/14] block: add some bdev apis

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

These APIs will be used by other modules, so that bd_inode won't be
accessed directly from other modules.

Signed-off-by: Yu Kuai 
---
 block/bdev.c   | 116 +
 block/bio.c|   1 +
 block/blk.h|   2 -
 include/linux/blkdev.h |  27 ++
 4 files changed, 144 insertions(+), 2 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 6f73b02d549c..fcba5c1bd113 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -92,6 +92,13 @@ void invalidate_bdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
+void invalidate_bdev_range(struct block_device *bdev, pgoff_t start,
+  pgoff_t end)
+{
+   invalidate_mapping_pages(bdev->bd_inode->i_mapping, start, end);
+}
+EXPORT_SYMBOL_GPL(invalidate_bdev_range);
+
 /*
  * Drop all buffers & page cache for given bdev range. This function bails
  * with error if bdev has other exclusive owner (such as filesystem).
@@ -124,6 +131,7 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
 lstart >> PAGE_SHIFT,
 lend >> PAGE_SHIFT);
 }
+EXPORT_SYMBOL_GPL(truncate_bdev_range);
 
 static void set_init_blocksize(struct block_device *bdev)
 {
@@ -138,6 +146,18 @@ static void set_init_blocksize(struct block_device *bdev)
bdev->bd_inode->i_blkbits = blksize_bits(bsize);
 }
 
+loff_t bdev_size(struct block_device *bdev)
+{
+   loff_t size;
+
+   spin_lock(&bdev->bd_size_lock);
+   size = i_size_read(bdev->bd_inode);
+   spin_unlock(&bdev->bd_size_lock);
+
+   return size;
+}
+EXPORT_SYMBOL_GPL(bdev_size);
+
 int set_blocksize(struct block_device *bdev, int size)
 {
/* Size must be a power of two, and between 512 and PAGE_SIZE */
@@ -1144,3 +1164,99 @@ static int __init setup_bdev_allow_write_mounted(char 
*str)
return 1;
 }
 __setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted);
+
+struct folio *bdev_read_folio(struct block_device *bdev, pgoff_t index)
+{
+   return read_mapping_folio(bdev->bd_inode->i_mapping, index, NULL);
+}
+EXPORT_SYMBOL_GPL(bdev_read_folio);
+
+struct folio *bdev_read_folio_gfp(struct block_device *bdev, pgoff_t index,
+ gfp_t gfp)
+{
+   return mapping_read_folio_gfp(bdev->bd_inode->i_mapping, index, gfp);
+}
+EXPORT_SYMBOL_GPL(bdev_read_folio_gfp);
+
+struct folio *bdev_get_folio(struct block_device *bdev, pgoff_t index)
+{
+   return filemap_get_folio(bdev->bd_inode->i_mapping, index);
+}
+EXPORT_SYMBOL_GPL(bdev_get_folio);
+
+struct folio *bdev_find_or_create_folio(struct block_device *bdev,
+   pgoff_t index, gfp_t gfp)
+{
+   return __filemap_get_folio(bdev->bd_inode->i_mapping, index,
+  FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+}
+EXPORT_SYMBOL_GPL(bdev_find_or_create_folio);
+
+int bdev_wb_err_check(struct block_device *bdev, errseq_t since)
+{
+   return errseq_check(&bdev->bd_inode->i_mapping->wb_err, since);
+}
+EXPORT_SYMBOL_GPL(bdev_wb_err_check);
+
+int bdev_wb_err_check_and_advance(struct block_device *bdev, errseq_t *since)
+{
+   return errseq_check_and_advance(&bdev->bd_inode->i_mapping->wb_err,
+   since);
+}
+EXPORT_SYMBOL_GPL(bdev_wb_err_check_and_advance);
+
+void bdev_balance_dirty_pages_ratelimited(struct block_device *bdev)
+{
+   return balance_dirty_pages_ratelimited(bdev->bd_inode->i_mapping);
+}
+EXPORT_SYMBOL_GPL(bdev_balance_dirty_pages_ratelimited);
+
+void bdev_sync_readahead(struct block_device *bdev, struct file_ra_state *ra,
+struct file *file, pgoff_t index,
+unsigned long req_count)
+{
+   struct file_ra_state tmp_ra = {};
+
+   if (!ra) {
+   ra = &tmp_ra;
+   file_ra_state_init(ra, bdev->bd_inode->i_mapping);
+   }
+   page_cache_sync_readahead(bdev->bd_inode->i_mapping, ra, file, index,
+ req_count);
+}
+EXPORT_SYMBOL_GPL(bdev_sync_readahead);
+
+void bdev_attach_wb(struct block_device *bdev)
+{
+   inode_attach_wb(bdev->bd_inode, NULL);
+}
+EXPORT_SYMBOL_GPL(bdev_attach_wb);
+
+void bdev_correlate_mapping(struct block_device *bdev,
+   struct address_space *mapping)
+{
+   mapping->host = bdev->bd_inode;
+}
+EXPORT_SYMBOL_GPL(bdev_correlate_mapping);
+
+gfp_t bdev_gfp_constraint(struct block_device *bdev, gfp_t gfp)
+{
+   return mapping_gfp_constraint(bdev->bd_inode->i_mapping, gfp);
+}
+EXPORT_SYMBOL_GPL(bdev_gfp_constraint);
+
+/*
+ * The del_gendisk() function uninitializes the disk-specific data
+ * structures, including the bdi structure, without telling anyone
+ * else.  Once this happens, any attempt to c

[PATCH -next RFC 02/14] xen/blkback: use bdev api in xen_update_blkif_status()

2023-12-05 Thread Yu Kuai
From: Yu Kuai 

Avoid accessing bd_inode directly, in preparation for removing bd_inode from
block_device.

Signed-off-by: Yu Kuai 
---
 drivers/block/xen-blkback/xenbus.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index e34219ea2b05..e645afa4af57 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -104,8 +104,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
xenbus_dev_error(blkif->be->dev, err, "block flush");
return;
}
-   invalidate_inode_pages2(
-   blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);
+   invalidate_bdev(blkif->vbd.bdev_handle->bdev);
 
for (i = 0; i < blkif->nr_rings; i++) {
ring = &blkif->rings[i];
-- 
2.39.2




Re: [PATCH block/for-next v2 01/16] block: add a new helper to get inode from block_device

2023-11-27 Thread Yu Kuai

Hi,

在 2023/11/28 0:32, Christoph Hellwig 写道:

On Mon, Nov 27, 2023 at 09:07:22PM +0800, Yu Kuai wrote:

1) Is it okay to add a new helper to pass in bdev for the following APIs?



For some we already have them (e.g. bdev_nr_bytes to read the bdev
size); for some we need to add them.  The big thing that seems to
stick out is the page cache API, and I think that is where we need to
define maintainable APIs for file systems and others to use the
block device page cache.  Probably only in folio versions and not
page ones, if we're touching the code anyway.


Thanks for the advice! If I understand correctly, do you mean
that all other filesystems/drivers that are using the page versions can
safely switch to the folio versions now?

By the way, my original idea was to add a new field 'bd_flags'
to block_device, and then add a new bit so that bio_check_ro() will
only warn once for each partition. Since this patchset will now be quite
complex, I'll add a new bool field 'bd_ro_warned' to fix the above
problem first, and then add 'bd_flags' once this patchset is done.

Thanks,
Kuai




2) For the file fs/buffer.c, there are some special usages like the
following, for which I don't think it's good to add a helper:

spin_lock(&bd_inode->i_mapping->private_lock);

Is it okay to move the following APIs from fs/buffer.c directly to
block/bdev.c?

__find_get_block
bdev_getblk


I'm not sure moving is a good idea, but we might end up with
some kind of low-level access from buffer.c, be that special
helpers, a separate header or something else.  Let's sort out
the rest of the kernel first.

.






Re: [PATCH block/for-next v2 01/16] block: add a new helper to get inode from block_device

2023-11-27 Thread Yu Kuai

Hi,

在 2023/11/27 15:21, Christoph Hellwig 写道:

On Mon, Nov 27, 2023 at 02:21:01PM +0800, Yu Kuai wrote:

From: Yu Kuai 

block_device is allocated from bdev_alloc() by bdev_alloc_inode(), and
currently block_device contains a pointer that points to the address of
the inode, while that inode is allocated together with it:


This is going the wrong way.  Nothing outside of core block layer code
should ever directly use the bdev inode.  We've been rather sloppy
and added a lot of direct references to it, but they really need to
go away and be replaced with well-defined high-level operations on
struct block_device.  Once that is done we can remove the bd_inode
pointer, but replacing it with something that pokes even more deeply
into bdev internals is a bad idea.


Thanks for the advice. However, after collecting how other modules are
using the bdev inode, I have two main questions:

1) Is it okay to add a new helper to pass in bdev for the following APIs?
If so, then almost all filesystems and drivers can avoid accessing bd_inode
directly.

errseq_check(&bdev->bd_inode->i_mapping->wb_err, wb_err);
errseq_check_and_advance(&bdev->bd_inode->i_mapping->wb_err, &wb_err);
mapping_gfp_constraint(bdev->bd_inode->i_mapping, gfp);
i_size_read(bdev->bd_inode)
find_get_page(bdev->bd_inode->i_mapping, offset);
find_or_create_page(bdev->bd_inode->i_mapping, index, gfp);
read_cache_page_gfp(bdev->bd_inode->i_mapping, index, gfp);
invalidate_inode_pages2(bdev->bd_inode->i_mapping);
invalidate_inode_pages2_range(bdev->bd_inode->i_mapping, start, end);
read_mapping_folio(bdev->bd_inode->i_mapping, index, file);
read_mapping_page(bdev->bd_inode->i_mapping, index, file);
balance_dirty_pages_ratelimited(bdev->bd_inode->i_mapping)
file_ra_state_init(ra, bdev->bd_inode->i_mapping);
page_cache_sync_readahead(bdev->bd_inode->i_mapping, ra, file, index, 
req_count);

inode_to_bdi(bdev->bd_inode)

2) For the file fs/buffer.c, there are some special usages like the
following, for which I don't think it's good to add a helper:

spin_lock(&bd_inode->i_mapping->private_lock);

Is it okay to move the following APIs from fs/buffer.c directly to
block/bdev.c?

__find_get_block
bdev_getblk

Thanks,
Kuai


.






Re: [PATCH block/for-next v2 07/16] bcachefs: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai

Hi,

在 2023/11/27 15:24, Kent Overstreet 写道:

On Mon, Nov 27, 2023 at 04:09:47PM +0900, Damien Le Moal wrote:

On 11/27/23 15:21, Yu Kuai wrote:

From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
  fs/bcachefs/util.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2984b57b2958..fe7ccb3a3517 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -518,7 +518,7 @@ int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
  
  static inline sector_t bdev_sectors(struct block_device *bdev)

  {
-   return bdev->bd_inode->i_size >> 9;
+   return bdev_inode(bdev)->i_size >> 9;


shouldn't this use i_size_read() ?

I missed the history with this but why not use bdev_nr_sectors() and delete this
helper ?


Actually, this helper seems to be dead code.


Yes, there is no caller of this helper, I'll remove this helper.

Thanks,
Kuai


.






[PATCH block/for-next v2 12/16] gfs2: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/gfs2/glock.c  | 2 +-
 fs/gfs2/ops_fstype.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d6bf1f8c25dc..ec6394544ebb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1210,7 +1210,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping = gfs2_glock2aspace(gl);
if (mapping) {
 mapping->a_ops = &gfs2_meta_aops;
-   mapping->host = s->s_bdev->bd_inode;
+   mapping->host = bdev_inode(s->s_bdev);
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->private_data = NULL;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b108c5d26839..dfc4735cfd54 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -114,7 +114,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
address_space_init_once(mapping);
mapping->a_ops = &gfs2_rgrp_aops;
-   mapping->host = sb->s_bdev->bd_inode;
+   mapping->host = bdev_inode(sb->s_bdev);
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->private_data = NULL;
-- 
2.39.2




[PATCH block/for-next v2 14/16] nilfs2: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/nilfs2/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55e31cc903d1..d346f5c1aad7 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2823,7 +2823,7 @@ int nilfs_attach_log_writer(struct super_block *sb, 
struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
 
-   inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+   inode_attach_wb(bdev_inode(nilfs->ns_bdev), NULL);
 
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (unlikely(err))
-- 
2.39.2




[PATCH block/for-next v2 10/16] erofs: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/erofs/data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 029c761670bf..85d490b3b53d 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -70,7 +70,7 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct 
super_block *sb)
if (erofs_is_fscache_mode(sb))
buf->inode = EROFS_SB(sb)->s_fscache->inode;
else
-   buf->inode = sb->s_bdev->bd_inode;
+   buf->inode = bdev_inode(sb->s_bdev);
 }
 
 void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
-- 
2.39.2




[PATCH block/for-next v2 16/16] block: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient; also remove the field 'bd_inode' since it's
not used anymore.

Signed-off-by: Yu Kuai 
---
 block/bdev.c  | 39 ++-
 block/blk-zoned.c |  4 ++--
 block/fops.c  |  4 ++--
 block/genhd.c |  8 
 block/ioctl.c |  8 
 block/partitions/core.c   |  9 +
 include/linux/blk_types.h |  3 +--
 include/linux/blkdev.h|  4 ++--
 8 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 7509389095b7..8af89cf91ae1 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(I_BDEV);
 
 static void bdev_write_inode(struct block_device *bdev)
 {
-   struct inode *inode = bdev->bd_inode;
+   struct inode *inode = bdev_inode(bdev);
int ret;
 
spin_lock(&inode->i_lock);
@@ -62,7 +62,7 @@ static void bdev_write_inode(struct block_device *bdev)
 /* Kill _all_ buffers and pagecache , dirty or not.. */
 static void kill_bdev(struct block_device *bdev)
 {
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(bdev)->i_mapping;
 
if (mapping_empty(mapping))
return;
@@ -74,7 +74,7 @@ static void kill_bdev(struct block_device *bdev)
 /* Invalidate clean unused buffers and pagecache. */
 void invalidate_bdev(struct block_device *bdev)
 {
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(bdev)->i_mapping;
 
if (mapping->nrpages) {
invalidate_bh_lrus();
@@ -102,7 +102,7 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
goto invalidate;
}
 
-   truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
+   truncate_inode_pages_range(bdev_inode(bdev)->i_mapping, lstart, lend);
if (!(mode & BLK_OPEN_EXCL))
bd_abort_claiming(bdev, truncate_bdev_range);
return 0;
@@ -112,7 +112,7 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
 * Someone else has handle exclusively open. Try invalidating instead.
 * The 'end' argument is inclusive so the rounding is safe.
 */
-   return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
+   return invalidate_inode_pages2_range(bdev_inode(bdev)->i_mapping,
 lstart >> PAGE_SHIFT,
 lend >> PAGE_SHIFT);
 }
@@ -120,18 +120,21 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
 static void set_init_blocksize(struct block_device *bdev)
 {
unsigned int bsize = bdev_logical_block_size(bdev);
-   loff_t size = i_size_read(bdev->bd_inode);
+   struct inode *inode = bdev_inode(bdev);
+   loff_t size = i_size_read(inode);
 
while (bsize < PAGE_SIZE) {
if (size & bsize)
break;
bsize <<= 1;
}
-   bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+   inode->i_blkbits = blksize_bits(bsize);
 }
 
 int set_blocksize(struct block_device *bdev, int size)
 {
+   struct inode *inode;
+
/* Size must be a power of two, and between 512 and PAGE_SIZE */
if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
return -EINVAL;
@@ -141,9 +144,10 @@ int set_blocksize(struct block_device *bdev, int size)
return -EINVAL;
 
/* Don't change the size if it is same as current */
-   if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
+   inode = bdev_inode(bdev);
+   if (inode->i_blkbits != blksize_bits(size)) {
sync_blockdev(bdev);
-   bdev->bd_inode->i_blkbits = blksize_bits(size);
+   inode->i_blkbits = blksize_bits(size);
kill_bdev(bdev);
}
return 0;
@@ -178,7 +182,7 @@ int sync_blockdev_nowait(struct block_device *bdev)
 {
if (!bdev)
return 0;
-   return filemap_flush(bdev->bd_inode->i_mapping);
+   return filemap_flush(bdev_inode(bdev)->i_mapping);
 }
 EXPORT_SYMBOL_GPL(sync_blockdev_nowait);
 
@@ -190,13 +194,13 @@ int sync_blockdev(struct block_device *bdev)
 {
if (!bdev)
return 0;
-   return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+   return filemap_write_and_wait(bdev_inode(bdev)->i_mapping);
 }
 EXPORT_SYMBOL(sync_blockdev);
 
 int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
 {
-   return filemap_write_and_wait_range(bdev->bd_inode->i_mapping,
+   return filemap_write_and_wait_range(bdev_inode(bdev)->i_mapping,
lstart, lend);
 }
 EXPORT_

[PATCH block/for-next v2 13/16] jbd2: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/jbd2/journal.c  | 3 ++-
 fs/jbd2/recovery.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ed53188472f9..e2d034cc9dc0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2003,7 +2003,8 @@ static int __jbd2_journal_erase(journal_t *journal, 
unsigned int flags)
byte_count = (block_stop - block_start + 1) *
journal->j_blocksize;
 
-   truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
+   truncate_inode_pages_range(
+   bdev_inode(journal->j_dev)->i_mapping,
byte_start, byte_stop);
 
if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 01f744cb97a4..7774efe872e8 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -309,7 +309,7 @@ int jbd2_journal_recover(journal_t *journal)
}
 
wb_err = 0;
-   mapping = journal->j_fs_dev->bd_inode->i_mapping;
+   mapping = bdev_inode(journal->j_fs_dev)->i_mapping;
errseq_check_and_advance(&mapping->wb_err, &wb_err);
err = do_one_pass(journal, &info, PASS_SCAN);
if (!err)
-- 
2.39.2




[PATCH block/for-next v2 05/16] s390/dasd: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 drivers/s390/block/dasd_ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 61b9675e2a67..a34554ace310 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -221,7 +221,7 @@ dasd_format(struct dasd_block *block, struct format_data_t 
*fdata)
 * enabling the device later.
 */
if (fdata->start_unit == 0) {
-   block->gdp->part0->bd_inode->i_blkbits =
+   bdev_inode(block->gdp->part0)->i_blkbits =
blksize_bits(fdata->blksize);
}
 
-- 
2.39.2




[PATCH block/for-next v2 15/16] buffer: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/buffer.c | 8 
 include/linux/buffer_head.h | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 967f34b70aa8..bf993198f881 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -189,7 +189,7 @@ EXPORT_SYMBOL(end_buffer_write_sync);
 static struct buffer_head *
 __find_get_block_slow(struct block_device *bdev, sector_t block)
 {
-   struct inode *bd_inode = bdev->bd_inode;
+   struct inode *bd_inode = bdev_inode(bdev);
struct address_space *bd_mapping = bd_inode->i_mapping;
struct buffer_head *ret = NULL;
pgoff_t index;
@@ -1032,7 +1032,7 @@ static int
 grow_dev_page(struct block_device *bdev, sector_t block,
  pgoff_t index, int size, int sizebits, gfp_t gfp)
 {
-   struct inode *inode = bdev->bd_inode;
+   struct inode *inode = bdev_inode(bdev);
struct folio *folio;
struct buffer_head *bh;
sector_t end_block;
@@ -1463,7 +1463,7 @@ __bread_gfp(struct block_device *bdev, sector_t block,
 {
struct buffer_head *bh;
 
-   gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+   gfp |= mapping_gfp_constraint(bdev_inode(bdev)->i_mapping, ~__GFP_FS);
 
/*
 * Prefer looping in the allocator rather than here, at least that
@@ -1696,7 +1696,7 @@ EXPORT_SYMBOL(create_empty_buffers);
  */
 void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t 
len)
 {
-   struct inode *bd_inode = bdev->bd_inode;
+   struct inode *bd_inode = bdev_inode(bdev);
struct address_space *bd_mapping = bd_inode->i_mapping;
struct folio_batch fbatch;
pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5f23ee599889..da9ee62e3aa9 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -341,7 +341,7 @@ static inline struct buffer_head *getblk_unmovable(struct 
block_device *bdev,
 {
gfp_t gfp;
 
-   gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+   gfp = mapping_gfp_constraint(bdev_inode(bdev)->i_mapping, ~__GFP_FS);
gfp |= __GFP_NOFAIL;
 
return bdev_getblk(bdev, block, size, gfp);
@@ -352,7 +352,7 @@ static inline struct buffer_head *__getblk(struct 
block_device *bdev,
 {
gfp_t gfp;
 
-   gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS);
+   gfp = mapping_gfp_constraint(bdev_inode(bdev)->i_mapping, ~__GFP_FS);
gfp |= __GFP_MOVABLE | __GFP_NOFAIL;
 
return bdev_getblk(bdev, block, size, gfp);
-- 
2.39.2




[PATCH block/for-next v2 11/16] ext4: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/ext4/dir.c   | 2 +-
 fs/ext4/ext4_jbd2.c | 2 +-
 fs/ext4/super.c | 8 
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3985f8c33f95..6e9fe408642b 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -192,7 +192,7 @@ static int ext4_readdir(struct file *file, struct 
dir_context *ctx)
(PAGE_SHIFT - inode->i_blkbits);
if (!ra_has_index(&file->f_ra, index))
page_cache_sync_readahead(
-   sb->s_bdev->bd_inode->i_mapping,
+   bdev_inode(sb->s_bdev)->i_mapping,
&file->f_ra, file,
index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d1a2e6624401..e0e7f71d022d 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -206,7 +206,7 @@ static void ext4_journal_abort_handle(const char *caller, 
unsigned int line,
 
 static void ext4_check_bdev_write_error(struct super_block *sb)
 {
-   struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(sb->s_bdev)->i_mapping;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err;
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c5fcf377ab1f..da6af2205e55 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -244,7 +244,7 @@ static struct buffer_head *__ext4_sb_bread_gfp(struct 
super_block *sb,
 struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
   blk_opf_t op_flags)
 {
-   gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
+   gfp_t gfp = mapping_gfp_constraint(bdev_inode(sb->s_bdev)->i_mapping,
~__GFP_FS) | __GFP_MOVABLE;
 
return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
@@ -253,7 +253,7 @@ struct buffer_head *ext4_sb_bread(struct super_block *sb, 
sector_t block,
 struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
sector_t block)
 {
-   gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
+   gfp_t gfp = mapping_gfp_constraint(bdev_inode(sb->s_bdev)->i_mapping,
~__GFP_FS);
 
return __ext4_sb_bread_gfp(sb, block, 0, gfp);
@@ -502,7 +502,7 @@ static void ext4_maybe_update_superblock(struct super_block 
*sb)
  */
 static int block_device_ejected(struct super_block *sb)
 {
-   struct inode *bd_inode = sb->s_bdev->bd_inode;
+   struct inode *bd_inode = bdev_inode(sb->s_bdev);
struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
 
return bdi->dev == NULL;
@@ -5585,7 +5585,7 @@ static int __ext4_fill_super(struct fs_context *fc, 
struct super_block *sb)
 * used to detect the metadata async write error.
 */
spin_lock_init(&sbi->s_bdev_wb_lock);
-   errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
+   errseq_check_and_advance(&bdev_inode(sb->s_bdev)->i_mapping->wb_err,
 &sbi->s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
-- 
2.39.2




[PATCH block/for-next v2 02/16] xen/blkback: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 drivers/block/xen-blkback/xenbus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkback/xenbus.c 
b/drivers/block/xen-blkback/xenbus.c
index e34219ea2b05..e11f8123d213 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -105,7 +105,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
return;
}
invalidate_inode_pages2(
-   blkif->vbd.bdev_handle->bdev->bd_inode->i_mapping);
+   bdev_inode(blkif->vbd.bdev_handle->bdev)->i_mapping);
 
for (i = 0; i < blkif->nr_rings; i++) {
ring = &blkif->rings[i];
-- 
2.39.2




[PATCH block/for-next v2 07/16] bcachefs: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/bcachefs/util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2984b57b2958..fe7ccb3a3517 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -518,7 +518,7 @@ int bch2_bio_alloc_pages(struct bio *, size_t, gfp_t);
 
 static inline sector_t bdev_sectors(struct block_device *bdev)
 {
-   return bdev->bd_inode->i_size >> 9;
+   return bdev_inode(bdev)->i_size >> 9;
 }
 
 #define closure_bio_submit(bio, cl)\
-- 
2.39.2




[PATCH block/for-next v2 09/16] cramfs: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/cramfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 60dbfa0f8805..e9ed1e24c9e4 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -183,7 +183,7 @@ static int next_buffer;
 static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset,
unsigned int len)
 {
-   struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(sb->s_bdev)->i_mapping;
struct file_ra_state ra = {};
struct page *pages[BLKS_PER_BUF];
unsigned i, blocknr, buffer;
-- 
2.39.2




[PATCH block/for-next v2 03/16] bcache: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 drivers/md/bcache/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index bfe1685dbae5..3ab8bae049ee 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -171,7 +171,7 @@ static const char *read_super(struct cache_sb *sb, struct 
block_device *bdev,
struct page *page;
unsigned int i;
 
-   page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
+   page = read_cache_page_gfp(bdev_inode(bdev)->i_mapping,
   SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
if (IS_ERR(page))
return "IO error";
-- 
2.39.2




[PATCH block/for-next v2 08/16] btrfs: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

This is more efficient, and also prepares for removing the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 fs/btrfs/disk-io.c | 6 +++---
 fs/btrfs/volumes.c | 4 ++--
 fs/btrfs/zoned.c   | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 401ea09ae4b8..88b20cd4d046 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3653,7 +3653,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct 
block_device *bdev,
struct btrfs_super_block *super;
struct page *page;
u64 bytenr, bytenr_orig;
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(bdev)->i_mapping;
int ret;
 
bytenr_orig = btrfs_sb_offset(copy_num);
@@ -3740,7 +3740,7 @@ static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
 {
struct btrfs_fs_info *fs_info = device->fs_info;
-   struct address_space *mapping = device->bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(device->bdev)->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
@@ -3857,7 +3857,7 @@ static int wait_dev_supers(struct btrfs_device *device, 
int max_mirrors)
device->commit_total_bytes)
break;
 
-   page = find_get_page(device->bdev->bd_inode->i_mapping,
+   page = find_get_page(bdev_inode(device->bdev)->i_mapping,
 bytenr >> PAGE_SHIFT);
if (!page) {
errors++;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c6f16625af51..bbf157cedab7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1257,8 +1257,8 @@ static struct btrfs_super_block 
*btrfs_read_disk_super(struct block_device *bdev
return ERR_PTR(-EINVAL);
 
/* pull in the page with our super */
-   page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, 
GFP_KERNEL);
-
+   page = read_cache_page_gfp(bdev_inode(bdev)->i_mapping, index,
+  GFP_KERNEL);
if (IS_ERR(page))
return ERR_CAST(page);
 
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 188378ca19c7..a5f7f1458edf 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -120,7 +120,7 @@ static int sb_write_pointer(struct block_device *bdev, 
struct blk_zone *zones,
return -ENOENT;
} else if (full[0] && full[1]) {
/* Compare two super blocks */
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(bdev)->i_mapping;
struct page *page[BTRFS_NR_SB_LOG_ZONES];
struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES];
int i;
-- 
2.39.2




[PATCH block/for-next v2 04/16] mtd: block2mtd: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

Which is more efficient, and also prepares to remove the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 drivers/mtd/devices/block2mtd.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index aa44a23ec045..d4f7a4339a70 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -56,7 +56,7 @@ static struct page *page_read(struct address_space *mapping, 
pgoff_t index)
 static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len)
 {
struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
+   bdev_inode(dev->bdev_handle->bdev)->i_mapping;
struct page *page;
pgoff_t index = to >> PAGE_SHIFT;   // page index
int pages = len >> PAGE_SHIFT;
@@ -107,7 +107,7 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t 
from, size_t len,
 {
struct block2mtd_dev *dev = mtd->priv;
struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
+   bdev_inode(dev->bdev_handle->bdev)->i_mapping;
struct page *page;
pgoff_t index = from >> PAGE_SHIFT;
int offset = from & (PAGE_SIZE-1);
@@ -143,7 +143,7 @@ static int _block2mtd_write(struct block2mtd_dev *dev, 
const u_char *buf,
 {
struct page *page;
struct address_space *mapping =
-   dev->bdev_handle->bdev->bd_inode->i_mapping;
+   bdev_inode(dev->bdev_handle->bdev)->i_mapping;
pgoff_t index = to >> PAGE_SHIFT;   // page index
int offset = to & ~PAGE_MASK;   // page offset
int cpylen;
@@ -212,7 +212,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
 
if (dev->bdev_handle) {
invalidate_mapping_pages(
-   dev->bdev_handle->bdev->bd_inode->i_mapping, 0, -1);
+   bdev_inode(dev->bdev_handle->bdev)->i_mapping, 0, -1);
bdev_release(dev->bdev_handle);
}
 
@@ -295,7 +295,7 @@ static struct block2mtd_dev *add_device(char *devname, int 
erase_size,
goto err_free_block2mtd;
}
 
-   if ((long)bdev->bd_inode->i_size % erase_size) {
+   if ((long)bdev_inode(bdev)->i_size % erase_size) {
pr_err("erasesize must be a divisor of device size\n");
goto err_free_block2mtd;
}
@@ -313,7 +313,7 @@ static struct block2mtd_dev *add_device(char *devname, int 
erase_size,
 
dev->mtd.name = name;
 
-   dev->mtd.size = bdev->bd_inode->i_size & PAGE_MASK;
+   dev->mtd.size = bdev_inode(bdev)->i_size & PAGE_MASK;
dev->mtd.erasesize = erase_size;
dev->mtd.writesize = 1;
dev->mtd.writebufsize = PAGE_SIZE;
-- 
2.39.2




[PATCH block/for-next v2 06/16] scsicam: use new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

Which is more efficient, and also prepares to remove the field
'bd_inode' from block_device.

Signed-off-by: Yu Kuai 
---
 drivers/scsi/scsicam.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index e2c7d8ef205f..de40a5ef7d96 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -32,7 +32,7 @@
  */
 unsigned char *scsi_bios_ptable(struct block_device *dev)
 {
-   struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(bdev_whole(dev))->i_mapping;
unsigned char *res = NULL;
struct folio *folio;
 
-- 
2.39.2




[PATCH block/for-next v2 00/16] block: remove field 'bd_inode' from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

Changes in v2:
 - split different portions into different patches, as greg k-h
 suggested.
 - use container_of() instead of "bdev + 1" to get the address of
 bd_inode in the new helper, as greg k-h suggested.

Yu Kuai (16):
  block: add a new helper to get inode from block_device
  xen/blkback: use new helper to get inode from block_device
  bcache: use new helper to get inode from block_device
  mtd: block2mtd: use new helper to get inode from block_device
  s390/dasd: use new helper to get inode from block_device
  scsicam: use new helper to get inode from block_device
  bcachefs: use new helper to get inode from block_device
  btrfs: use new helper to get inode from block_device
  cramfs: use new helper to get inode from block_device
  erofs: use new helper to get inode from block_device
  ext4: use new helper to get inode from block_device
  gfs2: use new helper to get inode from block_device
  jbd2: use new helper to get inode from block_device
  nilfs2: use new helper to get inode from block_device
  buffer: use new helper to get inode from block_device
  block: use new helper to get inode from block_device

 block/bdev.c   | 44 +++---
 block/blk-zoned.c  |  4 +--
 block/fops.c   |  4 +--
 block/genhd.c  |  8 +++---
 block/ioctl.c  |  8 +++---
 block/partitions/core.c|  9 +++---
 drivers/block/xen-blkback/xenbus.c |  2 +-
 drivers/md/bcache/super.c  |  2 +-
 drivers/mtd/devices/block2mtd.c| 12 
 drivers/s390/block/dasd_ioctl.c|  2 +-
 drivers/scsi/scsicam.c |  2 +-
 fs/bcachefs/util.h |  2 +-
 fs/btrfs/disk-io.c |  6 ++--
 fs/btrfs/volumes.c |  4 +--
 fs/btrfs/zoned.c   |  2 +-
 fs/buffer.c|  8 +++---
 fs/cramfs/inode.c  |  2 +-
 fs/erofs/data.c|  2 +-
 fs/ext4/dir.c  |  2 +-
 fs/ext4/ext4_jbd2.c|  2 +-
 fs/ext4/super.c|  8 +++---
 fs/gfs2/glock.c|  2 +-
 fs/gfs2/ops_fstype.c   |  2 +-
 fs/jbd2/journal.c  |  3 +-
 fs/jbd2/recovery.c |  2 +-
 fs/nilfs2/segment.c|  2 +-
 include/linux/blk_types.h  | 15 --
 include/linux/blkdev.h |  4 +--
 include/linux/buffer_head.h|  4 +--
 29 files changed, 91 insertions(+), 78 deletions(-)

-- 
2.39.2




[PATCH block/for-next v2 01/16] block: add a new helper to get inode from block_device

2023-11-26 Thread Yu Kuai
From: Yu Kuai 

block_device is allocated from bdev_alloc() by bdev_alloc_inode(), and
currently block_device contains a pointer that points to the address of
the inode, while such inode is allocated together with it:

bdev_alloc
 inode = new_inode()
  // inode is &bdev_inode->vfs_inode
 bdev = I_BDEV(inode)
  // bdev is &bdev_inode->bdev
 bdev->inode = inode

Add a new helper to get the address of the inode from bdev by an add
operation instead of a memory access, which is more efficient.

Signed-off-by: Yu Kuai 
---
 block/bdev.c  |  5 -
 include/linux/blk_types.h | 12 
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index e4cfb7adb645..7509389095b7 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -30,11 +30,6 @@
 #include "../fs/internal.h"
 #include "blk.h"
 
-struct bdev_inode {
-   struct block_device bdev;
-   struct inode vfs_inode;
-};
-
 static inline struct bdev_inode *BDEV_I(struct inode *inode)
 {
return container_of(inode, struct bdev_inode, vfs_inode);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d5c5e59ddbd2..06de8393dcd1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -85,6 +85,18 @@ struct block_device {
 #define bdev_kobj(_bdev) \
(&((_bdev)->bd_device.kobj))
 
+struct bdev_inode {
+   struct block_device bdev;
+   struct inode vfs_inode;
+};
+
+static inline struct inode *bdev_inode(struct block_device *bdev)
+{
+   struct bdev_inode *bi = container_of(bdev, struct bdev_inode, bdev);
+
+   return &bi->vfs_inode;
+}
+
 /*
  * Block error status values.  See block/blk-core:blk_errors for the details.
  * Alpha cannot write a byte atomically, so we need to use 32-bit value.
-- 
2.39.2




Re: [PATCH -next] block: remove field 'bd_inode' from block_device

2023-11-26 Thread Yu Kuai

Hi,

在 2023/11/25 22:32, Greg KH 写道:

On Sat, Nov 25, 2023 at 05:39:12PM +0800, Yu Kuai wrote:

From: Yu Kuai 

block_device is allocated from bdev_alloc() by bdev_alloc_inode(), and
currently block_device contains a pointer that points to the address of
the inode, while such inode is allocated together with it:

bdev_alloc
  inode = new_inode()
   // inode is &bdev_inode->vfs_inode
  bdev = I_BDEV(inode)
   // bdev is &bdev_inode->bdev
  bdev->inode = inode

Add a new helper to get the address of the inode from bdev by an add
operation instead of a memory access, which is more efficient. Also prepare
to add a new field 'bd_flags' in the first cacheline (64 bytes).

Signed-off-by: Yu Kuai 
---
  block/bdev.c   | 39 +-
  block/blk-zoned.c  |  4 +--
  block/fops.c   |  4 +--
  block/genhd.c  |  8 +++---
  block/ioctl.c  |  8 +++---
  block/partitions/core.c|  9 ---
  drivers/block/xen-blkback/xenbus.c |  2 +-
  drivers/md/bcache/super.c  |  2 +-
  drivers/mtd/devices/block2mtd.c| 12 -
  drivers/s390/block/dasd_ioctl.c|  2 +-
  drivers/scsi/scsicam.c |  2 +-
  fs/bcachefs/util.h |  2 +-
  fs/btrfs/disk-io.c |  6 ++---
  fs/btrfs/volumes.c |  4 +--
  fs/btrfs/zoned.c   |  2 +-
  fs/buffer.c|  8 +++---
  fs/cramfs/inode.c  |  2 +-
  fs/erofs/data.c|  2 +-
  fs/ext4/dir.c  |  2 +-
  fs/ext4/ext4_jbd2.c|  2 +-
  fs/ext4/super.c|  8 +++---
  fs/gfs2/glock.c|  2 +-
  fs/gfs2/ops_fstype.c   |  2 +-
  fs/jbd2/journal.c  |  3 ++-
  fs/jbd2/recovery.c |  2 +-
  fs/nilfs2/segment.c|  2 +-
  include/linux/blk_types.h  | 10 ++--
  include/linux/blkdev.h |  4 +--
  include/linux/buffer_head.h|  4 +--
  29 files changed, 86 insertions(+), 73 deletions(-)


You should do this as a patch series, add the helper function that does
nothing, convert all the different portions of the kernel as different
patches, and _then_ change the implementation of the block layer to
handle the change in the structure.

Otherwise this is going to be hard to get accepted.


Okay, thanks for the advice, I'll do that in v2.

By the way, I was thinking that this patch is quite simple, and isn't
worth splitting into 10+ patches,


Also, one note:


@@ -85,6 +84,13 @@ struct block_device {
  #define bdev_kobj(_bdev) \
(&((_bdev)->bd_device.kobj))
  
+static inline struct inode *bdev_inode(struct block_device *bdev)

+{
+   void *inode = bdev + 1;


That's crazy, if something changes, this will keep working yet the
kernel will break and no one will know why.

Please use container_of(), that's what it is there for, this exact type
of thing.  Or if not, are you just assuming that the memory location
right after bdev is the inode?  That's a tough assumption, how are you
going to assure it really stays there?


Struct bdev_inode never changes since commit 8fbd544cbca5 ("[PATCH]
bdev: add I_BDEV()") from 2004, and I think it won't change unless
there is a different way to manage lifetime of block_device.

And the 'bdev + 1' is copied from blk_mq_rq_to_pdu(), however, I agree
that using container_of() is better and I will use it in v2.

Thanks,
Kuai



thanks,

greg k-h
.






[PATCH -next] block: remove field 'bd_inode' from block_device

2023-11-25 Thread Yu Kuai
From: Yu Kuai 

block_device is allocated from bdev_alloc() by bdev_alloc_inode(), and
currently block_device contains a pointer that points to the address of
the inode, while such inode is allocated together with it:

bdev_alloc
 inode = new_inode()
  // inode is &bdev_inode->vfs_inode
 bdev = I_BDEV(inode)
  // bdev is &bdev_inode->bdev
 bdev->inode = inode

Add a new helper to get the address of the inode from bdev by an add
operation instead of a memory access, which is more efficient. Also prepare
to add a new field 'bd_flags' in the first cacheline (64 bytes).

Signed-off-by: Yu Kuai 
---
 block/bdev.c   | 39 +-
 block/blk-zoned.c  |  4 +--
 block/fops.c   |  4 +--
 block/genhd.c  |  8 +++---
 block/ioctl.c  |  8 +++---
 block/partitions/core.c|  9 ---
 drivers/block/xen-blkback/xenbus.c |  2 +-
 drivers/md/bcache/super.c  |  2 +-
 drivers/mtd/devices/block2mtd.c| 12 -
 drivers/s390/block/dasd_ioctl.c|  2 +-
 drivers/scsi/scsicam.c |  2 +-
 fs/bcachefs/util.h |  2 +-
 fs/btrfs/disk-io.c |  6 ++---
 fs/btrfs/volumes.c |  4 +--
 fs/btrfs/zoned.c   |  2 +-
 fs/buffer.c|  8 +++---
 fs/cramfs/inode.c  |  2 +-
 fs/erofs/data.c|  2 +-
 fs/ext4/dir.c  |  2 +-
 fs/ext4/ext4_jbd2.c|  2 +-
 fs/ext4/super.c|  8 +++---
 fs/gfs2/glock.c|  2 +-
 fs/gfs2/ops_fstype.c   |  2 +-
 fs/jbd2/journal.c  |  3 ++-
 fs/jbd2/recovery.c |  2 +-
 fs/nilfs2/segment.c|  2 +-
 include/linux/blk_types.h  | 10 ++--
 include/linux/blkdev.h |  4 +--
 include/linux/buffer_head.h|  4 +--
 29 files changed, 86 insertions(+), 73 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index e4cfb7adb645..f27eb5588332 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -48,7 +48,7 @@ EXPORT_SYMBOL(I_BDEV);
 
 static void bdev_write_inode(struct block_device *bdev)
 {
-   struct inode *inode = bdev->bd_inode;
+   struct inode *inode = bdev_inode(bdev);
int ret;
 
spin_lock(&inode->i_lock);
@@ -67,7 +67,7 @@ static void bdev_write_inode(struct block_device *bdev)
 /* Kill _all_ buffers and pagecache , dirty or not.. */
 static void kill_bdev(struct block_device *bdev)
 {
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(bdev)->i_mapping;
 
if (mapping_empty(mapping))
return;
@@ -79,7 +79,7 @@ static void kill_bdev(struct block_device *bdev)
 /* Invalidate clean unused buffers and pagecache. */
 void invalidate_bdev(struct block_device *bdev)
 {
-   struct address_space *mapping = bdev->bd_inode->i_mapping;
+   struct address_space *mapping = bdev_inode(bdev)->i_mapping;
 
if (mapping->nrpages) {
invalidate_bh_lrus();
@@ -107,7 +107,7 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
goto invalidate;
}
 
-   truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
+   truncate_inode_pages_range(bdev_inode(bdev)->i_mapping, lstart, lend);
if (!(mode & BLK_OPEN_EXCL))
bd_abort_claiming(bdev, truncate_bdev_range);
return 0;
@@ -117,7 +117,7 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
 * Someone else has handle exclusively open. Try invalidating instead.
 * The 'end' argument is inclusive so the rounding is safe.
 */
-   return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
+   return invalidate_inode_pages2_range(bdev_inode(bdev)->i_mapping,
 lstart >> PAGE_SHIFT,
 lend >> PAGE_SHIFT);
 }
@@ -125,18 +125,21 @@ int truncate_bdev_range(struct block_device *bdev, 
blk_mode_t mode,
 static void set_init_blocksize(struct block_device *bdev)
 {
unsigned int bsize = bdev_logical_block_size(bdev);
-   loff_t size = i_size_read(bdev->bd_inode);
+   struct inode *inode = bdev_inode(bdev);
+   loff_t size = i_size_read(inode);
 
while (bsize < PAGE_SIZE) {
if (size & bsize)
break;
bsize <<= 1;
}
-   bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+   inode->i_blkbits = blksize_bits(bsize);
 }
 
 int set_blocksize(struct block_device *bdev, int size)
 {
+   struct inode *inode;
+
/* Size must be a power of two, and between 512 and PAGE_SIZE */
if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))