[f2fs-dev] [PATCH 6.1 7/8] block: fix race between set_blocksize and read paths

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
From: "Darrick J. Wong" 

commit c0e473a0d226479e8e925d5ba93f751d8df628e9 upstream.

With the new large sector size support, it's now the case that
set_blocksize can change i_blksize and the folio order in a manner that
conflicts with a concurrent reader and causes a kernel crash.

Specifically, let's say that udev-worker calls libblkid to detect the
labels on a block device.  The read call can create an order-0 folio to
read the first 4096 bytes from the disk.  But then udev is preempted.

Next, someone tries to mount an 8k-sectorsize filesystem from the same
block device.  The filesystem calls set_blocksize, which sets i_blksize to
8192 and the minimum folio order to 1.

Now udev resumes, still holding the order-0 folio it allocated.  It then
tries to schedule a read bio and do_mpage_readahead tries to create
bufferheads for the folio.  Unfortunately, blocks_per_folio == 0 because
the page size is 4096 but the blocksize is 8192 so no bufferheads are
attached and the bh walk never sets bdev.  We then submit the bio with a
NULL block device and crash.

Therefore, truncate the page cache after flushing but before updating
i_blksize.  However, that's not enough -- we also need to lock out file
IO and page faults during the update.  Take both the i_rwsem and the
invalidate_lock in exclusive mode for invalidations, and in shared mode
for read/write operations.

I don't know if this is the correct fix, but xfs/259 found it.

Signed-off-by: Darrick J. Wong 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Luis Chamberlain 
Tested-by: Shin'ichiro Kawasaki 
Link: 
https://lore.kernel.org/r/174543795699.4139148.2086129139322431423.stgit@frogsfrogsfrogs
Signed-off-by: Jens Axboe 
[use bdev->bd_inode instead & fix small contextual changes]
Signed-off-by: Mahmoud Adam 
---
 block/bdev.c  | 17 +
 block/blk-zoned.c |  5 -
 block/fops.c  | 16 
 block/ioctl.c |  6 ++
 4 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/block/bdev.c b/block/bdev.c
index b61502ec8da06c..5a631a0ca46a81 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -147,9 +147,26 @@ int set_blocksize(struct block_device *bdev, int size)
 
/* Don't change the size if it is same as current */
if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
+   /*
+* Flush and truncate the pagecache before we reconfigure the
+* mapping geometry because folio sizes are variable now.  If a
+* reader has already allocated a folio whose size is smaller
+* than the new min_order but invokes readahead after the new
+* min_order becomes visible, readahead will think there are
+* "zero" blocks per folio and crash.  Take the inode and
+* invalidation locks to avoid racing with
+* read/write/fallocate.
+*/
+   inode_lock(bdev->bd_inode);
+   filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+
sync_blockdev(bdev);
+   kill_bdev(bdev);
+
bdev->bd_inode->i_blkbits = blksize_bits(size);
kill_bdev(bdev);
+   filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+   inode_unlock(bdev->bd_inode);
}
return 0;
 }
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index db829401d8d0ca..ef72612ca4645f 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -417,6 +417,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, 
fmode_t mode,
op = REQ_OP_ZONE_RESET;
 
/* Invalidate the page cache, including dirty pages. */
+   inode_lock(bdev->bd_inode);
filemap_invalidate_lock(bdev->bd_inode->i_mapping);
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
if (ret)
@@ -439,8 +440,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, 
fmode_t mode,
   GFP_KERNEL);
 
 fail:
-   if (cmd == BLKRESETZONE)
+   if (cmd == BLKRESETZONE) {
filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+   inode_unlock(bdev->bd_inode);
+   }
 
return ret;
 }
diff --git a/block/fops.c b/block/fops.c
index fb7a57ed42d995..2fc6ac6679ee63 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -592,7 +592,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
ret = direct_write_fallback(iocb, from, ret,
generic_perform_write(iocb, from));
} else {
+   /*
+* Take i_rwsem and invalidate_lock to avoid racing with
+* set_blocksize changing i_blkbits/folio order and punching
+* out the pagecache.
+*/
+   inode_lock_shared(bd_inode);
ret = generic_perform_write(iocb, from);
+   inode_unlock_sh

[f2fs-dev] [PATCH 6.1 6/8] block: open code __generic_file_write_iter for blkdev writes

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
From: Christoph Hellwig 

commit 727cfe976758b79f8d2f8051c75a5ccb14539a56 upstream.

Open code __generic_file_write_iter to remove the indirect call into
->direct_IO and to prepare using the iomap based write code.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Johannes Thumshirn 
Reviewed-by: Christian Brauner 
Reviewed-by: Hannes Reinecke 
Reviewed-by: Luis Chamberlain 
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe 
[fix contextual changes]
Signed-off-by: Mahmoud Adam 
---
 block/fops.c | 45 +++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index b02fe200c3ecd0..fb7a57ed42d995 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -515,6 +515,30 @@ static int blkdev_close(struct inode *inode, struct file 
*filp)
return 0;
 }
 
+static ssize_t
+blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
+{
+   size_t count = iov_iter_count(from);
+   ssize_t written;
+
+   written = kiocb_invalidate_pages(iocb, count);
+   if (written) {
+   if (written == -EBUSY)
+   return 0;
+   return written;
+   }
+
+   written = blkdev_direct_IO(iocb, from);
+   if (written > 0) {
+   kiocb_invalidate_post_direct_write(iocb, count);
+   iocb->ki_pos += written;
+   count -= written;
+   }
+   if (written != -EIOCBQUEUED)
+   iov_iter_revert(from, count - iov_iter_count(from));
+   return written;
+}
+
 /*
  * Write data to the block device.  Only intended for the block device itself
  * and the raw driver which basically is a fake block device.
@@ -524,7 +548,8 @@ static int blkdev_close(struct inode *inode, struct file 
*filp)
  */
 static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-   struct block_device *bdev = iocb->ki_filp->private_data;
+   struct file *file = iocb->ki_filp;
+   struct block_device *bdev = file->private_data;
struct inode *bd_inode = bdev->bd_inode;
loff_t size = bdev_nr_bytes(bdev);
struct blk_plug plug;
@@ -553,7 +578,23 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
}
 
blk_start_plug(&plug);
-   ret = __generic_file_write_iter(iocb, from);
+   ret = file_remove_privs(file);
+   if (ret)
+   return ret;
+
+   ret = file_update_time(file);
+   if (ret)
+   return ret;
+
+   if (iocb->ki_flags & IOCB_DIRECT) {
+   ret = blkdev_direct_write(iocb, from);
+   if (ret >= 0 && iov_iter_count(from))
+   ret = direct_write_fallback(iocb, from, ret,
+   generic_perform_write(iocb, from));
+   } else {
+   ret = generic_perform_write(iocb, from);
+   }
+
if (ret > 0)
ret = generic_write_sync(iocb, ret);
iov_iter_reexpand(from, iov_iter_count(from) + shorted);
-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597



___
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 6.1 8/8] nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs()

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
From: Ryusuke Konishi 

commit fb881cd7604536b17a1927fb0533f9a6982ffcc5 upstream.

After commit c0e473a0d226 ("block: fix race between set_blocksize and read
paths") was merged, set_blocksize() called by sb_set_blocksize() now locks
the inode of the backing device file.  As a result of this change, syzbot
started reporting deadlock warnings due to a circular dependency involving
the semaphore "ns_sem" of the nilfs object, the inode lock of the backing
device file, and the locks that this inode lock is transitively dependent
on.

This is caused by a new lock dependency added by the above change, since
init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem".
However, these warnings are false positives because init_nilfs() is called
in the early stage of the mount operation and the filesystem has not yet
started.

The reason why "ns_sem" is locked in init_nilfs() was to avoid a race
condition in nilfs_fill_super() caused by sharing a nilfs object among
multiple filesystem instances (super block structures) in the early
implementation.  However, nilfs objects and super block structures have
long ago become one-to-one, and there is no longer any need to use the
semaphore there.

So, fix this issue by removing the use of the semaphore "ns_sem" in
init_nilfs().

Link: https://lkml.kernel.org/r/[email protected]
Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths")
Signed-off-by: Ryusuke Konishi 
Reported-by: [email protected]
Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773
Tested-by: [email protected]
Reported-by: [email protected]
Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b
Tested-by: [email protected]
Signed-off-by: Andrew Morton 
Signed-off-by: Mahmoud Adam 
---
 fs/nilfs2/the_nilfs.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index be41e26b782469..05fdbbc63e1f5f 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -680,8 +680,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block 
*sb, char *data)
int blocksize;
int err;
 
-   down_write(&nilfs->ns_sem);
-
blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
if (!blocksize) {
nilfs_err(sb, "unable to set blocksize");
@@ -757,7 +755,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block 
*sb, char *data)
set_nilfs_init(nilfs);
err = 0;
  out:
-   up_write(&nilfs->ns_sem);
return err;
 
  failed_sbh:
-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597



___
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 6.1 4/8] fs: factor out a direct_write_fallback helper

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
From: Christoph Hellwig 

commit 44fff0fa08ec5a6d9d5fb05443a36d854d0ece4d upstream.

Add a helper dealing with handling the syncing of a buffered write
fallback for direct I/O.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Christoph Hellwig 
Reviewed-by: Damien Le Moal 
Reviewed-by: Miklos Szeredi 
Reviewed-by: Darrick J. Wong 
Cc: Al Viro 
Cc: Andreas Gruenbacher 
Cc: Anna Schumaker 
Cc: Chao Yu 
Cc: Christian Brauner 
Cc: Hannes Reinecke 
Cc: Ilya Dryomov 
Cc: Jaegeuk Kim 
Cc: Jens Axboe 
Cc: Johannes Thumshirn 
Cc: Matthew Wilcox 
Cc: Miklos Szeredi 
Cc: Theodore Ts'o 
Cc: Trond Myklebust 
Cc: Xiubo Li 
Signed-off-by: Andrew Morton 
[backing_dev_info is still being used here. Make small changes to the patch
to keep the out label, which means replacing all returns with goto out.]
Signed-off-by: Mahmoud Adam 
---
 fs/libfs.c | 41 +++
 include/linux/fs.h |  2 ++
 mm/filemap.c   | 61 +++---
 3 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index cbd42d76fbd018..a5bbe8e31d6616 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1582,3 +1582,44 @@ bool inode_maybe_inc_iversion(struct inode *inode, bool 
force)
return true;
 }
 EXPORT_SYMBOL(inode_maybe_inc_iversion);
+
+ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
+   ssize_t direct_written, ssize_t buffered_written)
+{
+   struct address_space *mapping = iocb->ki_filp->f_mapping;
+   loff_t pos = iocb->ki_pos - buffered_written;
+   loff_t end = iocb->ki_pos - 1;
+   int err;
+
+   /*
+* If the buffered write fallback returned an error, we want to return
+* the number of bytes which were written by direct I/O, or the error
+* code if that was zero.
+*
+* Note that this differs from normal direct-io semantics, which will
+* return -EFOO even if some bytes were written.
+*/
+   if (unlikely(buffered_written < 0)) {
+   if (direct_written)
+   return direct_written;
+   return buffered_written;
+   }
+
+   /*
+* We need to ensure that the page cache pages are written to disk and
+* invalidated to preserve the expected O_DIRECT semantics.
+*/
+   err = filemap_write_and_wait_range(mapping, pos, end);
+   if (err < 0) {
+   /*
+* We don't know how much we wrote, so just return the number of
+* bytes which were direct-written
+*/
+   if (direct_written)
+   return direct_written;
+   return err;
+   }
+   invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+   return direct_written + buffered_written;
+}
+EXPORT_SYMBOL_GPL(direct_write_fallback);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4e8a3e4f894c0f..02c83cd07d4f20 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3278,6 +3278,8 @@ extern ssize_t __generic_file_write_iter(struct kiocb *, 
struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
+ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
+   ssize_t direct_written, ssize_t buffered_written);
 
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags);
diff --git a/mm/filemap.c b/mm/filemap.c
index e2045266d2f2c9..b77f534dfad35a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3923,25 +3923,21 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
 {
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
-   struct inode*inode = mapping->host;
-   ssize_t written = 0;
-   ssize_t err;
-   ssize_t status;
+   struct inode *inode = mapping->host;
+   ssize_t ret;
 
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
-   err = file_remove_privs(file);
-   if (err)
+   ret = file_remove_privs(file);
+   if (ret)
goto out;
 
-   err = file_update_time(file);
-   if (err)
+   ret = file_update_time(file);
+   if (ret)
goto out;
 
if (iocb->ki_flags & IOCB_DIRECT) {
-   loff_t pos, endbyte;
-
-   written = generic_file_direct_write(iocb, from);
+   ret = generic_file_direct_write(iocb, from);
/*
 * If the write stopped short of completing, fall back to
 * buffered writes.  Some filesystems do this for writes to
@@ -3949,46 +3945,17 @@ ssize_t __gener

[f2fs-dev] [PATCH 6.1 2/8] filemap: add a kiocb_invalidate_post_direct_write helper

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
From: Christoph Hellwig 

commit c402a9a9430b670926decbb284b756ee6f47c1ec upstream.

Add a helper to invalidate page cache after a dio write.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Christoph Hellwig 
Reviewed-by: Damien Le Moal 
Reviewed-by: Hannes Reinecke 
Acked-by: Darrick J. Wong 
Cc: Al Viro 
Cc: Andreas Gruenbacher 
Cc: Anna Schumaker 
Cc: Chao Yu 
Cc: Christian Brauner 
Cc: Ilya Dryomov 
Cc: Jaegeuk Kim 
Cc: Jens Axboe 
Cc: Johannes Thumshirn 
Cc: Matthew Wilcox 
Cc: Miklos Szeredi 
Cc: Miklos Szeredi 
Cc: Theodore Ts'o 
Cc: Trond Myklebust 
Cc: Xiubo Li 
Signed-off-by: Andrew Morton 
Signed-off-by: Mahmoud Adam 
---
 fs/direct-io.c  | 10 ++
 fs/iomap/direct-io.c| 12 ++--
 include/linux/fs.h  |  5 -
 include/linux/pagemap.h |  1 +
 mm/filemap.c| 37 -
 5 files changed, 25 insertions(+), 40 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 03d381377ae10a..514042f12aea76 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -286,14 +286,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, 
unsigned int flags)
 * zeros from unwritten extents.
 */
if (flags & DIO_COMPLETE_INVALIDATE &&
-   ret > 0 && dio_op == REQ_OP_WRITE &&
-   dio->inode->i_mapping->nrpages) {
-   err = invalidate_inode_pages2_range(dio->inode->i_mapping,
-   offset >> PAGE_SHIFT,
-   (offset + ret - 1) >> PAGE_SHIFT);
-   if (err)
-   dio_warn_stale_pagecache(dio->iocb->ki_filp);
-   }
+   ret > 0 && dio_op == REQ_OP_WRITE)
+   kiocb_invalidate_post_direct_write(dio->iocb, ret);
 
inode_dio_end(dio->inode);
 
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 105c4a1d20a20b..9acfc9e847cdcb 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -81,7 +81,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
const struct iomap_dio_ops *dops = dio->dops;
struct kiocb *iocb = dio->iocb;
-   struct inode *inode = file_inode(iocb->ki_filp);
loff_t offset = iocb->ki_pos;
ssize_t ret = dio->error;
 
@@ -108,15 +107,8 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
 * ->end_io() when necessary, otherwise a racing buffer read would cache
 * zeros from unwritten extents.
 */
-   if (!dio->error && dio->size &&
-   (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
-   int err;
-   err = invalidate_inode_pages2_range(inode->i_mapping,
-   offset >> PAGE_SHIFT,
-   (offset + dio->size - 1) >> PAGE_SHIFT);
-   if (err)
-   dio_warn_stale_pagecache(iocb->ki_filp);
-   }
+   if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
+   kiocb_invalidate_post_direct_write(iocb, dio->size);
 
inode_dio_end(file_inode(iocb->ki_filp));
if (ret > 0) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 48758ab2910087..4e8a3e4f894c0f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3371,11 +3371,6 @@ static inline void inode_dio_end(struct inode *inode)
wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
 }
 
-/*
- * Warn about a page cache invalidation failure diring a direct I/O write.
- */
-void dio_warn_stale_pagecache(struct file *filp);
-
 extern void inode_set_flags(struct inode *inode, unsigned int flags,
unsigned int mask);
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index bb462e5a91e28d..dfaa0990186716 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -31,6 +31,7 @@ int invalidate_inode_pages2(struct address_space *mapping);
 int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
 int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
+void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
 int write_inode_now(struct inode *, int sync);
 int filemap_fdatawrite(struct address_space *);
 int filemap_flush(struct address_space *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 0923b8df285886..39484af4300e3c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3733,7 +3733,7 @@ EXPORT_SYMBOL(read_cache_page_gfp);
 /*
  * Warn about a page cache invalidation failure during a direct I/O write.
  */
-void dio_warn_stale_pagecache(struct file *filp)
+static void dio_warn_stale_pagecache(struct file *filp)
 {
static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
char pathname[128];
@@ -3750,19 +3750,23 @@ void dio_warn_stale_pagecache(struct file *filp)
}
 }
 
+void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count)
+{
+  

[f2fs-dev] [PATCH 6.1 3/8] filemap: update ki_pos in generic_perform_write

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
From: Christoph Hellwig 

commit 182c25e9c157f37bd0ab5a82fe2417e2223df459 upstream.

All callers of generic_perform_write need to update ki_pos, so move that
update into common code.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Christoph Hellwig 
Reviewed-by: Xiubo Li 
Reviewed-by: Damien Le Moal 
Reviewed-by: Hannes Reinecke 
Acked-by: Theodore Ts'o 
Acked-by: Darrick J. Wong 
Cc: Al Viro 
Cc: Andreas Gruenbacher 
Cc: Anna Schumaker 
Cc: Chao Yu 
Cc: Christian Brauner 
Cc: Ilya Dryomov 
Cc: Jaegeuk Kim 
Cc: Jens Axboe 
Cc: Johannes Thumshirn 
Cc: Matthew Wilcox 
Cc: Miklos Szeredi 
Cc: Miklos Szeredi 
Cc: Trond Myklebust 
Signed-off-by: Andrew Morton 
Signed-off-by: Mahmoud Adam 
---
 fs/ceph/file.c | 2 --
 fs/ext4/file.c | 9 +++--
 fs/f2fs/file.c | 1 -
 fs/nfs/file.c  | 1 -
 mm/filemap.c   | 8 
 5 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3336647e64df3a..5921bf278fff72 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1891,8 +1891,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct 
iov_iter *from)
 * can not run at the same time
 */
written = generic_perform_write(iocb, from);
-   if (likely(written >= 0))
-   iocb->ki_pos = pos + written;
ceph_end_io_write(inode);
}
 
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 289b088f4ae58f..e84a144f3f8ed5 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -287,12 +287,9 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
 
 out:
inode_unlock(inode);
-   if (likely(ret > 0)) {
-   iocb->ki_pos += ret;
-   ret = generic_write_sync(iocb, ret);
-   }
-
-   return ret;
+   if (unlikely(ret <= 0))
+   return ret;
+   return generic_write_sync(iocb, ret);
 }
 
 static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5e2a0cb8d24d92..09b85d086d16a6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4661,7 +4661,6 @@ static ssize_t f2fs_buffered_write_iter(struct kiocb 
*iocb,
current->backing_dev_info = NULL;
 
if (ret > 0) {
-   iocb->ki_pos += ret;
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_BUFFERED_IO, ret);
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d8ec889a4b3f76..c1be636ef25729 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -645,7 +645,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter 
*from)
goto out;
 
written = result;
-   iocb->ki_pos += written;
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
 
if (mntflags & NFS_MOUNT_WRITE_EAGER) {
diff --git a/mm/filemap.c b/mm/filemap.c
index 39484af4300e3c..e2045266d2f2c9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3891,7 +3891,10 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct 
iov_iter *i)
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));
 
-   return written ? written : status;
+   if (!written)
+   return status;
+   iocb->ki_pos += written;
+   return written;
 }
 EXPORT_SYMBOL(generic_perform_write);
 
@@ -3970,7 +3973,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
endbyte = pos + status - 1;
err = filemap_write_and_wait_range(mapping, pos, endbyte);
if (err == 0) {
-   iocb->ki_pos = endbyte + 1;
written += status;
invalidate_mapping_pages(mapping,
 pos >> PAGE_SHIFT,
@@ -3983,8 +3985,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
}
} else {
written = generic_perform_write(iocb, from);
-   if (likely(written > 0))
-   iocb->ki_pos += written;
}
 out:
current->backing_dev_info = NULL;
-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597



___
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 6.1 5/8] direct_write_fallback(): on error revert the ->ki_pos update from buffered write

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
From: Al Viro 

commit 8287474aa5ffb41df52552c4ae4748e791d2faf2 upstream.

If we fail filemap_write_and_wait_range() on the range the buffered write went
into, we only report the "number of bytes which we direct-written", to quote
the comment in there.  Which is fine, but buffered write has already advanced
iocb->ki_pos, so we need to roll that back.  Otherwise we end up with e.g.
write(2) advancing position by more than the amount it reports having written.

Fixes: 182c25e9c157 "filemap: update ki_pos in generic_perform_write"
Signed-off-by: Al Viro 
Message-Id: <20230827214518.GU3390869@ZenIV>
Signed-off-by: Christian Brauner 
Signed-off-by: Mahmoud Adam 
---
 fs/libfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index a5bbe8e31d6616..63bc52c20f7e03 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1615,6 +1615,7 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct 
iov_iter *iter,
 * We don't know how much we wrote, so just return the number of
 * bytes which were direct-written
 */
+   iocb->ki_pos -= buffered_written;
if (direct_written)
return direct_written;
return err;
-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597



___
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 6.1 0/8] Backporting CVE-2025-38073 fix patch

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
This series aims to fix CVE-2025-38073 for 6.1 LTS, which is fixed
by c0e473a0d226 ("block: fix race between set_blocksize and read
paths"). This patch is built on top of multiple refactors that were
merged in 6.6. The needed dependencies are:

  - e003f74afbd2 ("filemap: add a kiocb_invalidate_pages helper")
  - c402a9a9430b ("filemap: add a kiocb_invalidate_post_direct_write
helper")
  - 182c25e9c157 ("filemap: update ki_pos in generic_perform_write")
  - 44fff0fa08ec ("fs: factor out a direct_write_fallback helper")
  - 727cfe976758 ("block: open code __generic_file_write_iter for
blkdev writes")

Also backport follow up fixes:
- fb881cd76045 ("nilfs2: fix deadlock warnings caused by lock
  dependency in init_nilfs()").
- 8287474aa5ff ("direct_write_fallback(): on error revert the ->ki_pos
  update from buffered write")

Thanks,
MNAdam

Al Viro (1):
  direct_write_fallback(): on error revert the ->ki_pos update from
buffered write

Christoph Hellwig (5):
  filemap: add a kiocb_invalidate_pages helper
  filemap: add a kiocb_invalidate_post_direct_write helper
  filemap: update ki_pos in generic_perform_write
  fs: factor out a direct_write_fallback helper
  block: open code __generic_file_write_iter for blkdev writes

Darrick J. Wong (1):
  block: fix race between set_blocksize and read paths

Ryusuke Konishi (1):
  nilfs2: fix deadlock warnings caused by lock dependency in
init_nilfs()

 block/bdev.c|  17 +
 block/blk-zoned.c   |   5 +-
 block/fops.c|  61 +++-
 block/ioctl.c   |   6 ++
 fs/ceph/file.c  |   2 -
 fs/direct-io.c  |  10 +--
 fs/ext4/file.c  |   9 +--
 fs/f2fs/file.c  |   1 -
 fs/iomap/direct-io.c|  12 +---
 fs/libfs.c  |  42 +++
 fs/nfs/file.c   |   1 -
 fs/nilfs2/the_nilfs.c   |   3 -
 include/linux/fs.h  |   7 +-
 include/linux/pagemap.h |   2 +
 mm/filemap.c| 154 +---
 15 files changed, 205 insertions(+), 127 deletions(-)

-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597



___
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 6.1 1/8] filemap: add a kiocb_invalidate_pages helper

2025-10-21 Thread Mahmoud Adam via Linux-f2fs-devel
From: Christoph Hellwig 

commit e003f74afbd2feadbb9ffbf9135e2d2fb5d320a5 upstream.

Factor out a helper that calls filemap_write_and_wait_range and
invalidate_inode_pages2_range for the range covered by a write kiocb or
returns -EAGAIN if the kiocb is marked as nowait and there would be pages
to write or invalidate.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Christoph Hellwig 
Reviewed-by: Damien Le Moal 
Reviewed-by: Hannes Reinecke 
Acked-by: Darrick J. Wong 
Cc: Al Viro 
Cc: Andreas Gruenbacher 
Cc: Anna Schumaker 
Cc: Chao Yu 
Cc: Christian Brauner 
Cc: Ilya Dryomov 
Cc: Jaegeuk Kim 
Cc: Jens Axboe 
Cc: Johannes Thumshirn 
Cc: Matthew Wilcox 
Cc: Miklos Szeredi 
Cc: Miklos Szeredi 
Cc: Theodore Ts'o 
Cc: Trond Myklebust 
Cc: Xiubo Li 
Signed-off-by: Andrew Morton 
Signed-off-by: Mahmoud Adam 
---
 include/linux/pagemap.h |  1 +
 mm/filemap.c| 48 -
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1be5a1fa6a3a84..bb462e5a91e28d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -30,6 +30,7 @@ static inline void invalidate_remote_inode(struct inode 
*inode)
 int invalidate_inode_pages2(struct address_space *mapping);
 int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
+int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
 int write_inode_now(struct inode *, int sync);
 int filemap_fdatawrite(struct address_space *);
 int filemap_flush(struct address_space *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 2ae6c6146d848a..0923b8df285886 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2839,6 +2839,33 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter 
*iter,
 }
 EXPORT_SYMBOL_GPL(filemap_read);
 
+int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
+{
+   struct address_space *mapping = iocb->ki_filp->f_mapping;
+   loff_t pos = iocb->ki_pos;
+   loff_t end = pos + count - 1;
+   int ret;
+
+   if (iocb->ki_flags & IOCB_NOWAIT) {
+   /* we could block if there are any pages in the range */
+   if (filemap_range_has_page(mapping, pos, end))
+   return -EAGAIN;
+   } else {
+   ret = filemap_write_and_wait_range(mapping, pos, end);
+   if (ret)
+   return ret;
+   }
+
+   /*
+* After a write we want buffered reads to be sure to go to disk to get
+* the new data.  We invalidate clean cached page from the region we're
+* about to write.  We do this *before* the write so that we can return
+* without clobbering -EIOCBQUEUED from ->direct_IO().
+*/
+   return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
+end >> PAGE_SHIFT);
+}
+
 /**
  * generic_file_read_iter - generic filesystem read routine
  * @iocb:  kernel I/O control block
@@ -3737,30 +3764,11 @@ generic_file_direct_write(struct kiocb *iocb, struct 
iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-   if (iocb->ki_flags & IOCB_NOWAIT) {
-   /* If there are pages to writeback, return */
-   if (filemap_range_has_page(file->f_mapping, pos,
-  pos + write_len - 1))
-   return -EAGAIN;
-   } else {
-   written = filemap_write_and_wait_range(mapping, pos,
-   pos + write_len - 1);
-   if (written)
-   goto out;
-   }
-
-   /*
-* After a write we want buffered reads to be sure to go to disk to get
-* the new data.  We invalidate clean cached page from the region we're
-* about to write.  We do this *before* the write so that we can return
-* without clobbering -EIOCBQUEUED from ->direct_IO().
-*/
-   written = invalidate_inode_pages2_range(mapping,
-   pos >> PAGE_SHIFT, end);
/*
 * If a page can not be invalidated, return 0 to fall back
 * to buffered write.
 */
+   written = kiocb_invalidate_pages(iocb, write_len);
if (written) {
if (written == -EBUSY)
return 0;
-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597



___
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel