[f2fs-dev] [PATCH 6.1 7/8] block: fix race between set_blocksize and read paths
From: "Darrick J. Wong"
commit c0e473a0d226479e8e925d5ba93f751d8df628e9 upstream.
With the new large sector size support, it's now the case that
set_blocksize can change i_blksize and the folio order in a manner that
conflicts with a concurrent reader and causes a kernel crash.
Specifically, let's say that udev-worker calls libblkid to detect the
labels on a block device. The read call can create an order-0 folio to
read the first 4096 bytes from the disk. But then udev is preempted.
Next, someone tries to mount an 8k-sectorsize filesystem from the same
block device. The filesystem calls set_blksize, which sets i_blksize to
8192 and the minimum folio order to 1.
Now udev resumes, still holding the order-0 folio it allocated. It then
tries to schedule a read bio and do_mpage_readahead tries to create
bufferheads for the folio. Unfortunately, blocks_per_folio == 0 because
the page size is 4096 but the blocksize is 8192 so no bufferheads are
attached and the bh walk never sets bdev. We then submit the bio with a
NULL block device and crash.
Therefore, truncate the page cache after flushing but before updating
i_blksize. However, that's not enough -- we also need to lock out file
IO and page faults during the update. Take both the i_rwsem and the
invalidate_lock in exclusive mode for invalidations, and in shared mode
for read/write operations.
I don't know if this is the correct fix, but xfs/259 found it.
Signed-off-by: Darrick J. Wong
Reviewed-by: Christoph Hellwig
Reviewed-by: Luis Chamberlain
Tested-by: Shin'ichiro Kawasaki
Link:
https://lore.kernel.org/r/174543795699.4139148.2086129139322431423.stgit@frogsfrogsfrogs
Signed-off-by: Jens Axboe
[use bdev->bd_inode instead & fix small contextual changes]
Signed-off-by: Mahmoud Adam
---
block/bdev.c | 17 +
block/blk-zoned.c | 5 -
block/fops.c | 16
block/ioctl.c | 6 ++
4 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/block/bdev.c b/block/bdev.c
index b61502ec8da06c..5a631a0ca46a81 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -147,9 +147,26 @@ int set_blocksize(struct block_device *bdev, int size)
/* Don't change the size if it is same as current */
if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
+ /*
+* Flush and truncate the pagecache before we reconfigure the
+* mapping geometry because folio sizes are variable now. If a
+* reader has already allocated a folio whose size is smaller
+* than the new min_order but invokes readahead after the new
+* min_order becomes visible, readahead will think there are
+* "zero" blocks per folio and crash. Take the inode and
+* invalidation locks to avoid racing with
+* read/write/fallocate.
+*/
+ inode_lock(bdev->bd_inode);
+ filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+
sync_blockdev(bdev);
+ kill_bdev(bdev);
+
bdev->bd_inode->i_blkbits = blksize_bits(size);
kill_bdev(bdev);
+ filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+ inode_unlock(bdev->bd_inode);
}
return 0;
}
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index db829401d8d0ca..ef72612ca4645f 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -417,6 +417,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
fmode_t mode,
op = REQ_OP_ZONE_RESET;
/* Invalidate the page cache, including dirty pages. */
+ inode_lock(bdev->bd_inode);
filemap_invalidate_lock(bdev->bd_inode->i_mapping);
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
if (ret)
@@ -439,8 +440,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
fmode_t mode,
GFP_KERNEL);
fail:
- if (cmd == BLKRESETZONE)
+ if (cmd == BLKRESETZONE) {
filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+ inode_unlock(bdev->bd_inode);
+ }
return ret;
}
diff --git a/block/fops.c b/block/fops.c
index fb7a57ed42d995..2fc6ac6679ee63 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -592,7 +592,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb,
struct iov_iter *from)
ret = direct_write_fallback(iocb, from, ret,
generic_perform_write(iocb, from));
} else {
+ /*
+* Take i_rwsem and invalidate_lock to avoid racing with
+* set_blocksize changing i_blkbits/folio order and punching
+* out the pagecache.
+*/
+ inode_lock_shared(bd_inode);
ret = generic_perform_write(iocb, from);
+ inode_unlock_sh
[f2fs-dev] [PATCH 6.1 6/8] block: open code __generic_file_write_iter for blkdev writes
From: Christoph Hellwig commit 727cfe976758b79f8d2f8051c75a5ccb14539a56 upstream. Open code __generic_file_write_iter to remove the indirect call into ->direct_IO and to prepare using the iomap based write code. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Christian Brauner Reviewed-by: Hannes Reinecke Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jens Axboe [fix contextual changes] Signed-off-by: Mahmoud Adam --- block/fops.c | 45 +++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/block/fops.c b/block/fops.c index b02fe200c3ecd0..fb7a57ed42d995 100644 --- a/block/fops.c +++ b/block/fops.c @@ -515,6 +515,30 @@ static int blkdev_close(struct inode *inode, struct file *filp) return 0; } +static ssize_t +blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from) +{ + size_t count = iov_iter_count(from); + ssize_t written; + + written = kiocb_invalidate_pages(iocb, count); + if (written) { + if (written == -EBUSY) + return 0; + return written; + } + + written = blkdev_direct_IO(iocb, from); + if (written > 0) { + kiocb_invalidate_post_direct_write(iocb, count); + iocb->ki_pos += written; + count -= written; + } + if (written != -EIOCBQUEUED) + iov_iter_revert(from, count - iov_iter_count(from)); + return written; +} + /* * Write data to the block device. Only intended for the block device itself * and the raw driver which basically is a fake block device. @@ -524,7 +548,8 @@ static int blkdev_close(struct inode *inode, struct file *filp) */ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct block_device *bdev = iocb->ki_filp->private_data; + struct file *file = iocb->ki_filp; + struct block_device *bdev = file->private_data; struct inode *bd_inode = bdev->bd_inode; loff_t size = bdev_nr_bytes(bdev); struct blk_plug plug; @@ -553,7 +578,23 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) } blk_start_plug(&plug); - ret = __generic_file_write_iter(iocb, from); + ret = file_remove_privs(file); + if (ret) + return ret; + + ret = file_update_time(file); + if (ret) + return ret; + + if (iocb->ki_flags & IOCB_DIRECT) { + ret = blkdev_direct_write(iocb, from); + if (ret >= 0 && iov_iter_count(from)) + ret = direct_write_fallback(iocb, from, ret, + generic_perform_write(iocb, from)); + } else { + ret = generic_perform_write(iocb, from); + } + if (ret > 0) ret = generic_write_sync(iocb, ret); iov_iter_reexpand(from, iov_iter_count(from) + shorted); -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 ___ Linux-f2fs-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [PATCH 6.1 8/8] nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs()
From: Ryusuke Konishi
commit fb881cd7604536b17a1927fb0533f9a6982ffcc5 upstream.
After commit c0e473a0d226 ("block: fix race between set_blocksize and read
paths") was merged, set_blocksize() called by sb_set_blocksize() now locks
the inode of the backing device file. As a result of this change, syzbot
started reporting deadlock warnings due to a circular dependency involving
the semaphore "ns_sem" of the nilfs object, the inode lock of the backing
device file, and the locks that this inode lock is transitively dependent
on.
This is caused by a new lock dependency added by the above change, since
init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem".
However, these warnings are false positives because init_nilfs() is called
in the early stage of the mount operation and the filesystem has not yet
started.
The reason why "ns_sem" is locked in init_nilfs() was to avoid a race
condition in nilfs_fill_super() caused by sharing a nilfs object among
multiple filesystem instances (super block structures) in the early
implementation. However, nilfs objects and super block structures have
long ago become one-to-one, and there is no longer any need to use the
semaphore there.
So, fix this issue by removing the use of the semaphore "ns_sem" in
init_nilfs().
Link: https://lkml.kernel.org/r/[email protected]
Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths")
Signed-off-by: Ryusuke Konishi
Reported-by: [email protected]
Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773
Tested-by: [email protected]
Reported-by: [email protected]
Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b
Tested-by: [email protected]>
Signed-off-by: Andrew Morton
Signed-off-by: Mahmoud Adam
---
fs/nilfs2/the_nilfs.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index be41e26b782469..05fdbbc63e1f5f 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -680,8 +680,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block
*sb, char *data)
int blocksize;
int err;
- down_write(&nilfs->ns_sem);
-
blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
if (!blocksize) {
nilfs_err(sb, "unable to set blocksize");
@@ -757,7 +755,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block
*sb, char *data)
set_nilfs_init(nilfs);
err = 0;
out:
- up_write(&nilfs->ns_sem);
return err;
failed_sbh:
--
2.47.3
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
___
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [PATCH 6.1 4/8] fs: factor out a direct_write_fallback helper
From: Christoph Hellwig commit 44fff0fa08ec5a6d9d5fb05443a36d854d0ece4d upstream. Add a helper dealing with handling the syncing of a buffered write fallback for direct I/O. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Miklos Szeredi Reviewed-by: Darrick J. Wong Cc: Al Viro Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Chao Yu Cc: Christian Brauner Cc: Hannes Reinecke Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Johannes Thumshirn Cc: Matthew Wilcox Cc: Miklos Szeredi Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Xiubo Li Signed-off-by: Andrew Morton [backing_dev_info still being used here. do small changes to the patch to keep the out label. Which means replacing all returns to goto out.] Signed-off-by: Mahmoud Adam --- fs/libfs.c | 41 +++ include/linux/fs.h | 2 ++ mm/filemap.c | 61 +++--- 3 files changed, 57 insertions(+), 47 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index cbd42d76fbd018..a5bbe8e31d6616 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1582,3 +1582,44 @@ bool inode_maybe_inc_iversion(struct inode *inode, bool force) return true; } EXPORT_SYMBOL(inode_maybe_inc_iversion); + +ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, + ssize_t direct_written, ssize_t buffered_written) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + loff_t pos = iocb->ki_pos - buffered_written; + loff_t end = iocb->ki_pos - 1; + int err; + + /* +* If the buffered write fallback returned an error, we want to return +* the number of bytes which were written by direct I/O, or the error +* code if that was zero. +* +* Note that this differs from normal direct-io semantics, which will +* return -EFOO even if some bytes were written. +*/ + if (unlikely(buffered_written < 0)) { + if (direct_written) + return direct_written; + return buffered_written; + } + + /* +* We need to ensure that the page cache pages are written to disk and +* invalidated to preserve the expected O_DIRECT semantics. +*/ + err = filemap_write_and_wait_range(mapping, pos, end); + if (err < 0) { + /* +* We don't know how much we wrote, so just return the number of +* bytes which were direct-written +*/ + if (direct_written) + return direct_written; + return err; + } + invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + return direct_written + buffered_written; +} +EXPORT_SYMBOL_GPL(direct_write_fallback); diff --git a/include/linux/fs.h b/include/linux/fs.h index 4e8a3e4f894c0f..02c83cd07d4f20 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3278,6 +3278,8 @@ extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); ssize_t generic_perform_write(struct kiocb *, struct iov_iter *); +ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, + ssize_t direct_written, ssize_t buffered_written); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); diff --git a/mm/filemap.c b/mm/filemap.c index e2045266d2f2c9..b77f534dfad35a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3923,25 +3923,21 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - struct inode*inode = mapping->host; - ssize_t written = 0; - ssize_t err; - ssize_t status; + struct inode *inode = mapping->host; + ssize_t ret; /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = file_remove_privs(file); - if (err) + ret = file_remove_privs(file); + if (ret) goto out; - err = file_update_time(file); - if (err) + ret = file_update_time(file); + if (ret) goto out; if (iocb->ki_flags & IOCB_DIRECT) { - loff_t pos, endbyte; - - written = generic_file_direct_write(iocb, from); + ret = generic_file_direct_write(iocb, from); /* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to @@ -3949,46 +3945,17 @@ ssize_t __gener
[f2fs-dev] [PATCH 6.1 2/8] filemap: add a kiocb_invalidate_post_direct_write helper
From: Christoph Hellwig commit c402a9a9430b670926decbb284b756ee6f47c1ec upstream. Add a helper to invalidate page cache after a dio write. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Acked-by: Darrick J. Wong Cc: Al Viro Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Chao Yu Cc: Christian Brauner Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Johannes Thumshirn Cc: Matthew Wilcox Cc: Miklos Szeredi Cc: Miklos Szeredi Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Xiubo Li Signed-off-by: Andrew Morton Signed-off-by: Mahmoud Adam --- fs/direct-io.c | 10 ++ fs/iomap/direct-io.c| 12 ++-- include/linux/fs.h | 5 - include/linux/pagemap.h | 1 + mm/filemap.c| 37 - 5 files changed, 25 insertions(+), 40 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 03d381377ae10a..514042f12aea76 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -286,14 +286,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) * zeros from unwritten extents. */ if (flags & DIO_COMPLETE_INVALIDATE && - ret > 0 && dio_op == REQ_OP_WRITE && - dio->inode->i_mapping->nrpages) { - err = invalidate_inode_pages2_range(dio->inode->i_mapping, - offset >> PAGE_SHIFT, - (offset + ret - 1) >> PAGE_SHIFT); - if (err) - dio_warn_stale_pagecache(dio->iocb->ki_filp); - } + ret > 0 && dio_op == REQ_OP_WRITE) + kiocb_invalidate_post_direct_write(dio->iocb, ret); inode_dio_end(dio->inode); diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 105c4a1d20a20b..9acfc9e847cdcb 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -81,7 +81,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) { const struct iomap_dio_ops *dops = dio->dops; struct kiocb *iocb = dio->iocb; - struct inode *inode = file_inode(iocb->ki_filp); loff_t offset = iocb->ki_pos; ssize_t ret = dio->error; @@ -108,15 +107,8 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) * ->end_io() when necessary, otherwise a racing buffer read would cache * zeros from unwritten extents. */ - if (!dio->error && dio->size && - (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { - int err; - err = invalidate_inode_pages2_range(inode->i_mapping, - offset >> PAGE_SHIFT, - (offset + dio->size - 1) >> PAGE_SHIFT); - if (err) - dio_warn_stale_pagecache(iocb->ki_filp); - } + if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE)) + kiocb_invalidate_post_direct_write(iocb, dio->size); inode_dio_end(file_inode(iocb->ki_filp)); if (ret > 0) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 48758ab2910087..4e8a3e4f894c0f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3371,11 +3371,6 @@ static inline void inode_dio_end(struct inode *inode) wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); } -/* - * Warn about a page cache invalidation failure diring a direct I/O write. - */ -void dio_warn_stale_pagecache(struct file *filp); - extern void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bb462e5a91e28d..dfaa0990186716 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -31,6 +31,7 @@ int invalidate_inode_pages2(struct address_space *mapping); int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int kiocb_invalidate_pages(struct kiocb *iocb, size_t count); +void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count); int write_inode_now(struct inode *, int sync); int filemap_fdatawrite(struct address_space *); int filemap_flush(struct address_space *); diff --git a/mm/filemap.c b/mm/filemap.c index 0923b8df285886..39484af4300e3c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3733,7 +3733,7 @@ EXPORT_SYMBOL(read_cache_page_gfp); /* * Warn about a page cache invalidation failure during a direct I/O write. */ -void dio_warn_stale_pagecache(struct file *filp) +static void dio_warn_stale_pagecache(struct file *filp) { static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); char pathname[128]; @@ -3750,19 +3750,23 @@ void dio_warn_stale_pagecache(struct file *filp) } } +void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count) +{ +
[f2fs-dev] [PATCH 6.1 3/8] filemap: update ki_pos in generic_perform_write
From: Christoph Hellwig commit 182c25e9c157f37bd0ab5a82fe2417e2223df459 upstream. All callers of generic_perform_write need to updated ki_pos, move it into common code. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Christoph Hellwig Reviewed-by: Xiubo Li Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Acked-by: Theodore Ts'o Acked-by: Darrick J. Wong Cc: Al Viro Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Chao Yu Cc: Christian Brauner Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Johannes Thumshirn Cc: Matthew Wilcox Cc: Miklos Szeredi Cc: Miklos Szeredi Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Mahmoud Adam --- fs/ceph/file.c | 2 -- fs/ext4/file.c | 9 +++-- fs/f2fs/file.c | 1 - fs/nfs/file.c | 1 - mm/filemap.c | 8 5 files changed, 7 insertions(+), 14 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3336647e64df3a..5921bf278fff72 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1891,8 +1891,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) * can not run at the same time */ written = generic_perform_write(iocb, from); - if (likely(written >= 0)) - iocb->ki_pos = pos + written; ceph_end_io_write(inode); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 289b088f4ae58f..e84a144f3f8ed5 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -287,12 +287,9 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, out: inode_unlock(inode); - if (likely(ret > 0)) { - iocb->ki_pos += ret; - ret = generic_write_sync(iocb, ret); - } - - return ret; + if (unlikely(ret <= 0)) + return ret; + return generic_write_sync(iocb, ret); } static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5e2a0cb8d24d92..09b85d086d16a6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4661,7 +4661,6 @@ static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, current->backing_dev_info = NULL; if (ret > 0) { - iocb->ki_pos += ret; f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_BUFFERED_IO, ret); } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d8ec889a4b3f76..c1be636ef25729 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -645,7 +645,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) goto out; written = result; - iocb->ki_pos += written; nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); if (mntflags & NFS_MOUNT_WRITE_EAGER) { diff --git a/mm/filemap.c b/mm/filemap.c index 39484af4300e3c..e2045266d2f2c9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3891,7 +3891,10 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i) balance_dirty_pages_ratelimited(mapping); } while (iov_iter_count(i)); - return written ? written : status; + if (!written) + return status; + iocb->ki_pos += written; + return written; } EXPORT_SYMBOL(generic_perform_write); @@ -3970,7 +3973,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) endbyte = pos + status - 1; err = filemap_write_and_wait_range(mapping, pos, endbyte); if (err == 0) { - iocb->ki_pos = endbyte + 1; written += status; invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, @@ -3983,8 +3985,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } } else { written = generic_perform_write(iocb, from); - if (likely(written > 0)) - iocb->ki_pos += written; } out: current->backing_dev_info = NULL; -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 ___ Linux-f2fs-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [PATCH 6.1 5/8] direct_write_fallback(): on error revert the ->ki_pos update from buffered write
From: Al Viro commit 8287474aa5ffb41df52552c4ae4748e791d2faf2 upstream. If we fail filemap_write_and_wait_range() on the range the buffered write went into, we only report the "number of bytes which we direct-written", to quote the comment in there. Which is fine, but buffered write has already advanced iocb->ki_pos, so we need to roll that back. Otherwise we end up with e.g. write(2) advancing position by more than the amount it reports having written. Fixes: 182c25e9c157 "filemap: update ki_pos in generic_perform_write" Signed-off-by: Al Viro Message-Id: <20230827214518.GU3390869@ZenIV> Signed-off-by: Christian Brauner Signed-off-by: Mahmoud Adam --- fs/libfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/libfs.c b/fs/libfs.c index a5bbe8e31d6616..63bc52c20f7e03 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1615,6 +1615,7 @@ ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, * We don't know how much we wrote, so just return the number of * bytes which were direct-written */ + iocb->ki_pos -= buffered_written; if (direct_written) return direct_written; return err; -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 ___ Linux-f2fs-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [PATCH 6.1 0/8] Backporting CVE-2025-38073 fix patch
This series aims to fix the CVE-2025-38073 for 6.1 LTS. Which is fixed
by c0e473a0d226 ("block: fix race between set_blocksize and read
paths"). This patch is built on top multiple refactors that where
merged on 6.6. The needed dependecies are:
- e003f74afbd2 ("filemap: add a kiocb_invalidate_pages helper")
- c402a9a9430b ("filemap: add a kiocb_invalidate_post_direct_write
helper")
- 182c25e9c157 ("filemap: update ki_pos in generic_perform_write")
- 44fff0fa08ec ("fs: factor out a direct_write_fallback helper")
- 727cfe976758 ("block: open code __generic_file_write_iter for
blkdev writes")
Also backport follow up fixes:
- fb881cd76045 ("nilfs2: fix deadlock warnings caused by lock
dependency in init_nilfs()").
- 8287474aa5ff ("direct_write_fallback(): on error revert the ->ki_pos
update from buffered write")
Thanks,
MNAdam
Al Viro (1):
direct_write_fallback(): on error revert the ->ki_pos update from
buffered write
Christoph Hellwig (5):
filemap: add a kiocb_invalidate_pages helper
filemap: add a kiocb_invalidate_post_direct_write helper
filemap: update ki_pos in generic_perform_write
fs: factor out a direct_write_fallback helper
block: open code __generic_file_write_iter for blkdev writes
Darrick J. Wong (1):
block: fix race between set_blocksize and read paths
Ryusuke Konishi (1):
nilfs2: fix deadlock warnings caused by lock dependency in
init_nilfs()
block/bdev.c| 17 +
block/blk-zoned.c | 5 +-
block/fops.c| 61 +++-
block/ioctl.c | 6 ++
fs/ceph/file.c | 2 -
fs/direct-io.c | 10 +--
fs/ext4/file.c | 9 +--
fs/f2fs/file.c | 1 -
fs/iomap/direct-io.c| 12 +---
fs/libfs.c | 42 +++
fs/nfs/file.c | 1 -
fs/nilfs2/the_nilfs.c | 3 -
include/linux/fs.h | 7 +-
include/linux/pagemap.h | 2 +
mm/filemap.c| 154 +---
15 files changed, 205 insertions(+), 127 deletions(-)
--
2.47.3
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
___
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
[f2fs-dev] [PATCH 6.1 1/8] filemap: add a kiocb_invalidate_pages helper
From: Christoph Hellwig commit e003f74afbd2feadbb9ffbf9135e2d2fb5d320a5 upstream. Factor out a helper that calls filemap_write_and_wait_range and invalidate_inode_pages2_range for the range covered by a write kiocb or returns -EAGAIN if the kiocb is marked as nowait and there would be pages to write or invalidate. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Acked-by: Darrick J. Wong Cc: Al Viro Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Chao Yu Cc: Christian Brauner Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Johannes Thumshirn Cc: Matthew Wilcox Cc: Miklos Szeredi Cc: Miklos Szeredi Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Xiubo Li Signed-off-by: Andrew Morton Signed-off-by: Mahmoud Adam --- include/linux/pagemap.h | 1 + mm/filemap.c| 48 - 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 1be5a1fa6a3a84..bb462e5a91e28d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -30,6 +30,7 @@ static inline void invalidate_remote_inode(struct inode *inode) int invalidate_inode_pages2(struct address_space *mapping); int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); +int kiocb_invalidate_pages(struct kiocb *iocb, size_t count); int write_inode_now(struct inode *, int sync); int filemap_fdatawrite(struct address_space *); int filemap_flush(struct address_space *); diff --git a/mm/filemap.c b/mm/filemap.c index 2ae6c6146d848a..0923b8df285886 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2839,6 +2839,33 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, } EXPORT_SYMBOL_GPL(filemap_read); +int kiocb_invalidate_pages(struct kiocb *iocb, size_t count) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + loff_t pos = iocb->ki_pos; + loff_t end = pos + count - 1; + int ret; + + if (iocb->ki_flags & IOCB_NOWAIT) { + /* we could block if there are any pages in the range */ + if (filemap_range_has_page(mapping, pos, end)) + return -EAGAIN; + } else { + ret = filemap_write_and_wait_range(mapping, pos, end); + if (ret) + return ret; + } + + /* +* After a write we want buffered reads to be sure to go to disk to get +* the new data. We invalidate clean cached page from the region we're +* about to write. We do this *before* the write so that we can return +* without clobbering -EIOCBQUEUED from ->direct_IO(). +*/ + return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, +end >> PAGE_SHIFT); +} + /** * generic_file_read_iter - generic filesystem read routine * @iocb: kernel I/O control block @@ -3737,30 +3764,11 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) write_len = iov_iter_count(from); end = (pos + write_len - 1) >> PAGE_SHIFT; - if (iocb->ki_flags & IOCB_NOWAIT) { - /* If there are pages to writeback, return */ - if (filemap_range_has_page(file->f_mapping, pos, - pos + write_len - 1)) - return -EAGAIN; - } else { - written = filemap_write_and_wait_range(mapping, pos, - pos + write_len - 1); - if (written) - goto out; - } - - /* -* After a write we want buffered reads to be sure to go to disk to get -* the new data. We invalidate clean cached page from the region we're -* about to write. We do this *before* the write so that we can return -* without clobbering -EIOCBQUEUED from ->direct_IO(). -*/ - written = invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); /* * If a page can not be invalidated, return 0 to fall back * to buffered write. */ + written = kiocb_invalidate_pages(iocb, write_len); if (written) { if (written == -EBUSY) return 0; -- 2.47.3 Amazon Web Services Development Center Germany GmbH Tamara-Danz-Str. 13 10243 Berlin Geschaeftsfuehrung: Christian Schlaeger Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597 ___ Linux-f2fs-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
