Re: [RFC 1/1] ext4: Optimize ext4 DAX overwrites
On 8/20/20 6:23 PM, Jan Kara wrote: On Thu 20-08-20 17:06:28, Ritesh Harjani wrote: Currently in case of DAX, we are starting a transaction everytime for IOMAP_WRITE case. This can be optimized away in case of an overwrite (where the blocks were already allocated). This could give a significant performance boost for multi-threaded random writes. Reported-by: Dan Williams Signed-off-by: Ritesh Harjani Thanks for returning to this and I'm glad to see how much this helped :) BTW, I'd suspect there could be also significant contention and cache line bouncing on j_state_lock and transaction's atomic counters... ok, will try and profile to see if this happens. --- fs/ext4/ext4.h | 1 + fs/ext4/file.c | 2 +- fs/ext4/inode.c | 8 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 42f5060f3cdf..9a2138afc751 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3232,6 +3232,7 @@ extern const struct dentry_operations ext4_dentry_ops; extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); +extern bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len); /* inline.c */ extern int ext4_get_max_inline_size(struct inode *inode); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2a01e31a032c..51cd92ac1758 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -188,7 +188,7 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len) } /* Is IO overwriting allocated and initialized blocks? 
*/ -static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) +bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) { struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 10dd470876b3..f0ac0ee9e991 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3423,6 +3423,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, int ret; struct ext4_map_blocks map; u8 blkbits = inode->i_blkbits; + bool overwrite = false; if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) return -EINVAL; @@ -3430,6 +3431,9 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (WARN_ON_ONCE(ext4_has_inline_data(inode))) return -ERANGE; + if (IS_DAX(inode) && (flags & IOMAP_WRITE) && + ext4_overwrite_io(inode, offset, length)) + overwrite = true; So the patch looks correct but using ext4_overwrite_io() seems a bit foolish since under the hood it does ext4_map_blocks() only to be able to decide whether to call ext4_map_blocks() once again with exactly the same arguments :). So I'd rather slightly refactor the code in ext4_iomap_begin() to avoid this double calling of ext4_map_blocks() for the fast path. Yes, agreed. Looking at the numbers I was excited to post out the RFC for discussion. Will make above changes and post. :) With DIO, we need to detect overwrite case early in ext4_dio_write_iter() to determine whether we need shared or excl. locks - so probably for DIO case we still need overwrite check in ext4_dio_write_iter() Thanks for review!! -ritesh Honza /* * Calculate the first and last logical blocks respectively. 
*/ @@ -3437,13 +3441,15 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; - if (flags & IOMAP_WRITE) + if ((flags & IOMAP_WRITE) && !overwrite) ret = ext4_iomap_alloc(inode, &map, flags); else ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) return ret; + if (IS_DAX(inode) && overwrite) + WARN_ON(!(map.m_flags & EXT4_MAP_MAPPED)); ext4_set_iomap(inode, iomap, &map, offset, length); -- 2.25.4
Re: [RFC 1/1] ext4: Optimize ext4 DAX overwrites
On Thu 20-08-20 17:06:28, Ritesh Harjani wrote: > Currently in case of DAX, we are starting a transaction > everytime for IOMAP_WRITE case. This can be optimized > away in case of an overwrite (where the blocks were already > allocated). This could give a significant performance boost > for multi-threaded random writes. > > Reported-by: Dan Williams > Signed-off-by: Ritesh Harjani Thanks for returning to this and I'm glad to see how much this helped :) BTW, I'd suspect there could be also significant contention and cache line bouncing on j_state_lock and transaction's atomic counters... > --- > fs/ext4/ext4.h | 1 + > fs/ext4/file.c | 2 +- > fs/ext4/inode.c | 8 +++- > 3 files changed, 9 insertions(+), 2 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 42f5060f3cdf..9a2138afc751 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -3232,6 +3232,7 @@ extern const struct dentry_operations ext4_dentry_ops; > extern const struct inode_operations ext4_file_inode_operations; > extern const struct file_operations ext4_file_operations; > extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); > +extern bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len); > > /* inline.c */ > extern int ext4_get_max_inline_size(struct inode *inode); > diff --git a/fs/ext4/file.c b/fs/ext4/file.c > index 2a01e31a032c..51cd92ac1758 100644 > --- a/fs/ext4/file.c > +++ b/fs/ext4/file.c > @@ -188,7 +188,7 @@ ext4_extending_io(struct inode *inode, loff_t offset, > size_t len) > } > > /* Is IO overwriting allocated and initialized blocks? 
*/ > -static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) > +bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) > { > struct ext4_map_blocks map; > unsigned int blkbits = inode->i_blkbits; > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 10dd470876b3..f0ac0ee9e991 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -3423,6 +3423,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t > offset, loff_t length, > int ret; > struct ext4_map_blocks map; > u8 blkbits = inode->i_blkbits; > + bool overwrite = false; > > if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) > return -EINVAL; > @@ -3430,6 +3431,9 @@ static int ext4_iomap_begin(struct inode *inode, loff_t > offset, loff_t length, > if (WARN_ON_ONCE(ext4_has_inline_data(inode))) > return -ERANGE; > > + if (IS_DAX(inode) && (flags & IOMAP_WRITE) && > + ext4_overwrite_io(inode, offset, length)) > + overwrite = true; So the patch looks correct but using ext4_overwrite_io() seems a bit foolish since under the hood it does ext4_map_blocks() only to be able to decide whether to call ext4_map_blocks() once again with exactly the same arguments :). So I'd rather slightly refactor the code in ext4_iomap_begin() to avoid this double calling of ext4_map_blocks() for the fast path. Honza > /* >* Calculate the first and last logical blocks respectively. >*/ > @@ -3437,13 +3441,15 @@ static int ext4_iomap_begin(struct inode *inode, > loff_t offset, loff_t length, > map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, > EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; > > - if (flags & IOMAP_WRITE) > + if ((flags & IOMAP_WRITE) && !overwrite) > ret = ext4_iomap_alloc(inode, &map, flags); > else > ret = ext4_map_blocks(NULL, inode, &map, 0); > > if (ret < 0) > return ret; > + if (IS_DAX(inode) && overwrite) > + WARN_ON(!(map.m_flags & EXT4_MAP_MAPPED)); > > ext4_set_iomap(inode, iomap, &map, offset, length); > > -- > 2.25.4 > -- Jan Kara SUSE Labs, CR
[RFC 1/1] ext4: Optimize ext4 DAX overwrites
Currently in case of DAX, we are starting a transaction everytime for IOMAP_WRITE case. This can be optimized away in case of an overwrite (where the blocks were already allocated). This could give a significant performance boost for multi-threaded random writes. Reported-by: Dan Williams Signed-off-by: Ritesh Harjani --- fs/ext4/ext4.h | 1 + fs/ext4/file.c | 2 +- fs/ext4/inode.c | 8 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 42f5060f3cdf..9a2138afc751 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3232,6 +3232,7 @@ extern const struct dentry_operations ext4_dentry_ops; extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); +extern bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len); /* inline.c */ extern int ext4_get_max_inline_size(struct inode *inode); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2a01e31a032c..51cd92ac1758 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -188,7 +188,7 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len) } /* Is IO overwriting allocated and initialized blocks? 
*/ -static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) +bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) { struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 10dd470876b3..f0ac0ee9e991 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3423,6 +3423,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, int ret; struct ext4_map_blocks map; u8 blkbits = inode->i_blkbits; + bool overwrite = false; if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) return -EINVAL; @@ -3430,6 +3431,9 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (WARN_ON_ONCE(ext4_has_inline_data(inode))) return -ERANGE; + if (IS_DAX(inode) && (flags & IOMAP_WRITE) && + ext4_overwrite_io(inode, offset, length)) + overwrite = true; /* * Calculate the first and last logical blocks respectively. */ @@ -3437,13 +3441,15 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; - if (flags & IOMAP_WRITE) + if ((flags & IOMAP_WRITE) && !overwrite) ret = ext4_iomap_alloc(inode, &map, flags); else ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) return ret; + if (IS_DAX(inode) && overwrite) + WARN_ON(!(map.m_flags & EXT4_MAP_MAPPED)); ext4_set_iomap(inode, iomap, &map, offset, length); -- 2.25.4