From: Zhang Yi <yi.zh...@huawei.com>

Add support for FALLOC_FL_WRITE_ZEROES. This first allocates blocks as
unwritten, then issues a zero command outside of the running journal
handle, and finally converts them to a written state.

Signed-off-by: Zhang Yi <yi.zh...@huawei.com>
---
 fs/ext4/extents.c           | 59 ++++++++++++++++++++++++++++++-------
 include/trace/events/ext4.h |  3 +-
 2 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c616a16a9f36..a147714403af 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4483,6 +4483,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        struct ext4_map_blocks map;
        unsigned int credits;
        loff_t epos, old_size = i_size_read(inode);
+       unsigned int blkbits = inode->i_blkbits;
+       bool alloc_zero = false;
 
        BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
        map.m_lblk = offset;
@@ -4495,6 +4497,17 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        if (len <= EXT_UNWRITTEN_MAX_LEN)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
+       /*
+        * Doing the actual zeroing during a running journal transaction is
+        * expensive. Instead, first allocate an unwritten extent, and then
+        * convert it to written after zeroing it out.
+        */
+       if (flags & EXT4_GET_BLOCKS_ZERO) {
+               flags &= ~EXT4_GET_BLOCKS_ZERO;
+               flags |= EXT4_GET_BLOCKS_UNWRIT_EXT;
+               alloc_zero = true;
+       }
+
        /*
         * credits to insert 1 extent into extent tree
         */
@@ -4531,9 +4544,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
                 * allow a full retry cycle for any remaining allocations
                 */
                retries = 0;
-               map.m_lblk += ret;
-               map.m_len = len = len - ret;
-               epos = (loff_t)map.m_lblk << inode->i_blkbits;
+               epos = (loff_t)(map.m_lblk + ret) << blkbits;
                inode_set_ctime_current(inode);
                if (new_size) {
                        if (epos > new_size)
@@ -4553,6 +4564,21 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
                ret2 = ret3 ? ret3 : ret2;
                if (unlikely(ret2))
                        break;
+
+               if (alloc_zero &&
+                   (map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
+                       ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
+                                                 map.m_len);
+                       if (likely(!ret2))
+                               ret2 = ext4_convert_unwritten_extents(NULL,
+                                       inode, (loff_t)map.m_lblk << blkbits,
+                                       (loff_t)map.m_len << blkbits);
+                       if (ret2)
+                               break;
+               }
+
+               map.m_lblk += ret;
+               map.m_len = len = len - ret;
        }
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
@@ -4618,7 +4644,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (end_lblk > start_lblk) {
                ext4_lblk_t zero_blks = end_lblk - start_lblk;
 
-               flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE);
+               if (mode & FALLOC_FL_WRITE_ZEROES)
+                       flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE;
+               else
+                       flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+                                 EXT4_EX_NOCACHE);
                ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
                                             new_size, flags);
                if (ret)
@@ -4730,8 +4760,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 
        /* Return error if mode is not supported */
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
-                    FALLOC_FL_INSERT_RANGE))
+                    FALLOC_FL_ZERO_RANGE | FALLOC_FL_COLLAPSE_RANGE |
+                    FALLOC_FL_INSERT_RANGE | FALLOC_FL_WRITE_ZEROES))
                return -EOPNOTSUPP;
 
        inode_lock(inode);
@@ -4762,16 +4792,23 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (ret)
                goto out_invalidate_lock;
 
-       if (mode & FALLOC_FL_PUNCH_HOLE)
+       switch (mode & FALLOC_FL_MODE_MASK) {
+       case FALLOC_FL_PUNCH_HOLE:
                ret = ext4_punch_hole(file, offset, len);
-       else if (mode & FALLOC_FL_COLLAPSE_RANGE)
+               break;
+       case FALLOC_FL_COLLAPSE_RANGE:
                ret = ext4_collapse_range(file, offset, len);
-       else if (mode & FALLOC_FL_INSERT_RANGE)
+               break;
+       case FALLOC_FL_INSERT_RANGE:
                ret = ext4_insert_range(file, offset, len);
-       else if (mode & FALLOC_FL_ZERO_RANGE)
+               break;
+       case FALLOC_FL_ZERO_RANGE:
+       case FALLOC_FL_WRITE_ZEROES:
                ret = ext4_zero_range(file, offset, len, mode);
-       else
+               break;
+       default:
                ret = -EOPNOTSUPP;
+       }
 
 out_invalidate_lock:
        filemap_invalidate_unlock(mapping);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 156908641e68..6f9cf2811733 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -92,7 +92,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
        { FALLOC_FL_KEEP_SIZE,          "KEEP_SIZE"},           \
        { FALLOC_FL_PUNCH_HOLE,         "PUNCH_HOLE"},          \
        { FALLOC_FL_COLLAPSE_RANGE,     "COLLAPSE_RANGE"},      \
-       { FALLOC_FL_ZERO_RANGE,         "ZERO_RANGE"})
+       { FALLOC_FL_ZERO_RANGE,         "ZERO_RANGE"},          \
+       { FALLOC_FL_WRITE_ZEROES,       "WRITE_ZEROES"})
 
 TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
 TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);
-- 
2.46.1


Reply via email to