Add initial support for FS_XFLAG_ATOMICWRITES on inodes with forcealign enabled.

Current kernel support for atomic writes relies on HW support for atomic
writes. As such, extents must be aligned with atomic_write_unit_max so that
an atomic write can be issued as a single HW-compliant IO operation.

rtvol also guarantees extent alignment, but initial support is based on
forcealign, which is not yet supported for rtvol.

Signed-off-by: John Garry <john.g.ga...@oracle.com>
---
 fs/xfs/libxfs/xfs_format.h    | 11 ++++++--
 fs/xfs/libxfs/xfs_inode_buf.c | 52 +++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_inode_buf.h |  2 ++
 fs/xfs/libxfs/xfs_sb.c        |  2 ++
 fs/xfs/xfs_buf.c              | 15 +++++++++-
 fs/xfs/xfs_buf.h              |  4 ++-
 fs/xfs/xfs_buf_mem.c          |  2 +-
 fs/xfs/xfs_inode.c            |  9 ++++++
 fs/xfs/xfs_inode.h            |  5 ++++
 fs/xfs/xfs_ioctl.c            | 36 ++++++++++++++++++++++++
 fs/xfs/xfs_mount.h            |  2 ++
 fs/xfs/xfs_super.c            |  4 +++
 12 files changed, 139 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 42e1f80206ab..e20880b6aff7 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -354,12 +354,16 @@ xfs_sb_has_compat_feature(
 #define XFS_SB_FEAT_RO_COMPAT_REFLINK  (1 << 2)                /* reflinked 
files */
 #define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3)                /* inobt block 
counts */
 #define XFS_SB_FEAT_RO_COMPAT_FORCEALIGN (1 << 30)     /* aligned file data 
extents */
+#define XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES (1U << 31)  /* atomicwrites enabled */
+
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
                (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
                 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
                 XFS_SB_FEAT_RO_COMPAT_REFLINK| \
                 XFS_SB_FEAT_RO_COMPAT_INOBTCNT | \
-                XFS_SB_FEAT_RO_COMPAT_FORCEALIGN)
+                XFS_SB_FEAT_RO_COMPAT_FORCEALIGN | \
+                XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES)
+
 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN  ~XFS_SB_FEAT_RO_COMPAT_ALL
 static inline bool
 xfs_sb_has_ro_compat_feature(
@@ -1098,6 +1102,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode 
*dip, xfs_dev_t rdev)
 #define XFS_DIFLAG2_NREXT64_BIT 4      /* large extent counters */
 /* data extent mappings for regular files must be aligned to extent size hint 
*/
 #define XFS_DIFLAG2_FORCEALIGN_BIT 5
+#define XFS_DIFLAG2_ATOMICWRITES_BIT 6
 
 #define XFS_DIFLAG2_DAX                (1 << XFS_DIFLAG2_DAX_BIT)
 #define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
@@ -1105,10 +1110,12 @@ static inline void xfs_dinode_put_rdev(struct 
xfs_dinode *dip, xfs_dev_t rdev)
 #define XFS_DIFLAG2_BIGTIME    (1 << XFS_DIFLAG2_BIGTIME_BIT)
 #define XFS_DIFLAG2_NREXT64    (1 << XFS_DIFLAG2_NREXT64_BIT)
 #define XFS_DIFLAG2_FORCEALIGN (1 << XFS_DIFLAG2_FORCEALIGN_BIT)
+#define XFS_DIFLAG2_ATOMICWRITES       (1 << XFS_DIFLAG2_ATOMICWRITES_BIT)
 
 #define XFS_DIFLAG2_ANY \
        (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
-        XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_FORCEALIGN)
+        XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_FORCEALIGN | \
+        XFS_DIFLAG2_ATOMICWRITES)
 
 static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
 {
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index b2c5f466c1a9..046e72481b60 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -178,7 +178,10 @@ xfs_inode_from_disk(
        struct xfs_inode        *ip,
        struct xfs_dinode       *from)
 {
+       struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
        struct inode            *inode = VFS_I(ip);
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_sb           *sbp = &mp->m_sb;
        int                     error;
        xfs_failaddr_t          fa;
 
@@ -261,6 +264,13 @@ xfs_inode_from_disk(
        }
        if (xfs_is_reflink_inode(ip))
                xfs_ifork_init_cow(ip);
+
+       if (xfs_inode_has_atomicwrites(ip)) {
+               if (sbp->sb_blocksize < target->bt_bdev_awu_min ||
+                   sbp->sb_blocksize * ip->i_extsize > target->bt_bdev_awu_max)
+                       ip->i_diflags2 &= ~XFS_DIFLAG2_ATOMICWRITES;
+       }
+
        return 0;
 
 out_destroy_data_fork:
@@ -653,6 +663,13 @@ xfs_dinode_verify(
                        return fa;
        }
 
+       if (flags2 & XFS_DIFLAG2_ATOMICWRITES) {
+               fa = xfs_inode_validate_atomicwrites(mp,
+                       be32_to_cpu(dip->di_extsize), flags2);
+               if (fa)
+                       return fa;
+       }
+
        return NULL;
 }
 
@@ -864,3 +881,38 @@ xfs_inode_validate_forcealign(
 
        return NULL;
 }
+
+xfs_failaddr_t
+xfs_inode_validate_atomicwrites(
+       struct xfs_mount        *mp,
+       uint32_t                extsize,
+       uint64_t                flags2)
+{
+       /* the superblock rocompat feature bit must be set */
+       if (!xfs_has_atomicwrites(mp))
+               return __this_address;
+
+       /*
+        * forcealign is a prerequisite for atomic writes, so the forcealign
+        * sanity checks are left to xfs_inode_validate_forcealign()
+        */
+       if (!(flags2 & XFS_DIFLAG2_FORCEALIGN))
+               return __this_address;
+
+       /* extsize must be a power-of-2 */
+       if (!is_power_of_2(extsize))
+               return __this_address;
+
+       /* Required to guarantee data block alignment */
+       if (mp->m_sb.sb_agblocks % extsize)
+               return __this_address;
+
+       /* Stripe unit and width, if set, must be multiples of extsize */
+       if (mp->m_dalign && (mp->m_dalign % extsize))
+               return __this_address;
+
+       if (mp->m_swidth && (mp->m_swidth % extsize))
+               return __this_address;
+
+       return NULL;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index b8b65287b037..c7613b1a05f2 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -36,6 +36,8 @@ xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount 
*mp,
 xfs_failaddr_t xfs_inode_validate_forcealign(struct xfs_mount *mp,
                uint32_t extsize, uint32_t cowextsize, uint16_t mode,
                uint16_t flags, uint64_t flags2);
+xfs_failaddr_t xfs_inode_validate_atomicwrites(struct xfs_mount *mp,
+               uint32_t extsize, uint64_t flags2);
 
 static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv)
 {
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 515f5dd23b6d..aee285899ffc 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -166,6 +166,8 @@ xfs_sb_version_to_features(
                features |= XFS_FEAT_INOBTCNT;
        if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FORCEALIGN)
                features |= XFS_FEAT_FORCEALIGN;
+       if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_ATOMICWRITES)
+               features |= XFS_FEAT_ATOMICWRITES;
        if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE)
                features |= XFS_FEAT_FTYPE;
        if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index aa4dbda7b536..44bee3e2b2bb 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2060,6 +2060,8 @@ int
 xfs_init_buftarg(
        struct xfs_buftarg              *btp,
        size_t                          logical_sectorsize,
+       unsigned int                    awu_min,
+       unsigned int                    awu_max,
        const char                      *descr)
 {
        /* Set up device logical sector size mask */
@@ -2086,6 +2088,9 @@ xfs_init_buftarg(
        btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
        btp->bt_shrinker->private_data = btp;
        shrinker_register(btp->bt_shrinker);
+
+       btp->bt_bdev_awu_min = awu_min;
+       btp->bt_bdev_awu_max = awu_max;
        return 0;
 
 out_destroy_io_count:
@@ -2102,6 +2107,7 @@ xfs_alloc_buftarg(
 {
        struct xfs_buftarg      *btp;
        const struct dax_holder_operations *ops = NULL;
+       unsigned int awu_min = 0, awu_max = 0;
 
 #if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
        ops = &xfs_dax_holder_operations;
@@ -2115,6 +2121,13 @@ xfs_alloc_buftarg(
        btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
                                            mp, ops);
 
+       if (bdev_can_atomic_write(btp->bt_bdev)) {
+               struct request_queue *q = bdev_get_queue(btp->bt_bdev);
+
+               awu_min = queue_atomic_write_unit_min_bytes(q);
+               awu_max = queue_atomic_write_unit_max_bytes(q);
+       }
+
        /*
         * When allocating the buftargs we have not yet read the super block and
         * thus don't know the file system sector size yet.
@@ -2122,7 +2135,7 @@ xfs_alloc_buftarg(
        if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
                goto error_free;
        if (xfs_init_buftarg(btp, bdev_logical_block_size(btp->bt_bdev),
-                       mp->m_super->s_id))
+                       awu_min, awu_max, mp->m_super->s_id))
                goto error_free;
 
        return btp;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b1580644501f..3bcd8137d739 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -124,6 +124,8 @@ struct xfs_buftarg {
        struct percpu_counter   bt_io_count;
        struct ratelimit_state  bt_ioerror_rl;
 
+       unsigned int            bt_bdev_awu_min, bt_bdev_awu_max;
+
        /* built-in cache, if we're not using the perag one */
        struct xfs_buf_cache    bt_cache[];
 };
@@ -393,7 +395,7 @@ bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
 
 /* for xfs_buf_mem.c only: */
 int xfs_init_buftarg(struct xfs_buftarg *btp, size_t logical_sectorsize,
-               const char *descr);
+               unsigned int awu_min, unsigned int awu_max, const char *descr);
 void xfs_destroy_buftarg(struct xfs_buftarg *btp);
 
 #endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
index 9bb2d24de709..af48a8da2f0f 100644
--- a/fs/xfs/xfs_buf_mem.c
+++ b/fs/xfs/xfs_buf_mem.c
@@ -93,7 +93,7 @@ xmbuf_alloc(
        btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
        btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;
 
-       error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
+       error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, 0, 0, descr);
        if (error)
                goto out_bcache;
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9c0ae5c3682e..e8d7ad4e6d38 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -610,6 +610,8 @@ xfs_ip2xflags(
                        flags |= FS_XFLAG_COWEXTSIZE;
                if (ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN)
                        flags |= FS_XFLAG_FORCEALIGN;
+               if (ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES)
+                       flags |= FS_XFLAG_ATOMICWRITES;
        }
 
        if (xfs_inode_has_attr_fork(ip))
@@ -758,6 +760,13 @@ xfs_inode_inherit_flags2(
                if (failaddr)
                        ip->i_diflags2 &= ~XFS_DIFLAG2_FORCEALIGN;
        }
+
+       if (ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES) {
+               failaddr = xfs_inode_validate_atomicwrites(ip->i_mount,
+                               ip->i_extsize, ip->i_diflags2);
+               if (failaddr)
+                       ip->i_diflags2 &= ~XFS_DIFLAG2_ATOMICWRITES;
+       }
 }
 
 /*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 7005ea29bf11..1e8a8778ce0b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -334,6 +334,11 @@ static inline bool xfs_inode_has_forcealign(struct 
xfs_inode *ip)
        return ip->i_diflags2 & XFS_DIFLAG2_FORCEALIGN;
 }
 
+static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip)
+{
+       return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES; /* inode flag set? */
+}
+
 /*
  * Decide if this file is a realtime file whose data allocation unit is larger
  * than a single filesystem block.
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5eff8fd9fa3e..2877553b6151 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -527,6 +527,8 @@ xfs_flags2diflags2(
                di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
        if (xflags & FS_XFLAG_FORCEALIGN)
                di_flags2 |= XFS_DIFLAG2_FORCEALIGN;
+       if (xflags & FS_XFLAG_ATOMICWRITES)
+               di_flags2 |= XFS_DIFLAG2_ATOMICWRITES;
 
        return di_flags2;
 }
@@ -567,15 +569,45 @@ xfs_ioctl_setattr_forcealign(
        return 0;
 }
 
+static int
+xfs_ioctl_setattr_atomicwrites(
+       struct xfs_inode        *ip,
+       struct fileattr         *fa,
+       struct xfs_buftarg      *target)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_sb           *sbp = &mp->m_sb;
+
+       if (!xfs_has_atomicwrites(mp))
+               return -EINVAL; /* fs lacks the atomicwrites feature */
+
+       if (!(fa->fsx_xflags & FS_XFLAG_FORCEALIGN))
+               return -EINVAL; /* forcealign must be requested too */
+
+       if (!is_power_of_2(fa->fsx_extsize))
+               return -EINVAL; /* extent size hint must be a power-of-2 */
+
+       /* the bdev must actually support the atomic write range required */
+       if (target->bt_bdev_awu_min > sbp->sb_blocksize)
+               return -EINVAL; /* min atomic unit exceeds the fs block size */
+
+       if (target->bt_bdev_awu_max < fa->fsx_extsize)
+               return -EINVAL; /* extent size hint exceeds max atomic unit */
+
+       return 0;
+}
+
 static int
 xfs_ioctl_setattr_xflags(
        struct xfs_trans        *tp,
        struct xfs_inode        *ip,
        struct fileattr         *fa)
 {
+       struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
        struct xfs_mount        *mp = ip->i_mount;
        bool                    rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME);
        bool                    forcealign = fa->fsx_xflags & 
FS_XFLAG_FORCEALIGN;
+       bool                    atomicwrites = fa->fsx_xflags & 
FS_XFLAG_ATOMICWRITES;
        uint64_t                i_flags2;
 
        /* Can't change RT or forcealign flags if any extents are allocated. */
@@ -604,6 +636,10 @@ xfs_ioctl_setattr_xflags(
        if (forcealign && (xfs_ioctl_setattr_forcealign(ip, fa) < 0))
                return -EINVAL;
 
+       if (atomicwrites &&
+           (xfs_ioctl_setattr_atomicwrites(ip, fa, target) < 0))
+               return -EINVAL;
+
        ip->i_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
        ip->i_diflags2 = i_flags2;
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 30228fea908d..0c5a3ae3cdaf 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -300,6 +300,7 @@ typedef struct xfs_mount {
 #define XFS_FEAT_NREXT64       (1ULL << 26)    /* large extent counters */
 #define XFS_FEAT_EXCHANGE_RANGE        (1ULL << 27)    /* exchange range */
 #define XFS_FEAT_FORCEALIGN    (1ULL << 28)    /* aligned file data extents */
+#define XFS_FEAT_ATOMICWRITES  (1ULL << 29)    /* atomic writes support */
 
 /* Mount features */
 #define XFS_FEAT_NOATTR2       (1ULL << 48)    /* disable attr2 creation */
@@ -387,6 +388,7 @@ __XFS_HAS_V4_FEAT(v3inodes, V3INODES)
 __XFS_HAS_V4_FEAT(crc, CRC)
 __XFS_HAS_V4_FEAT(pquotino, PQUOTINO)
 __XFS_HAS_FEAT(forcealign, FORCEALIGN)
+__XFS_HAS_FEAT(atomicwrites, ATOMICWRITES)
 
 /*
  * Mount features
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 852bbfb21506..85799bd12e92 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1725,6 +1725,10 @@ xfs_fs_fill_super(
                xfs_warn(mp,
 "EXPERIMENTAL forced data extent alignment feature in use. Use at your own 
risk!");
 
+       if (xfs_has_atomicwrites(mp))
+               xfs_warn(mp,
+"EXPERIMENTAL atomicwrites feature in use. Use at your own risk!");
+
        if (xfs_has_reflink(mp)) {
                if (mp->m_sb.sb_rblocks) {
                        xfs_alert(mp,
-- 
2.31.1



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to