This introduces the add_dev_v2 ioctl, which can add a device as a raid56
journal device.  With the help of a journal device, raid56 is able to get
rid of potential write holes.

Signed-off-by: Liu Bo <bo.li....@oracle.com>
---
 fs/btrfs/ctree.h                |  6 ++++++
 fs/btrfs/ioctl.c                | 48 ++++++++++++++++++++++++++++++++++++++++-
 fs/btrfs/raid56.c               | 42 ++++++++++++++++++++++++++++++++++++
 fs/btrfs/raid56.h               |  1 +
 fs/btrfs/volumes.c              | 26 ++++++++++++++++------
 fs/btrfs/volumes.h              |  3 ++-
 include/uapi/linux/btrfs.h      |  3 +++
 include/uapi/linux/btrfs_tree.h |  4 ++++
 8 files changed, 125 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 643c70d..d967627 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -697,6 +697,7 @@ struct btrfs_stripe_hash_table {
 void btrfs_init_async_reclaim_work(struct work_struct *work);
 
 /* fs_info */
+struct btrfs_r5l_log;
 struct reloc_control;
 struct btrfs_device;
 struct btrfs_fs_devices;
@@ -1114,6 +1115,9 @@ struct btrfs_fs_info {
        u32 nodesize;
        u32 sectorsize;
        u32 stripesize;
+
+       /* raid56 log */
+       struct btrfs_r5l_log *r5log;
 };
 
 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
@@ -2932,6 +2936,8 @@ static inline int btrfs_need_cleaner_sleep(struct 
btrfs_fs_info *fs_info)
 
 static inline void free_fs_info(struct btrfs_fs_info *fs_info)
 {
+       if (fs_info->r5log)
+               kfree(fs_info->r5log);
        kfree(fs_info->balance_ctl);
        kfree(fs_info->delayed_root);
        kfree(fs_info->extent_root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e176375..3d1ef4d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2653,6 +2653,50 @@ static int btrfs_ioctl_defrag(struct file *file, void 
__user *argp)
        return ret;
 }
 
+/* identical to btrfs_ioctl_add_dev, but this is with flags */
+static long btrfs_ioctl_add_dev_v2(struct btrfs_fs_info *fs_info, void __user 
*arg)
+{
+       struct btrfs_ioctl_vol_args_v2 *vol_args;
+       int ret;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
+               return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
+
+       mutex_lock(&fs_info->volume_mutex);
+       vol_args = memdup_user(arg, sizeof(*vol_args));
+       if (IS_ERR(vol_args)) {
+               ret = PTR_ERR(vol_args);
+               goto out;
+       }
+
+       if (vol_args->flags & BTRFS_DEVICE_RAID56_LOG &&
+           fs_info->r5log) {
+               ret = -EEXIST;
+               btrfs_info(fs_info, "r5log: attempting to add another log 
device!");
+               goto out_free;
+       }
+
+       vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+       ret = btrfs_init_new_device(fs_info, vol_args->name, vol_args->flags);
+       if (!ret) {
+               if (vol_args->flags & BTRFS_DEVICE_RAID56_LOG) {
+                       ASSERT(fs_info->r5log);
+                       btrfs_info(fs_info, "disk added %s as raid56 log", 
vol_args->name);
+               } else {
+                       btrfs_info(fs_info, "disk added %s", vol_args->name);
+               }
+       }
+out_free:
+       kfree(vol_args);
+out:
+       mutex_unlock(&fs_info->volume_mutex);
+       clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+       return ret;
+}
+
 static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user 
*arg)
 {
        struct btrfs_ioctl_vol_args *vol_args;
@@ -2672,7 +2716,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info 
*fs_info, void __user *arg)
        }
 
        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-       ret = btrfs_init_new_device(fs_info, vol_args->name);
+       ret = btrfs_init_new_device(fs_info, vol_args->name, 0);
 
        if (!ret)
                btrfs_info(fs_info, "disk added %s", vol_args->name);
@@ -5539,6 +5583,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_resize(file, argp);
        case BTRFS_IOC_ADD_DEV:
                return btrfs_ioctl_add_dev(fs_info, argp);
+       case BTRFS_IOC_ADD_DEV_V2:
+               return btrfs_ioctl_add_dev_v2(fs_info, argp);
        case BTRFS_IOC_RM_DEV:
                return btrfs_ioctl_rm_dev(file, argp);
        case BTRFS_IOC_RM_DEV_V2:
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d8ea0eb..2b91b95 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -177,6 +177,25 @@ struct btrfs_raid_bio {
        unsigned long *dbitmap;
 };
 
+/* raid56 log */
+struct btrfs_r5l_log {
+       /* protect this struct and log io */
+       struct mutex io_mutex;
+
+       /* r5log device */
+       struct btrfs_device *dev;
+
+       /* allocation range for log entries */
+       u64 data_offset;
+       u64 device_size;
+
+       u64 last_checkpoint;
+       u64 last_cp_seq;
+       u64 seq;
+       u64 log_start;
+       struct btrfs_r5l_io_unit *current_io;
+};
+
 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
 static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
 static void rmw_work(struct btrfs_work *work);
@@ -2715,3 +2734,26 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio 
*rbio)
        if (!lock_stripe_add(rbio))
                async_missing_raid56(rbio);
 }
+
+int btrfs_set_r5log(struct btrfs_fs_info *fs_info, struct btrfs_device *device)
+{
+       struct btrfs_r5l_log *log;
+
+       log = kzalloc(sizeof(*log), GFP_NOFS);
+       if (!log)
+               return -ENOMEM;
+
+       /* see find_free_dev_extent for 1M start offset */
+       log->data_offset = 1024ull * 1024;
+       log->device_size = btrfs_device_get_total_bytes(device) - 
log->data_offset;
+       log->device_size = round_down(log->device_size, PAGE_SIZE);
+       log->dev = device;
+       mutex_init(&log->io_mutex);
+
+       cmpxchg(&fs_info->r5log, NULL, log);
+       ASSERT(fs_info->r5log == log);
+
+       trace_printk("r5log: set a r5log in fs_info,  alloc_range 0x%llx 
0x%llx",
+                    log->data_offset, log->data_offset + log->device_size);
+       return 0;
+}
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 4ee4fe3..0c8bf6a 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -65,4 +65,5 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
 
 int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
 void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
+int btrfs_set_r5log(struct btrfs_fs_info *fs_info, struct btrfs_device 
*device);
 #endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 017b67d..dafc541 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2313,7 +2313,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle 
*trans,
        return ret;
 }
 
-int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char 
*device_path)
+int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char 
*device_path, const u64 flags)
 {
        struct btrfs_root *root = fs_info->dev_root;
        struct request_queue *q;
@@ -2326,6 +2326,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
        u64 tmp;
        int seeding_dev = 0;
        int ret = 0;
+       bool is_r5log = (flags & BTRFS_DEVICE_RAID56_LOG);
+
+       if (is_r5log)
+               ASSERT(!fs_info->fs_devices->seeding);
 
        if ((sb->s_flags & MS_RDONLY) && !fs_info->fs_devices->seeding)
                return -EROFS;
@@ -2382,6 +2386,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
        q = bdev_get_queue(bdev);
        if (blk_queue_discard(q))
                device->can_discard = 1;
+       if (is_r5log)
+               device->type |= BTRFS_DEV_RAID56_LOG;
        device->writeable = 1;
        device->generation = trans->transid;
        device->io_width = fs_info->sectorsize;
@@ -2434,11 +2440,13 @@ int btrfs_init_new_device(struct btrfs_fs_info 
*fs_info, const char *device_path
        /* add sysfs device entry */
        btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
 
-       /*
-        * we've got more storage, clear any full flags on the space
-        * infos
-        */
-       btrfs_clear_space_info_full(fs_info);
+       if (!is_r5log) {
+               /*
+                * we've got more storage, clear any full flags on the space
+                * infos
+                */
+               btrfs_clear_space_info_full(fs_info);
+       }
 
        mutex_unlock(&fs_info->chunk_mutex);
        mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -2459,6 +2467,12 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, 
const char *device_path
                goto error_trans;
        }
 
+       if (is_r5log) {
+               ret = btrfs_set_r5log(fs_info, device);
+               if (ret)
+                       goto error_trans;
+       }
+
        if (seeding_dev) {
                char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index c7d0fbc..60e347a 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -437,7 +437,8 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
 struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 
devid,
                                       u8 *uuid, u8 *fsid);
 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
-int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
+int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path,
+                         const u64 flags);
 int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
                                  const char *device_path,
                                  struct btrfs_device *srcdev,
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index a456e53..be5991f 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -35,6 +35,7 @@ struct btrfs_ioctl_vol_args {
 #define BTRFS_DEVICE_PATH_NAME_MAX 1024
 
 #define BTRFS_DEVICE_SPEC_BY_ID                (1ULL << 3)
+#define BTRFS_DEVICE_RAID56_LOG                (1ULL << 4)
 
 #define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED               \
                        (BTRFS_SUBVOL_CREATE_ASYNC |    \
@@ -818,5 +819,7 @@ enum btrfs_err_code {
                                   struct btrfs_ioctl_feature_flags[3])
 #define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \
                                   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_ADD_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 59, \
+                                  struct btrfs_ioctl_vol_args_v2)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 10689e1..52fed59 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -347,6 +347,10 @@ struct btrfs_key {
        __u64 offset;
 } __attribute__ ((__packed__));
 
+/* dev_item.type */
+/* #define BTRFS_DEV_REGULAR   0 */
+#define BTRFS_DEV_RAID56_LOG   (1ULL << 0)
+
 struct btrfs_dev_item {
        /* the internal btrfs device id */
        __le64 devid;
-- 
2.9.4

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to