On 2018/07/12 10:25, Lu Fengqi wrote:
> From: Wang Xiaoguang <wangxg.f...@cn.fujitsu.com>
> 
> Add ioctl interface for inband deduplication, which includes:
> 1) enable
> 2) disable
> 3) status
> 
> It also adds a pseudo RO compat flag to indicate that btrfs now supports
> inband dedupe.
> However, this does not change the on-disk format; it is just a pseudo RO
> compat flag.
> 
> All these ioctl interfaces are stateless, which means the caller doesn't
> need to check the previous dedupe state before calling them, and only
> needs to care about the final desired state.
> 
> For example, if the user wants to enable dedupe with a specified block
> size and limit, just fill in the ioctl structure and call the enable ioctl.
> There is no need to check whether dedupe is already running.
> 
> These ioctls will handle things like re-configure or disable quite well.
> 
> Also, for invalid parameters, the enable ioctl interface will set the
> field of the first invalid parameter encountered to (-1) to inform the
> caller, except for limit_nr/limit_mem, where the value will be (0).
> 
> Signed-off-by: Qu Wenruo <quwen...@cn.fujitsu.com>
> Signed-off-by: Wang Xiaoguang <wangxg.f...@cn.fujitsu.com>
> Signed-off-by: Lu Fengqi <lufq.f...@cn.fujitsu.com>
> ---
>  fs/btrfs/dedupe.c          | 50 ++++++++++++++++++++++++++++
>  fs/btrfs/dedupe.h          | 17 +++++++---
>  fs/btrfs/disk-io.c         |  3 ++
>  fs/btrfs/ioctl.c           | 67 ++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/sysfs.c           |  2 ++
>  include/uapi/linux/btrfs.h | 12 ++++++-
>  6 files changed, 145 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/dedupe.c b/fs/btrfs/dedupe.c
> index 14c8d245480e..f068321fdd1c 100644
> --- a/fs/btrfs/dedupe.c
> +++ b/fs/btrfs/dedupe.c
> @@ -29,6 +29,35 @@ static inline struct inmem_hash *inmem_alloc_hash(u16 algo)
>                       GFP_NOFS);
>  }
>  
> +void btrfs_dedupe_status(struct btrfs_fs_info *fs_info,
> +                      struct btrfs_ioctl_dedupe_args *dargs)
> +{
> +     struct btrfs_dedupe_info *dedupe_info = fs_info->dedupe_info;
> +
> +     if (!fs_info->dedupe_enabled || !dedupe_info) {
> +             dargs->status = 0;
> +             dargs->blocksize = 0;
> +             dargs->backend = 0;
> +             dargs->hash_algo = 0;
> +             dargs->limit_nr = 0;
> +             dargs->current_nr = 0;
> +             memset(dargs->__unused, -1, sizeof(dargs->__unused));
> +             return;
> +     }
> +     mutex_lock(&dedupe_info->lock);
> +     dargs->status = 1;
> +     dargs->blocksize = dedupe_info->blocksize;
> +     dargs->backend = dedupe_info->backend;
> +     dargs->hash_algo = dedupe_info->hash_algo;
> +     dargs->limit_nr = dedupe_info->limit_nr;
> +     dargs->limit_mem = dedupe_info->limit_nr *
> +             (sizeof(struct inmem_hash) +
> +              btrfs_hash_sizes[dedupe_info->hash_algo]);
> +     dargs->current_nr = dedupe_info->current_nr;
> +     mutex_unlock(&dedupe_info->lock);
> +     memset(dargs->__unused, -1, sizeof(dargs->__unused));
> +}
> +
>  static int init_dedupe_info(struct btrfs_dedupe_info **ret_info,
>                           struct btrfs_ioctl_dedupe_args *dargs)
>  {
> @@ -409,6 +438,27 @@ static void unblock_all_writers(struct btrfs_fs_info *fs_info)
>       percpu_up_write(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1);
>  }
>  
> +int btrfs_dedupe_cleanup(struct btrfs_fs_info *fs_info)
> +{
> +     struct btrfs_dedupe_info *dedupe_info;
> +
> +     fs_info->dedupe_enabled = 0;
> +     /* same as disable */
> +     smp_wmb();
> +     dedupe_info = fs_info->dedupe_info;
> +     fs_info->dedupe_info = NULL;
> +
> +     if (!dedupe_info)
> +             return 0;
> +
> +     if (dedupe_info->backend == BTRFS_DEDUPE_BACKEND_INMEMORY)
> +             inmem_destroy(dedupe_info);
> +
> +     crypto_free_shash(dedupe_info->dedupe_driver);
> +     kfree(dedupe_info);
> +     return 0;
> +}
> +
>  int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info)
>  {
>       struct btrfs_dedupe_info *dedupe_info;
> diff --git a/fs/btrfs/dedupe.h b/fs/btrfs/dedupe.h
> index ebcbb89d79a0..85a87093ab04 100644
> --- a/fs/btrfs/dedupe.h
> +++ b/fs/btrfs/dedupe.h
> @@ -96,6 +96,15 @@ static inline struct btrfs_dedupe_hash *btrfs_dedupe_alloc_hash(u16 algo)
>  int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info,
>                       struct btrfs_ioctl_dedupe_args *dargs);
>  
> +
> +/*
> + * Get inband dedupe info
> + * Since it needs to access different backends' hash size, which
> + * is not exported, we need such simple function.
> + */
> +void btrfs_dedupe_status(struct btrfs_fs_info *fs_info,
> +                      struct btrfs_ioctl_dedupe_args *dargs);
> +
>  /*
>   * Disable dedupe and invalidate all its dedupe data.
>   * Called at dedupe disable time.
> @@ -107,12 +116,10 @@ int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info,
>  int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info);
>  
>  /*
> - * Get current dedupe status.
> - * Return 0 for success
> - * No possible error yet
> + * Cleanup current btrfs_dedupe_info
> + * Called in umount time
>   */
> -void btrfs_dedupe_status(struct btrfs_fs_info *fs_info,
> -                      struct btrfs_ioctl_dedupe_args *dargs);
> +int btrfs_dedupe_cleanup(struct btrfs_fs_info *fs_info);
>  
>  /*
>   * Calculate hash for dedupe.
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index cf0ddd5d8108..5f0397747832 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -40,6 +40,7 @@
>  #include "compression.h"
>  #include "tree-checker.h"
>  #include "ref-verify.h"
> +#include "dedupe.h"
>  
>  #ifdef CONFIG_X86
>  #include <asm/cpufeature.h>
> @@ -4026,6 +4027,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
>       btrfs_free_qgroup_config(fs_info);
>       ASSERT(list_empty(&fs_info->delalloc_roots));
>  
> +     btrfs_dedupe_cleanup(fs_info);
> +
>       if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
>               btrfs_info(fs_info, "at unmount delalloc count %lld",
>                      percpu_counter_sum(&fs_info->delalloc_bytes));
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index bd6498a9c924..a8220ae9fc29 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -3627,6 +3627,69 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
>       return olen;
>  }
>  
> +static long btrfs_ioctl_dedupe_ctl(struct btrfs_root *root, void __user *args)
> +{
> +     struct btrfs_ioctl_dedupe_args *dargs;
> +     struct btrfs_fs_info *fs_info = root->fs_info;
> +     int ret = 0;
> +
> +     if (!capable(CAP_SYS_ADMIN))
> +             return -EPERM;
> +
> +     dargs = memdup_user(args, sizeof(*dargs));
> +     if (IS_ERR(dargs)) {
> +             ret = PTR_ERR(dargs);
> +             return ret;
> +     }
> +
> +     if (dargs->cmd >= BTRFS_DEDUPE_CTL_LAST) {
> +             ret = -EINVAL;
> +             goto out;
> +     }
> +     switch (dargs->cmd) {
> +     case BTRFS_DEDUPE_CTL_ENABLE:
> +             mutex_lock(&fs_info->dedupe_ioctl_lock);
> +             ret = btrfs_dedupe_enable(fs_info, dargs);
> +             /*
> +              * Also copy the result to caller for further use
> +              * if enable succeeded.
> +              * For error case, dargs is already set up with
> +              * special values indicating error reason.
> +              */
> +             if (!ret)
> +                     btrfs_dedupe_status(fs_info, dargs);
> +             mutex_unlock(&fs_info->dedupe_ioctl_lock);
> +             break;
> +     case BTRFS_DEDUPE_CTL_DISABLE:
> +             mutex_lock(&fs_info->dedupe_ioctl_lock);
> +             ret = btrfs_dedupe_disable(fs_info);
> +             btrfs_dedupe_status(fs_info, dargs);
> +             mutex_unlock(&fs_info->dedupe_ioctl_lock);
> +             break;
> +     case BTRFS_DEDUPE_CTL_STATUS:
> +             mutex_lock(&fs_info->dedupe_ioctl_lock);
> +             btrfs_dedupe_status(fs_info, dargs);
> +             mutex_unlock(&fs_info->dedupe_ioctl_lock);
> +             break;
> +     default:
> +             /*
> +              * Use this return value to inform progs that kernel
> +              * doesn't support such new command.
> +              */
> +             ret = -EOPNOTSUPP;
> +             goto out;
> +     }
> +     /*
> +      * All ioctl subcommand will modify user dargs,
> +      * Don't override return value unless copy fails
> +      */
> +     if (copy_to_user(args, dargs, sizeof(*dargs)))
> +             ret = -EFAULT;
> +out:
> +     kfree(dargs);
> +     return ret;
> +}
> +
>  static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
>                                    struct inode *inode,
>                                    u64 endoff,
> @@ -5961,6 +6024,10 @@ long btrfs_ioctl(struct file *file, unsigned int
>               return btrfs_ioctl_get_fslabel(file, argp);
>       case BTRFS_IOC_SET_FSLABEL:
>               return btrfs_ioctl_set_fslabel(file, argp);

> +#ifdef CONFIG_BTRFS_DEBUG

Would it be better to guard this with a dedicated config symbol
(for example, CONFIG_BTRFS_INBAND_DEDUPE) instead of CONFIG_BTRFS_DEBUG?

Thanks,
Tsutomu

> +     case BTRFS_IOC_DEDUPE_CTL:
> +             return btrfs_ioctl_dedupe_ctl(root, argp);
> +#endif
>       case BTRFS_IOC_GET_SUPPORTED_FEATURES:
>               return btrfs_ioctl_get_supported_features(argp);
>       case BTRFS_IOC_GET_FEATURES:
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 4a4e960c7c66..bb23b1222fdf 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -194,6 +194,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
>  BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
>  BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
>  BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
> +BTRFS_FEAT_ATTR_COMPAT_RO(dedupe, DEDUPE);
>  
>  static struct attribute *btrfs_supported_feature_attrs[] = {
>       BTRFS_FEAT_ATTR_PTR(mixed_backref),
> @@ -207,6 +208,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
>       BTRFS_FEAT_ATTR_PTR(skinny_metadata),
>       BTRFS_FEAT_ATTR_PTR(no_holes),
>       BTRFS_FEAT_ATTR_PTR(free_space_tree),
> +     BTRFS_FEAT_ATTR_PTR(dedupe),
>       NULL
>  };
>  
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index 77c9219f54fe..95286dc7e683 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -252,6 +252,7 @@ struct btrfs_ioctl_fs_info_args {
>   * first mount when booting older kernel versions.
>   */
>  #define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID        (1ULL << 1)
> +#define BTRFS_FEATURE_COMPAT_RO_DEDUPE               (1ULL << 2)
>  
>  #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
>  #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL        (1ULL << 1)
> @@ -684,7 +685,14 @@ struct btrfs_ioctl_get_dev_stats {
>  
>  /* Default dedupe limit on number of hash */
>  #define BTRFS_DEDUPE_LIMIT_NR_DEFAULT        (32 * 1024)
> -
> +/*
> + * de-duplication control modes
> + * For re-config, re-enable will handle it
> + */
> +#define BTRFS_DEDUPE_CTL_ENABLE      1
> +#define BTRFS_DEDUPE_CTL_DISABLE 2
> +#define BTRFS_DEDUPE_CTL_STATUS      3
> +#define BTRFS_DEDUPE_CTL_LAST        4
>  /*
>   * This structure is used for dedupe enable/disable/configure
>   * and status ioctl.
> @@ -960,6 +968,8 @@ enum btrfs_err_code {
>                                   struct btrfs_ioctl_dev_replace_args)
>  #define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
>                                        struct btrfs_ioctl_same_args)
> +#define BTRFS_IOC_DEDUPE_CTL _IOWR(BTRFS_IOCTL_MAGIC, 55, \
> +                                   struct btrfs_ioctl_dedupe_args)
>  #define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
>                                  struct btrfs_ioctl_feature_flags)
>  #define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to