On Wed, Aug 04, 2021 at 03:14:34PM +0800, Chao Yu wrote:
> On 2021/7/31 3:46, Gao Xiang wrote:
> > DAX is quite useful for some VM use cases, as it can greatly reduce
> > guest memory usage with a minimal, lightweight EROFS.
> > 
> > In order to prepare for such use cases, add preliminary dax support
> > for non-tailpacking regular files for now.
> > 
> > Tested with the DRAM-emulated PMEM and the EROFS image generated by
> > "mkfs.erofs -Enoinline_data enwik9.fsdax.img enwik9"
> > 
> > Cc: nvd...@lists.linux.dev
> > Cc: linux-fsde...@vger.kernel.org
> > Signed-off-by: Gao Xiang <hsiang...@linux.alibaba.com>
> > ---
> >   fs/erofs/data.c     | 42 +++++++++++++++++++++++++++++--
> >   fs/erofs/inode.c    |  4 +++
> >   fs/erofs/internal.h |  3 +++
> >   fs/erofs/super.c    | 60 +++++++++++++++++++++++++++++++++++++++++++--
> >   4 files changed, 105 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> > index 1f97151a9f90..911521293b20 100644
> > --- a/fs/erofs/data.c
> > +++ b/fs/erofs/data.c
> > @@ -6,7 +6,7 @@
> >   #include "internal.h"
> >   #include <linux/prefetch.h>
> >   #include <linux/iomap.h>
> > -
> > +#include <linux/dax.h>
> >   #include <trace/events/erofs.h>
> >   static void erofs_readendio(struct bio *bio)
> > @@ -323,6 +323,7 @@ static int erofs_iomap_begin(struct inode *inode, 
> > loff_t offset, loff_t length,
> >             return ret;
> >     iomap->bdev = inode->i_sb->s_bdev;
> > +   iomap->dax_dev = EROFS_I_SB(inode)->dax_dev;
> >     iomap->offset = map.m_la;
> >     iomap->length = map.m_llen;
> >     iomap->flags = 0;
> > @@ -382,6 +383,10 @@ static ssize_t erofs_file_read_iter(struct kiocb 
> > *iocb, struct iov_iter *to)
> >     if (!iov_iter_count(to))
> >             return 0;
> > +#ifdef CONFIG_FS_DAX
> > +   if (IS_DAX(iocb->ki_filp->f_mapping->host))
> > +           return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
> > +#endif
> >     if (iocb->ki_flags & IOCB_DIRECT) {
> >             int err = erofs_prepare_dio(iocb, to);
> > @@ -410,9 +415,42 @@ const struct address_space_operations 
> > erofs_raw_access_aops = {
> >     .direct_IO = noop_direct_IO,
> >   };
> > +#ifdef CONFIG_FS_DAX
> > +static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
> > +           enum page_entry_size pe_size)
> > +{
> > +   return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
> > +}
> > +
> > +static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
> > +{
> > +   return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
> > +}
> > +
> > +static const struct vm_operations_struct erofs_dax_vm_ops = {
> > +   .fault          = erofs_dax_fault,
> > +   .huge_fault     = erofs_dax_huge_fault,
> > +};
> > +
> > +static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +   if (!IS_DAX(file_inode(file)))
> > +           return generic_file_readonly_mmap(file, vma);
> > +
> > +   if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
> > +           return -EINVAL;
> > +
> > +   vma->vm_ops = &erofs_dax_vm_ops;
> > +   vma->vm_flags |= VM_HUGEPAGE;
> > +   return 0;
> > +}
> > +#else
> > +#define erofs_file_mmap    generic_file_readonly_mmap
> > +#endif
> > +
> >   const struct file_operations erofs_file_fops = {
> >     .llseek         = generic_file_llseek,
> >     .read_iter      = erofs_file_read_iter,
> > -   .mmap           = generic_file_readonly_mmap,
> > +   .mmap           = erofs_file_mmap,
> >     .splice_read    = generic_file_splice_read,
> >   };
> > diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
> > index 00edb7562fea..e875fba18159 100644
> > --- a/fs/erofs/inode.c
> > +++ b/fs/erofs/inode.c
> > @@ -174,6 +174,10 @@ static struct page *erofs_read_inode(struct inode 
> > *inode,
> >     inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
> >     inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
> > +   inode->i_flags &= ~S_DAX;
> > +   if (test_opt(&sbi->ctx, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
> > +       vi->datalayout == EROFS_INODE_FLAT_PLAIN)
> > +           inode->i_flags |= S_DAX;
> >     if (!nblks)
> >             /* measure inode.i_blocks as generic filesystems */
> >             inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
> > diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> > index 2669c785d548..7c9abfc93109 100644
> > --- a/fs/erofs/internal.h
> > +++ b/fs/erofs/internal.h
> > @@ -83,6 +83,7 @@ struct erofs_sb_info {
> >     struct erofs_sb_lz4_info lz4;
> >   #endif    /* CONFIG_EROFS_FS_ZIP */
> > +   struct dax_device *dax_dev;
> >     u32 blocks;
> >     u32 meta_blkaddr;
> >   #ifdef CONFIG_EROFS_FS_XATTR
> > @@ -115,6 +116,8 @@ struct erofs_sb_info {
> >   /* Mount flags set via mount options or defaults */
> >   #define EROFS_MOUNT_XATTR_USER            0x00000010
> >   #define EROFS_MOUNT_POSIX_ACL             0x00000020
> > +#define EROFS_MOUNT_DAX_ALWAYS             0x00000040
> > +#define EROFS_MOUNT_DAX_NEVER              0x00000080
> >   #define clear_opt(ctx, option)    ((ctx)->mount_opt &= 
> > ~EROFS_MOUNT_##option)
> >   #define set_opt(ctx, option)      ((ctx)->mount_opt |= 
> > EROFS_MOUNT_##option)
> > diff --git a/fs/erofs/super.c b/fs/erofs/super.c
> > index 8fc6c04b54f4..d5b110fd365d 100644
> > --- a/fs/erofs/super.c
> > +++ b/fs/erofs/super.c
> > @@ -11,6 +11,7 @@
> >   #include <linux/crc32c.h>
> >   #include <linux/fs_context.h>
> >   #include <linux/fs_parser.h>
> > +#include <linux/dax.h>
> >   #include "xattr.h"
> >   #define CREATE_TRACE_POINTS
> > @@ -355,6 +356,8 @@ enum {
> >     Opt_user_xattr,
> >     Opt_acl,
> >     Opt_cache_strategy,
> > +   Opt_dax,
> > +   Opt_dax_enum,
> 
> We need to update the documentation for these new dax mount options.

Yeah, I agree, let me update this soon.

> 
> >     Opt_err
> >   };
> > @@ -365,14 +368,47 @@ static const struct constant_table 
> > erofs_param_cache_strategy[] = {
> >     {}
> >   };
> > +static const struct constant_table erofs_dax_param_enums[] = {
> > +   {"always",      EROFS_MOUNT_DAX_ALWAYS},
> > +   {"never",       EROFS_MOUNT_DAX_NEVER},
> > +   {}
> > +};
> > +
> >   static const struct fs_parameter_spec erofs_fs_parameters[] = {
> >     fsparam_flag_no("user_xattr",   Opt_user_xattr),
> >     fsparam_flag_no("acl",          Opt_acl),
> >     fsparam_enum("cache_strategy",  Opt_cache_strategy,
> >                  erofs_param_cache_strategy),
> > +   fsparam_flag("dax",             Opt_dax),
> > +   fsparam_enum("dax",             Opt_dax_enum, erofs_dax_param_enums),
> >     {}
> >   };
> > +static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
> > +{
> > +#ifdef CONFIG_FS_DAX
> > +   struct erofs_fs_context *ctx = fc->fs_private;
> > +
> > +   switch (mode) {
> > +   case EROFS_MOUNT_DAX_ALWAYS:
> > +           warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own 
> > risk");
> > +           set_opt(ctx, DAX_ALWAYS);
> > +           clear_opt(ctx, DAX_NEVER);
> > +           return true;
> > +   case EROFS_MOUNT_DAX_NEVER:
> > +           set_opt(ctx, DAX_NEVER);
> > +           clear_opt(ctx, DAX_ALWAYS);
> > +           return true;
> > +   default:
> > +           DBG_BUGON(1);
> > +           return false;
> > +   }
> > +#else
> > +   errorfc(fc, "dax options not supported");
> > +   return false;
> > +#endif
> > +}
> > +
> >   static int erofs_fc_parse_param(struct fs_context *fc,
> >                             struct fs_parameter *param)
> >   {
> > @@ -412,6 +448,14 @@ static int erofs_fc_parse_param(struct fs_context *fc,
> >             errorfc(fc, "compression not supported, cache_strategy 
> > ignored");
> >   #endif
> >             break;
> > +   case Opt_dax:
> > +           if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
> > +                   return -EINVAL;
> > +           break;
> > +   case Opt_dax_enum:
> > +           if (!erofs_fc_set_dax_mode(fc, result.uint_32))
> > +                   return -EINVAL;
> > +           break;
> >     default:
> >             return -ENOPARAM;
> >     }
> > @@ -496,10 +540,16 @@ static int erofs_fc_fill_super(struct super_block 
> > *sb, struct fs_context *fc)
> >             return -ENOMEM;
> >     sb->s_fs_info = sbi;
> > +   sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
> >     err = erofs_read_superblock(sb);
> >     if (err)
> >             return err;
> > +   if (test_opt(ctx, DAX_ALWAYS) &&
> > +       !bdev_dax_supported(sb->s_bdev, EROFS_BLKSIZ)) {
> > +           errorfc(fc, "DAX unsupported by block device. Turning off 
> > DAX.");
> > +           clear_opt(ctx, DAX_ALWAYS);
> > +   }
> >     sb->s_flags |= SB_RDONLY | SB_NOATIME;
> >     sb->s_maxbytes = MAX_LFS_FILESIZE;
> >     sb->s_time_gran = 1;
> > @@ -609,6 +659,8 @@ static void erofs_kill_sb(struct super_block *sb)
> >     sbi = EROFS_SB(sb);
> >     if (!sbi)
> >             return;
> > +   if (sbi->dax_dev)
> > +           fs_put_dax(sbi->dax_dev);
> 
> The NULL check above looks unnecessary; just call this unconditionally:
> fs_put_dax(sbi->dax_dev);

Will update.

Thanks,
Gao Xiang

> 
> Thanks,
> 
> >     kfree(sbi);
> >     sb->s_fs_info = NULL;
> >   }
> > @@ -711,8 +763,8 @@ static int erofs_statfs(struct dentry *dentry, struct 
> > kstatfs *buf)
> >   static int erofs_show_options(struct seq_file *seq, struct dentry *root)
> >   {
> > -   struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
> > -   struct erofs_fs_context *ctx __maybe_unused = &sbi->ctx;
> > +   struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
> > +   struct erofs_fs_context *ctx = &sbi->ctx;
> >   #ifdef CONFIG_EROFS_FS_XATTR
> >     if (test_opt(ctx, XATTR_USER))
> > @@ -734,6 +786,10 @@ static int erofs_show_options(struct seq_file *seq, 
> > struct dentry *root)
> >     else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
> >             seq_puts(seq, ",cache_strategy=readaround");
> >   #endif
> > +   if (test_opt(ctx, DAX_ALWAYS))
> > +           seq_puts(seq, ",dax=always");
> > +   if (test_opt(ctx, DAX_NEVER))
> > +           seq_puts(seq, ",dax=never");
> >     return 0;
> >   }
> > 

Reply via email to