On Tue, 24 Feb 2015 20:01:40 +0100, Andreas Rohner wrote:
> This patch adds simple tracking of block deletions and updates for
> all files except the DAT- and the SUFILE-Metadatafiles. It uses the
> fact, that for every block, NILFS2 keeps an entry in the DAT-File
> and stores the checkpoint where it was created and deleted or
> overwritten. So whenever a block is deleted or overwritten
> nilfs_dat_commit_end() is called to update the DAT-Entry. At this
> point this patch simply decrements the su_nlive_blks field of the
> corresponding segment. The value of su_nlive_blks is set at segment
> creation time.
> 
> The blocks of the DAT-File cannot be counted this way, because it
> does not contain any entries about itself, so the function
> nilfs_dat_commit_end() is not called when its blocks are deleted or
> overwritten.
> 
> The SUFILE cannot be counted this way, because it would lead to a
> deadlock. When nilfs_dat_commit_end() is called, the bmap->b_sem is
> held by code way up the call chain. To decrement the SUFILE entry
> the same semaphore has to be aquired. So if the DAT-Entry belongs to
> the SUFILE both semaphores are the same and a deadlock will occur.
> But it works for any other file. So by excluding the SUFILE from
> being counted by the extra parameter count_blocks a deadlock can be
> avoided.
> 
> With the above changes the code does not pass the lock dependency
> checks of the kernel, because all the locks have the same class and
> the order in which the locks are taken is different. Usually it is:
> 
> 1. down_write(&NILFS_MDT(sufile)->mi_sem);
> 2. down_write(&bmap->b_sem);
> 
> Now it can also be reversed, which leads to failed checks:
> 
> 1. down_write(&bmap->b_sem); /* lock of a file other than SUFILE */
> 2. down_write(&NILFS_MDT(sufile)->mi_sem);
> 
> But this is safe as long as the first lock down_write(&bmap->b_sem)
> doesn't belong to the SUFILE.
> 
> It is also possible, that two bmap->b_sem locks have to be taken at
> the same time:
> 
> 1. down_write(&bmap->b_sem); /* lock of a file other than SUFILE */
> 2. down_write(&bmap->b_sem); /* lock of SUFILE */
> 
> Since bmap->b_sem of normal files and the bmap->b_sem of the
> SUFILE have the same lock class, the above behavior would also lead
> to a warning.
> 
> Because of this, it is necessary to introduce two new lock classes
> for the SUFILE. So the bmap->b_sem of the SUFILE gets its own lock
> class and the NILFS_MDT(sufile)->mi_sem as well.
> 
> A new feature compatibility flag
> NILFS_FEATURE_COMPAT_TRACK_LIVE_BLKS was added, so that the new
> features introduced by this patch can be enabled or disabled at any
> time.
> 
> Signed-off-by: Andreas Rohner <andreas.roh...@gmx.net>
> ---
>  fs/nilfs2/bmap.c          |  8 +++++++-
>  fs/nilfs2/bmap.h          |  5 +++--
>  fs/nilfs2/btree.c         |  4 +++-
>  fs/nilfs2/dat.c           | 25 ++++++++++++++++++++-----
>  fs/nilfs2/dat.h           |  7 +++++--
>  fs/nilfs2/direct.c        |  4 +++-
>  fs/nilfs2/mdt.c           |  5 ++++-
>  fs/nilfs2/segbuf.c        |  1 +
>  fs/nilfs2/segbuf.h        |  1 +
>  fs/nilfs2/segment.c       | 25 +++++++++++++++++++++----
>  fs/nilfs2/the_nilfs.c     |  4 ++++
>  fs/nilfs2/the_nilfs.h     | 16 ++++++++++++++++
>  include/linux/nilfs2_fs.h |  4 +++-
>  13 files changed, 91 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
> index aadbd0b..ecd62ba 100644
> --- a/fs/nilfs2/bmap.c
> +++ b/fs/nilfs2/bmap.c
> @@ -467,6 +467,7 @@ __u64 nilfs_bmap_find_target_in_group(const struct 
> nilfs_bmap *bmap)
>  
>  static struct lock_class_key nilfs_bmap_dat_lock_key;
>  static struct lock_class_key nilfs_bmap_mdt_lock_key;
> +static struct lock_class_key nilfs_bmap_sufile_lock_key;
>  
>  /**
>   * nilfs_bmap_read - read a bmap from an inode
> @@ -498,12 +499,17 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct 
> nilfs_inode *raw_inode)
>               lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
>               break;
>       case NILFS_CPFILE_INO:
> -     case NILFS_SUFILE_INO:
>               bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
>               bmap->b_last_allocated_key = 0;
>               bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
>               lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
>               break;
> +     case NILFS_SUFILE_INO:
> +             bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
> +             bmap->b_last_allocated_key = 0;
> +             bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
> +             lockdep_set_class(&bmap->b_sem, &nilfs_bmap_sufile_lock_key);
> +             break;
>       case NILFS_IFILE_INO:
>               lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
>               /* Fall through */
> diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
> index b89e680..718c814 100644
> --- a/fs/nilfs2/bmap.h
> +++ b/fs/nilfs2/bmap.h
> @@ -222,8 +222,9 @@ static inline void nilfs_bmap_commit_end_ptr(struct 
> nilfs_bmap *bmap,
>                                            struct inode *dat)
>  {
>       if (dat)
> -             nilfs_dat_commit_end(dat, &req->bpr_req,
> -                                  bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
> +             nilfs_dat_commit_end(dat, &req->bpr_req, NULL,
> +                                  bmap->b_ptr_type == NILFS_BMAP_PTR_VS,
> +                                  bmap->b_inode->i_ino != NILFS_SUFILE_INO);
>  }
>  
>  static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
> diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
> index b2e3ff3..2af0519 100644
> --- a/fs/nilfs2/btree.c
> +++ b/fs/nilfs2/btree.c
> @@ -1851,7 +1851,9 @@ static void nilfs_btree_commit_update_v(struct 
> nilfs_bmap *btree,
>  
>       nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
>                               &path[level].bp_newreq.bpr_req,
> -                             btree->b_ptr_type == NILFS_BMAP_PTR_VS);
> +                             NULL,
> +                             btree->b_ptr_type == NILFS_BMAP_PTR_VS,
> +                             btree->b_inode->i_ino != NILFS_SUFILE_INO);
>  
>       if (buffer_nilfs_node(path[level].bp_bh)) {
>               nilfs_btnode_commit_change_key(
> diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
> index 0d5fada..d2c8f7e 100644
> --- a/fs/nilfs2/dat.c
> +++ b/fs/nilfs2/dat.c
> @@ -28,6 +28,7 @@
>  #include "mdt.h"
>  #include "alloc.h"
>  #include "dat.h"
> +#include "sufile.h"
>  
>  
>  #define NILFS_CNO_MIN        ((__u64)1)
> @@ -185,12 +186,14 @@ int nilfs_dat_prepare_end(struct inode *dat, struct 
> nilfs_palloc_req *req)
>  }
>  
>  void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
> -                       int dead)
> +                       struct nilfs_sufile_mod_cache *mc,
> +                       int dead, int count_blocks)
>  {
>       struct nilfs_dat_entry *entry;
> -     __u64 start, end;
> +     __u64 start, end, segnum;
>       sector_t blocknr;
>       void *kaddr;
> +     struct the_nilfs *nilfs;
>  
>       kaddr = kmap_atomic(req->pr_entry_bh->b_page);
>       entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
> @@ -206,8 +209,18 @@ void nilfs_dat_commit_end(struct inode *dat, struct 
> nilfs_palloc_req *req,
>  
>       if (blocknr == 0)
>               nilfs_dat_commit_free(dat, req);
> -     else
> +     else {
>               nilfs_dat_commit_entry(dat, req);
> +
> +             nilfs = dat->i_sb->s_fs_info;
> +
> +             if (count_blocks && nilfs_feature_track_live_blks(nilfs)) {
> +                     segnum = nilfs_get_segnum_of_block(nilfs, blocknr);
> +
> +                     nilfs_sufile_mod_nlive_blks(nilfs->ns_sufile, mc,
> +                                                 segnum, -1);
> +             }
> +     }
>  }
>  
>  void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
> @@ -246,9 +259,11 @@ int nilfs_dat_prepare_update(struct inode *dat,
>  
>  void nilfs_dat_commit_update(struct inode *dat,
>                            struct nilfs_palloc_req *oldreq,
> -                          struct nilfs_palloc_req *newreq, int dead)
> +                          struct nilfs_palloc_req *newreq,
> +                          struct nilfs_sufile_mod_cache *mc,
> +                          int dead, int count_blocks)
>  {
> -     nilfs_dat_commit_end(dat, oldreq, dead);
> +     nilfs_dat_commit_end(dat, oldreq, mc, dead, count_blocks);
>       nilfs_dat_commit_alloc(dat, newreq);
>  }
>  
> diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
> index cbd8e97..d196f09 100644
> --- a/fs/nilfs2/dat.h
> +++ b/fs/nilfs2/dat.h
> @@ -29,6 +29,7 @@
>  
>  
>  struct nilfs_palloc_req;
> +struct nilfs_sufile_mod_cache;
>  
>  int nilfs_dat_translate(struct inode *, __u64, sector_t *);
>  
> @@ -39,12 +40,14 @@ int nilfs_dat_prepare_start(struct inode *, struct 
> nilfs_palloc_req *);
>  void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *,
>                           sector_t);
>  int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *);
> -void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int);
> +void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *,
> +                       struct nilfs_sufile_mod_cache *, int, int);
>  void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *);
>  int nilfs_dat_prepare_update(struct inode *, struct nilfs_palloc_req *,
>                            struct nilfs_palloc_req *);
>  void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *,
> -                          struct nilfs_palloc_req *, int);
> +                          struct nilfs_palloc_req *,
> +                          struct nilfs_sufile_mod_cache *, int, int);
>  void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *,
>                           struct nilfs_palloc_req *);
>  
> diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
> index 82f4865..e022cfb 100644
> --- a/fs/nilfs2/direct.c
> +++ b/fs/nilfs2/direct.c
> @@ -272,7 +272,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
>               if (ret < 0)
>                       return ret;
>               nilfs_dat_commit_update(dat, &oldreq, &newreq,
> -                                     bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
> +                             NULL,
> +                             bmap->b_ptr_type == NILFS_BMAP_PTR_VS,
> +                             bmap->b_inode->i_ino != NILFS_SUFILE_INO);
>               set_buffer_nilfs_volatile(bh);
>               nilfs_direct_set_ptr(bmap, key, newreq.pr_entry_nr);
>       } else
> diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
> index 892cf5f..2a81f82 100644
> --- a/fs/nilfs2/mdt.c
> +++ b/fs/nilfs2/mdt.c
> @@ -414,7 +414,7 @@ static const struct address_space_operations def_mdt_aops 
> = {
>  
>  static const struct inode_operations def_mdt_iops;
>  static const struct file_operations def_mdt_fops;
> -
> +static struct lock_class_key nilfs_mdt_mi_sufile_lock_key;
>  
>  int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz)
>  {
> @@ -427,6 +427,9 @@ int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, 
> size_t objsz)
>       init_rwsem(&mi->mi_sem);
>       inode->i_private = mi;
>  
> +     if (inode->i_ino == NILFS_SUFILE_INO)
> +             lockdep_set_class(&mi->mi_sem, &nilfs_mdt_mi_sufile_lock_key);
> +
>       inode->i_mode = S_IFREG;
>       mapping_set_gfp_mask(inode->i_mapping, gfp_mask);
>  
> diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
> index dc3a9efd..7a6e9cd 100644
> --- a/fs/nilfs2/segbuf.c
> +++ b/fs/nilfs2/segbuf.c
> @@ -57,6 +57,7 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct 
> super_block *sb)
>       INIT_LIST_HEAD(&segbuf->sb_segsum_buffers);
>       INIT_LIST_HEAD(&segbuf->sb_payload_buffers);
>       segbuf->sb_super_root = NULL;
> +     segbuf->sb_nlive_blks_added = 0;
>  
>       init_completion(&segbuf->sb_bio_event);
>       atomic_set(&segbuf->sb_err, 0);
> diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
> index b04f08c..d04da26 100644
> --- a/fs/nilfs2/segbuf.h
> +++ b/fs/nilfs2/segbuf.h
> @@ -83,6 +83,7 @@ struct nilfs_segment_buffer {
>       sector_t                sb_fseg_start, sb_fseg_end;
>       sector_t                sb_pseg_start;
>       unsigned                sb_rest_blocks;
> +     __u32                   sb_nlive_blks_added;
>  
>       /* Buffers */
>       struct list_head        sb_segsum_buffers;
> diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
> index 469086b..6059f53 100644
> --- a/fs/nilfs2/segment.c
> +++ b/fs/nilfs2/segment.c
> @@ -1367,9 +1367,10 @@ static void nilfs_free_incomplete_logs(struct 
> list_head *logs,
>  }
>  
>  static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
> -                                       struct inode *sufile)
> +                                       struct the_nilfs *nilfs)
>  {
>       struct nilfs_segment_buffer *segbuf;
> +     struct inode *sufile = nilfs->ns_sufile;
>       unsigned long live_blocks;
>       int ret;
>  
> @@ -1380,12 +1381,22 @@ static void nilfs_segctor_update_segusage(struct 
> nilfs_sc_info *sci,
>                                                    live_blocks,
>                                                    sci->sc_seg_ctime);
>               WARN_ON(ret); /* always succeed because the segusage is dirty */
> +
> +             /* should always be positive */
> +             segbuf->sb_nlive_blks_added = segbuf->sb_sum.nfileblk;
> +
> +             if (nilfs_feature_track_live_blks(nilfs))
> +                     nilfs_sufile_mod_nlive_blks(sufile, NULL,
> +                                             segbuf->sb_segnum,
> +                                             segbuf->sb_nlive_blks_added);
>       }
>  }
>  
> -static void nilfs_cancel_segusage(struct list_head *logs, struct inode 
> *sufile)
> +static void nilfs_cancel_segusage(struct list_head *logs,
> +                               struct the_nilfs *nilfs)
>  {
>       struct nilfs_segment_buffer *segbuf;
> +     struct inode *sufile = nilfs->ns_sufile;
>       int ret;
>  
>       segbuf = NILFS_FIRST_SEGBUF(logs);
> @@ -1394,6 +1405,12 @@ static void nilfs_cancel_segusage(struct list_head 
> *logs, struct inode *sufile)
>                                            segbuf->sb_fseg_start, 0);
>       WARN_ON(ret); /* always succeed because the segusage is dirty */
>  
> +     if (nilfs_feature_track_live_blks(nilfs))
> +             nilfs_sufile_mod_nlive_blks(sufile, NULL, segbuf->sb_segnum,
> +                                     -((__s64)segbuf->sb_nlive_blks_added));
> +
> +     segbuf->sb_nlive_blks_added = 0;
> +
>       list_for_each_entry_continue(segbuf, logs, sb_list) {
>               ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
>                                                    0, 0);
> @@ -1729,7 +1746,7 @@ static void nilfs_segctor_abort_construction(struct 
> nilfs_sc_info *sci,
>       nilfs_abort_logs(&logs, ret ? : err);
>  
>       list_splice_tail_init(&sci->sc_segbufs, &logs);
> -     nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
> +     nilfs_cancel_segusage(&logs, nilfs);
>       nilfs_free_incomplete_logs(&logs, nilfs);
>  
>       if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
> @@ -1995,7 +2012,7 @@ static int nilfs_segctor_do_construct(struct 
> nilfs_sc_info *sci, int mode)
>  
>                       nilfs_segctor_fill_in_super_root(sci, nilfs);
>               }
> -             nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
> +             nilfs_segctor_update_segusage(sci, nilfs);
>  
>               /* Write partial segments */
>               nilfs_segctor_prepare_write(sci);

Please separate changes below.

> diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
> index 69bd801..606fdfc 100644
> --- a/fs/nilfs2/the_nilfs.c
> +++ b/fs/nilfs2/the_nilfs.c
> @@ -630,6 +630,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct 
> super_block *sb, char *data)
>       get_random_bytes(&nilfs->ns_next_generation,
>                        sizeof(nilfs->ns_next_generation));
>  
> +     nilfs->ns_feature_compat = le64_to_cpu(sbp->s_feature_compat);
> +     nilfs->ns_feature_compat_ro = le64_to_cpu(sbp->s_feature_compat_ro);
> +     nilfs->ns_feature_incompat = le64_to_cpu(sbp->s_feature_incompat);
> +
>       err = nilfs_store_disk_layout(nilfs, sbp);
>       if (err)
>               goto failed_sbh;
> diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
> index 23778d3..87cab10 100644
> --- a/fs/nilfs2/the_nilfs.h
> +++ b/fs/nilfs2/the_nilfs.h
> @@ -101,6 +101,9 @@ enum {
>   * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
>   * @ns_dev_kobj_unregister: completion state
>   * @ns_dev_subgroups: <device> subgroups pointer
> + * @ns_feature_compat: Compatible feature set
> + * @ns_feature_compat_ro: Read-only compatible feature set
> + * @ns_feature_incompat: Incompatible feature set
>   */
>  struct the_nilfs {
>       unsigned long           ns_flags;
> @@ -201,6 +204,11 @@ struct the_nilfs {
>       struct kobject ns_dev_kobj;
>       struct completion ns_dev_kobj_unregister;
>       struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
> +
> +     /* Features */
> +     __u64                   ns_feature_compat;
> +     __u64                   ns_feature_compat_ro;
> +     __u64                   ns_feature_incompat;
>  };
>  
>  #define THE_NILFS_FNS(bit, name)                                     \
> @@ -393,4 +401,12 @@ static inline int nilfs_flush_device(struct the_nilfs 
> *nilfs)
>       return err;
>  }
>  
> +static inline int nilfs_feature_track_live_blks(struct the_nilfs *nilfs)
> +{
> +     return (nilfs->ns_feature_compat &
> +             NILFS_FEATURE_COMPAT_TRACK_LIVE_BLKS) &&
> +             (nilfs->ns_feature_compat &
> +             NILFS_FEATURE_COMPAT_SUFILE_EXTENSION);
> +}
> +

This should be written as below:

static inline int nilfs_feature_track_live_blks(struct the_nilfs *nilfs)
{
        const __u64 required_bits = NILFS_FEATURE_COMPAT_TRACK_LIVE_BLKS |
                                    NILFS_FEATURE_COMPAT_SUFILE_EXTENSION;

        return ((nilfs->ns_feature_compat & required_bits) == required_bits);
}

Or you can drop the track flag at mount time if
NILFS_FEATURE_COMPAT_SUFILE_EXTENSION flag is not set or
nilfs_sufile_ext_supported(sufile) is false.

Regards,
Ryusuke Konishi

>  #endif /* _THE_NILFS_H */
> diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
> index 5d83c55..6ccb2ad 100644
> --- a/include/linux/nilfs2_fs.h
> +++ b/include/linux/nilfs2_fs.h
> @@ -221,10 +221,12 @@ struct nilfs_super_block {
>   * doesn't know about, it should refuse to mount the filesystem.
>   */
>  #define NILFS_FEATURE_COMPAT_SUFILE_EXTENSION                (1ULL << 0)
> +#define NILFS_FEATURE_COMPAT_TRACK_LIVE_BLKS         (1ULL << 1)
>  
>  #define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT          (1ULL << 0)
>  
> -#define NILFS_FEATURE_COMPAT_SUPP    NILFS_FEATURE_COMPAT_SUFILE_EXTENSION
> +#define NILFS_FEATURE_COMPAT_SUPP    (NILFS_FEATURE_COMPAT_SUFILE_EXTENSION \
> +                             | NILFS_FEATURE_COMPAT_TRACK_LIVE_BLKS)
>  #define NILFS_FEATURE_COMPAT_RO_SUPP NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT
>  #define NILFS_FEATURE_INCOMPAT_SUPP  0ULL
>  
> -- 
> 2.3.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to