On Tue, 24 Feb 2015 20:01:43 +0100, Andreas Rohner wrote:
> This patch improves the accuracy of the su_nlive_blks segment
> usage field by also counting the blocks of the DAT-File. A block in
> the DAT-File is considered reclaimable as soon as it is overwritten.
> There is no need to consider protection periods, snapshots or
> checkpoints. So whenever a block is overwritten during segment
> construction, the segment usage information of the segment at the
> previous location of the block is decremented. To get the previous
> location of the block the b_blocknr field of the buffer_head
> structure is used.
> 
> SUFILE blocks are counted in a similar way, but if the GC reads a
> block into a GC inode, that already is in the cache, then there are
> two versions of the block. If this happens both versions will be
> counted, which can lead to small seemingly random incorrect values.
> But it is better to accept these small inaccuracies than to not
> count the SUFILE at all. These inaccuracies do not occur for the
> DAT-File, because it does not need a GC inode.
> 
> Additionally the blocks that belong to a GC inode are rechecked if
> they are reclaimable. If so the corresponding counter is
> decremented. The blocks were already checked in userspace, but
> without the proper locking. It is furthermore possible, that blocks
> become reclaimable during the cleaning process. For example by
> deleting checkpoints. To improve the performance of these extra
> checks, flags from userspace are used to determine reclaimability.
> If a block belongs to a snapshot it cannot be reclaimable and if
> it is within the protection period it must be counted as
> reclaimable.
> 
> Signed-off-by: Andreas Rohner <[email protected]>
> ---
>  fs/nilfs2/dat.c     |  70 ++++++++++++++++++++++++++++++++++++
>  fs/nilfs2/dat.h     |   1 +
>  fs/nilfs2/inode.c   |   2 ++
>  fs/nilfs2/segbuf.c  |   4 +++
>  fs/nilfs2/segbuf.h  |   1 +
>  fs/nilfs2/segment.c | 101 
> ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  6 files changed, 177 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
> index d2c8f7e..63d079c 100644
> --- a/fs/nilfs2/dat.c
> +++ b/fs/nilfs2/dat.c
> @@ -35,6 +35,17 @@
>  #define NILFS_CNO_MAX        (~(__u64)0)
>  
>  /**
> + * nilfs_dat_entry_is_alive - check if @entry is alive
> + * @entry: DAT-Entry
> + *
> + * Description: Simple check if @entry is alive in the current checkpoint.
> + */
> +static inline int nilfs_dat_entry_is_live(struct nilfs_dat_entry *entry)
> +{
> +     return entry->de_end == cpu_to_le64(NILFS_CNO_MAX);
> +}
> +

Do not use "inline" directive in *.c files.  Compiler aggressively
does it.  "noinline" directive should be used instead for functions
that we want to prevent from being inlined.

> +/**
>   * struct nilfs_dat_info - on-memory private data of DAT file
>   * @mi: on-memory private data of metadata file
>   * @palloc_cache: persistent object allocator cache of DAT file
> @@ -391,6 +402,65 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, 
> sector_t blocknr)
>  }
>  
>  /**
> + * nilfs_dat_is_live - checks if the virtual block number is alive
> + * @dat: DAT file inode
> + * @vblocknr: virtual block number
> + * @errp: pointer to return code if error occurred
> + *
> + * Description: nilfs_dat_is_live() looks up the DAT-Entry for
> + * @vblocknr and determines if the corresponding block is alive in the 
> current
> + * checkpoint or not. This check ignores snapshots and protection periods.
> + *
> + * Return Value: 1 if vblocknr is alive and 0 otherwise. On error, 0 is
> + * returned and @errp is set to one of the following negative error codes.
> + *
> + * %-EIO - I/O error.
> + *
> + * %-ENOMEM - Insufficient amount of memory available.
> + *
> + * %-ENOENT - A block number associated with @vblocknr does not exist.
> + */
> +int nilfs_dat_is_live(struct inode *dat, __u64 vblocknr, int *errp)
> +{
> +     struct buffer_head *entry_bh, *bh;
> +     struct nilfs_dat_entry *entry;
> +     sector_t blocknr;
> +     void *kaddr;
> +     int ret = 0, err;
> +
> +     err = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
> +     if (err < 0)
> +             goto out;
> +
> +     if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) {
> +             bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh);
> +             if (bh) {
> +                     WARN_ON(!buffer_uptodate(bh));
> +                     put_bh(entry_bh);
> +                     entry_bh = bh;
> +             }
> +     }
> +
> +     kaddr = kmap_atomic(entry_bh->b_page);
> +     entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
> +     blocknr = le64_to_cpu(entry->de_blocknr);
> +     if (blocknr == 0) {
> +             err = -ENOENT;
> +             goto out_unmap;
> +     }
> +
> +     ret = nilfs_dat_entry_is_live(entry);
> +
> +out_unmap:
> +     kunmap_atomic(kaddr);
> +     put_bh(entry_bh);
> +out:
> +     if (errp)
> +             *errp = err;

Remove errp argument by returning it as the return value.  Rather, the
true/false result should be returned via an argument if you'd like to
avoid mixing these two.

> +     return ret;
> +}
> +
> +/**
>   * nilfs_dat_translate - translate a virtual block number to a block number
>   * @dat: DAT file inode
>   * @vblocknr: virtual block number
> diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
> index d196f09..3cbddd6 100644
> --- a/fs/nilfs2/dat.h
> +++ b/fs/nilfs2/dat.h
> @@ -32,6 +32,7 @@ struct nilfs_palloc_req;
>  struct nilfs_sufile_mod_cache;
>  
>  int nilfs_dat_translate(struct inode *, __u64, sector_t *);
> +int nilfs_dat_is_live(struct inode *, __u64, int *);
>  
>  int nilfs_dat_prepare_alloc(struct inode *, struct nilfs_palloc_req *);
>  void nilfs_dat_commit_alloc(struct inode *, struct nilfs_palloc_req *);
> diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
> index 7f6d056..5412a76 100644
> --- a/fs/nilfs2/inode.c
> +++ b/fs/nilfs2/inode.c
> @@ -90,6 +90,8 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
>       int err = 0, ret;
>       unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
>  
> +     bh_result->b_blocknr = 0;
> +

Please do not add this in this big patch as I mentioned before.

>       down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
>       ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
>       up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
> diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
> index 7a6e9cd..bbd807b 100644
> --- a/fs/nilfs2/segbuf.c
> +++ b/fs/nilfs2/segbuf.c
> @@ -58,6 +58,7 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct 
> super_block *sb)
>       INIT_LIST_HEAD(&segbuf->sb_payload_buffers);
>       segbuf->sb_super_root = NULL;
>       segbuf->sb_nlive_blks_added = 0;
> +     segbuf->sb_nlive_blks_diff = 0;
>  
>       init_completion(&segbuf->sb_bio_event);
>       atomic_set(&segbuf->sb_err, 0);
> @@ -451,6 +452,9 @@ static int nilfs_segbuf_submit_bh(struct 
> nilfs_segment_buffer *segbuf,
>  

>       len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh));
>       if (len == bh->b_size) {
> +             lock_buffer(bh);
> +             map_bh(bh, segbuf->sb_super, wi->blocknr + wi->end);
> +             unlock_buffer(bh);
>               wi->end++;
>               return 0;
>       }

ditto.  Stop this.

> diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
> index d04da26..4e994f7 100644
> --- a/fs/nilfs2/segbuf.h
> +++ b/fs/nilfs2/segbuf.h
> @@ -84,6 +84,7 @@ struct nilfs_segment_buffer {
>       sector_t                sb_pseg_start;
>       unsigned                sb_rest_blocks;
>       __u32                   sb_nlive_blks_added;

> +     __s64                   sb_nlive_blks_diff;

sb_nlive_blks_diff is always decremented.  It looks better
to alter this to

        __u32                   sb_nlive_blks_deducted;

and increment it. (The term "diff" is ambiguous. Maybe,
it should be "deducted" or so.)


>  
>       /* Buffers */
>       struct list_head        sb_segsum_buffers;
> diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
> index dc0070c..16c7c36 100644
> --- a/fs/nilfs2/segment.c
> +++ b/fs/nilfs2/segment.c
> @@ -1385,7 +1385,8 @@ static void nilfs_segctor_update_segusage(struct 
> nilfs_sc_info *sci,
>               WARN_ON(ret); /* always succeed because the segusage is dirty */
>  
>               /* should always be positive */
> -             segbuf->sb_nlive_blks_added = segbuf->sb_sum.nfileblk;
> +             segbuf->sb_nlive_blks_added = segbuf->sb_nlive_blks_diff +
> +                                           segbuf->sb_sum.nfileblk;
>  
>               if (nilfs_feature_track_live_blks(nilfs))
>                       nilfs_sufile_mod_nlive_blks(sufile, sci->sc_mc,
> @@ -1497,12 +1498,98 @@ static void nilfs_list_replace_buffer(struct 
> buffer_head *old_bh,
>       /* The caller must release old_bh */
>  }
>  
> +/**
> + * nilfs_segctor_dec_nlive_blks_gc - dec. nlive_blks for blocks of GC-Inodes
> + * @dat: dat inode
> + * @segbuf: currtent segment buffer
> + * @bh: current buffer head
> + *
> + * Description: nilfs_segctor_dec_nlive_blks_gc() is called if the inode to
> + * which @bh belongs is a GC-Inode. In that case it is not necessary to
> + * decrement the previous segment, because at the end of the GC process it
> + * will be freed anyway. It is however necessary to check again if the blocks
> + * are alive here, because the last check was in userspace without the proper
> + * locking. Additionally the blocks protected by the protection period should
> + * be considered reclaimable. It is assumed, that @bh->b_blocknr contains
> + * a virtual block number, which is only true if @bh is part of a GC-Inode.
> + */
> +static void nilfs_segctor_dec_nlive_blks_gc(struct inode *dat,
> +                                         struct nilfs_segment_buffer *segbuf,
> +                                         struct buffer_head *bh) {
> +     bool isreclaimable = buffer_nilfs_protection_period(bh) ||
> +                             !nilfs_dat_is_live(dat, bh->b_blocknr, NULL);
> +
> +     if (!buffer_nilfs_snapshot(bh) && isreclaimable)
> +             segbuf->sb_nlive_blks_diff--;
> +}
> +
> +/**
> + * nilfs_segctor_dec_nlive_blks_nogc - dec. nlive_blks of segment
> + * @nilfs: the nilfs object
> + * @mc: modification cache
> + * @sb: currtent segment buffer
> + * @blocknr: current block number
> + *
> + * Description: Gets the segment number of the segment @blocknr belongs to
> + * and decrements the su_nlive_blks field of the corresponding segment usage
> + * entry.
> + */
> +static void nilfs_segctor_dec_nlive_blks_nogc(struct the_nilfs *nilfs,
> +                                           struct nilfs_sufile_mod_cache *mc,
> +                                           struct nilfs_segment_buffer *sb,
> +                                           sector_t blocknr)
> +{
> +     __u64 segnum = nilfs_get_segnum_of_block(nilfs, blocknr);
> +
> +     if (segnum >= nilfs->ns_nsegments)
> +             return;
> +
> +     if (segnum == sb->sb_segnum)
> +             sb->sb_nlive_blks_diff--;
> +     else
> +             nilfs_sufile_mod_nlive_blks(nilfs->ns_sufile, mc, segnum, -1);
> +}

As I mentioned before, sufile shouldn't be changed (in precise, newly
marked dirty) after the collection phase of sufile.  This looks to be
violating it.

Regards,
Ryusuke Konishi

> +
> +/**
> + * nilfs_segctor_dec_nlive_blks - dec. nlive_blks of previous segment
> + * @nilfs: the nilfs object
> + * @mc: modification cache
> + * @sb: currtent segment buffer
> + * @bh: current buffer head
> + * @ino: current inode number
> + * @gc_inode: true if current inode is a GC-Inode
> + *
> + * Description: Handles GC-Inodes and normal inodes differently. For normal
> + * inodes @bh->b_blocknr contains the location where the block was read in. 
> If
> + * the block is updated, the old version of it is considered reclaimable and 
> so
> + * the su_nlive_blks field of the segment usage information of the old 
> segment
> + * needs to be decremented. Only the DATFILE and SUFILE are decremented here,
> + * because normal files and other meta data files can be better decremented 
> in
> + * nilfs_dat_commit_end().
> + */
> +static void nilfs_segctor_dec_nlive_blks(struct the_nilfs *nilfs,
> +                                      struct nilfs_sufile_mod_cache *mc,
> +                                      struct nilfs_segment_buffer *sb,
> +                                      struct buffer_head *bh,
> +                                      ino_t ino,
> +                                      bool gc_inode)
> +{
> +     bool isnode = buffer_nilfs_node(bh);
> +
> +     if (gc_inode)
> +             nilfs_segctor_dec_nlive_blks_gc(nilfs->ns_dat, sb, bh);
> +     else if (ino == NILFS_DAT_INO || (ino == NILFS_SUFILE_INO && !isnode))
> +             nilfs_segctor_dec_nlive_blks_nogc(nilfs, mc, sb, bh->b_blocknr);
> +}
> +
>  static int
>  nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
>                                    struct nilfs_segment_buffer *segbuf,
>                                    int mode)
>  {
> +     struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
>       struct inode *inode = NULL;
> +     struct nilfs_inode_info *ii;
>       sector_t blocknr;
>       unsigned long nfinfo = segbuf->sb_sum.nfinfo;
>       unsigned long nblocks = 0, ndatablk = 0;
> @@ -1512,7 +1599,9 @@ nilfs_segctor_update_payload_blocknr(struct 
> nilfs_sc_info *sci,
>       union nilfs_binfo binfo;
>       struct buffer_head *bh, *bh_org;
>       ino_t ino = 0;
> -     int err = 0;
> +     int err = 0, gc_inode = 0, track_live_blks;
> +
> +     track_live_blks = nilfs_feature_track_live_blks(nilfs);
>  
>       if (!nfinfo)
>               goto out;
> @@ -1533,6 +1622,9 @@ nilfs_segctor_update_payload_blocknr(struct 
> nilfs_sc_info *sci,
>  
>                       inode = bh->b_page->mapping->host;
>  
> +                     ii = NILFS_I(inode);
> +                     gc_inode = test_bit(NILFS_I_GCINODE, &ii->i_state);
> +
>                       if (mode == SC_LSEG_DSYNC)
>                               sc_op = &nilfs_sc_dsync_ops;
>                       else if (ino == NILFS_DAT_INO)
> @@ -1540,6 +1632,11 @@ nilfs_segctor_update_payload_blocknr(struct 
> nilfs_sc_info *sci,
>                       else /* file blocks */
>                               sc_op = &nilfs_sc_file_ops;
>               }
> +
> +             if (track_live_blks)
> +                     nilfs_segctor_dec_nlive_blks(nilfs, sci->sc_mc, segbuf,
> +                                                  bh, ino, gc_inode);
> +
>               bh_org = bh;
>               get_bh(bh_org);
>               err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
> -- 
> 2.3.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to