On 2014-03-17 08:04, Vyacheslav Dubeyko wrote:
> On Sun, 2014-03-16 at 11:47 +0100, Andreas Rohner wrote:
>> To accurately count the number of live blocks in a segment, it is
>> important to take snapshots into account, because snapshots can protect
>> reclaimable blocks from being cleaned.
>>
>> This patch uses the previously reserved de_rsv field of the
>> nilfs_dat_entry struct to store one of the snapshots the corresponding
>> block belongs to. One block can belong to many snapshots, but because
>> the snapshots are stored in a sorted linked list, it is easy to check if
>> a block belongs to any other snapshot given the previous and the next
>> snapshot. For example if the current snapshot (in de_ss) is being
>> removed and neither the previous nor the next snapshot is in the range
>> of de_start to de_end, then it is guaranteed that the block doesn't
>> belong to any other snapshot and is reclaimable. On the other hand if
>> lets say the previous snapshot is in the range of de_start to de_end, we
>> simply set de_ss to the previous snapshot and the block is not
>> reclaimable.
>>
>> To implement this every DAT entry is scanned at snapshot
>> creation/deletion time and updated if needed. 
> 
> It is well known problem of NILFS2 that deletion is very slow operation
> for big files because of necessity to update DAT file (de_end: end
> checkpoint number). So, how your addition does affect this disadvantage?

Additionally to setting "de_end: end checkpoint number" the live block
counter in the SUFILE needs to be decremented. This makes the deletion a
little bit more expensive, but its not really noticeable, because the
SUFILE-Entries are mostly in the cache. I have timed the deletion of 100
GB and there is no discernible difference in the performance.

But my additions make snapshot creation and deletion more expensive.

>> To avoid too many update
>> operations only potentially reclaimable blocks are ever updated. For
>> example if there are some deleted files and the checkpoint to which
>> these files belong is turned into a snapshot, then su_nblocks is
>> incremented for these blocks, which reverses the decrement that happened
>> when the files were deleted. If after some time this snapshot is
>> deleted, su_nblocks is decremented again to reverse the increment at
>> creation time.
>>
>> Signed-off-by: Andreas Rohner <[email protected]>
>> ---
>>  fs/nilfs2/cpfile.c        |  7 ++++
>>  fs/nilfs2/dat.c           | 86 
>> +++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/nilfs2/dat.h           | 26 ++++++++++++++
>>  include/linux/nilfs2_fs.h |  4 +--
>>  4 files changed, 121 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
>> index 0d58075..29952f5 100644
>> --- a/fs/nilfs2/cpfile.c
>> +++ b/fs/nilfs2/cpfile.c
>> @@ -28,6 +28,7 @@
>>  #include <linux/nilfs2_fs.h>
>>  #include "mdt.h"
>>  #include "cpfile.h"
>> +#include "sufile.h"
>>  
>>
>>  static inline unsigned long
>> @@ -584,6 +585,7 @@ static int nilfs_cpfile_set_snapshot(struct inode 
>> *cpfile, __u64 cno)
>>      struct nilfs_cpfile_header *header;
>>      struct nilfs_checkpoint *cp;
>>      struct nilfs_snapshot_list *list;
>> +    struct the_nilfs *nilfs = cpfile->i_sb->s_fs_info;
>>      __u64 curr, prev;
>>      unsigned long curr_blkoff, prev_blkoff;
>>      void *kaddr;
>> @@ -681,6 +683,8 @@ static int nilfs_cpfile_set_snapshot(struct inode 
>> *cpfile, __u64 cno)
>>      mark_buffer_dirty(header_bh);
>>      nilfs_mdt_mark_dirty(cpfile);
>>  
>> +    nilfs_dat_scan_inc_ss(nilfs->ns_dat, cno);
>> +
>>      brelse(prev_bh);
>>  
>>   out_curr:
>> @@ -703,6 +707,7 @@ static int nilfs_cpfile_clear_snapshot(struct inode 
>> *cpfile, __u64 cno)
>>      struct nilfs_cpfile_header *header;
>>      struct nilfs_checkpoint *cp;
>>      struct nilfs_snapshot_list *list;
>> +    struct the_nilfs *nilfs = cpfile->i_sb->s_fs_info;
>>      __u64 next, prev;
>>      void *kaddr;
>>      int ret;
>> @@ -784,6 +789,8 @@ static int nilfs_cpfile_clear_snapshot(struct inode 
>> *cpfile, __u64 cno)
>>      mark_buffer_dirty(header_bh);
>>      nilfs_mdt_mark_dirty(cpfile);
>>  
>> +    nilfs_dat_scan_dec_ss(nilfs->ns_dat, cno, prev, next);
>> +
>>      brelse(prev_bh);
>>  
>>   out_next:
>> diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
>> index 0d5fada..89a4a5f 100644
>> --- a/fs/nilfs2/dat.c
>> +++ b/fs/nilfs2/dat.c
>> @@ -28,6 +28,7 @@
>>  #include "mdt.h"
>>  #include "alloc.h"
>>  #include "dat.h"
>> +#include "sufile.h"
>>  
>>
>>  #define NILFS_CNO_MIN       ((__u64)1)
>> @@ -97,6 +98,7 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct 
>> nilfs_palloc_req *req)
>>      entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
>>      entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
>>      entry->de_blocknr = cpu_to_le64(0);
>> +    entry->de_ss = cpu_to_le64(0);
>>      kunmap_atomic(kaddr);
>>  
>>      nilfs_palloc_commit_alloc_entry(dat, req);
>> @@ -121,6 +123,7 @@ static void nilfs_dat_commit_free(struct inode *dat,
>>      entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
>>      entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
>>      entry->de_blocknr = cpu_to_le64(0);
>> +    entry->de_ss = cpu_to_le64(0);
>>      kunmap_atomic(kaddr);
>>  
>>      nilfs_dat_commit_entry(dat, req);
>> @@ -201,6 +204,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct 
>> nilfs_palloc_req *req,
>>              WARN_ON(start > end);
>>      }
>>      entry->de_end = cpu_to_le64(end);
>> +    entry->de_ss = cpu_to_le64(NILFS_CNO_MAX);
>>      blocknr = le64_to_cpu(entry->de_blocknr);
>>      kunmap_atomic(kaddr);
>>  
>> @@ -365,6 +369,8 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, 
>> sector_t blocknr)
>>      }
>>      WARN_ON(blocknr == 0);
>>      entry->de_blocknr = cpu_to_le64(blocknr);
>> +    if (entry->de_ss == cpu_to_le64(NILFS_CNO_MAX))
>> +            entry->de_ss = cpu_to_le64(0);
>>      kunmap_atomic(kaddr);
>>  
>>      mark_buffer_dirty(entry_bh);
>> @@ -430,6 +436,86 @@ int nilfs_dat_translate(struct inode *dat, __u64 
>> vblocknr, sector_t *blocknrp)
>>      return ret;
>>  }
>>  
>> +void nilfs_dat_do_scan_dec(struct inode *dat, struct nilfs_palloc_req *req,
>> +                       void *data)
>> +{
>> +    struct nilfs_dat_entry *entry;
>> +    __u64 start, end, prev_ss;
>> +    __u64 *ssp = data, ss = ssp[0], prev = ssp[1], next = ssp[2];
>> +    sector_t blocknr;
>> +    void *kaddr;
>> +    struct the_nilfs *nilfs;
>> +
>> +    kaddr = kmap_atomic(req->pr_entry_bh->b_page);
>> +    entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
>> +                                         req->pr_entry_bh, kaddr);
>> +    start = le64_to_cpu(entry->de_start);
>> +    end = le64_to_cpu(entry->de_end);
>> +    blocknr = le64_to_cpu(entry->de_blocknr);
>> +    prev_ss = le64_to_cpu(entry->de_ss);
>> +
>> +    if (blocknr != 0 && end != NILFS_CNO_MAX && ss >= start && ss < end) {
> 
> I think that it makes sense to use small functions with clear names
> about what we check.

Ok.

>> +            if (prev_ss == ss || prev_ss == NILFS_CNO_MAX) {
>> +                    if (prev && prev >= start && prev < end)
>> +                            entry->de_ss = cpu_to_le64(prev);
>> +                    else if (next && next >= start && next < end)
>> +                            entry->de_ss = cpu_to_le64(next);
>> +                    else
>> +                            entry->de_ss = cpu_to_le64(0);
> 
> Ditto.
> 
>> +
>> +                    if (prev_ss != NILFS_CNO_MAX)
>> +                            prev_ss = le64_to_cpu(entry->de_ss);
>> +                    kunmap_atomic(kaddr);
>> +                    mark_buffer_dirty(req->pr_entry_bh);
>> +                    nilfs_mdt_mark_dirty(dat);
>> +            } else
>> +                    kunmap_atomic(kaddr);
>> +
>> +            if (prev_ss == 0) {
>> +                    nilfs = dat->i_sb->s_fs_info;
>> +                    nilfs_sufile_add_segment_usage(nilfs->ns_sufile,
>> +                            nilfs_get_segnum_of_block(nilfs, blocknr),
>> +                            -1, 0);
>> +            }
>> +    } else
>> +            kunmap_atomic(kaddr);
>> +}
>> +
>> +void nilfs_dat_do_scan_inc(struct inode *dat, struct nilfs_palloc_req *req,
>> +                       void *data)
>> +{
>> +    struct nilfs_dat_entry *entry;
>> +    __u64 start, end, prev_ss;
>> +    __u64 *ssp = data, ss = *ssp;
>> +    sector_t blocknr;
>> +    void *kaddr;
>> +    struct the_nilfs *nilfs;
>> +
>> +    kaddr = kmap_atomic(req->pr_entry_bh->b_page);
>> +    entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
>> +                    req->pr_entry_bh, kaddr);
>> +    start = le64_to_cpu(entry->de_start);
>> +    end = le64_to_cpu(entry->de_end);
>> +    blocknr = le64_to_cpu(entry->de_blocknr);
>> +    prev_ss = le64_to_cpu(entry->de_ss);
>> +
>> +    if (blocknr != 0 && end != NILFS_CNO_MAX && ss >= start && ss < end &&
>> +                    (prev_ss == 0 || prev_ss == NILFS_CNO_MAX)) {
> 
> Ditto. Moreover, you repeat this check.

What do you mean? Where do I repeat this check?

>> +
>> +            entry->de_ss = cpu_to_le64(ss);
>> +
>> +            kunmap_atomic(kaddr);
>> +            mark_buffer_dirty(req->pr_entry_bh);
>> +            nilfs_mdt_mark_dirty(dat);
>> +
>> +            nilfs = dat->i_sb->s_fs_info;
>> +            nilfs_sufile_add_segment_usage(nilfs->ns_sufile,
>> +                            nilfs_get_segnum_of_block(nilfs, blocknr),
> 
> Looks weird. Maybe, variable?

Ok.

Thanks for your review so far.

Best regards,
Andreas Rohner

> Thanks,
> Vyacheslav Dubeyko.
> 
>> +                            1, 0);
>> +    } else
>> +            kunmap_atomic(kaddr);
>> +}
>> +
>>  ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
>>                          size_t nvi)
>>  {
>> diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
>> index cbd8e97..92a187e 100644
>> --- a/fs/nilfs2/dat.h
>> +++ b/fs/nilfs2/dat.h
>> @@ -55,5 +55,31 @@ ssize_t nilfs_dat_get_vinfo(struct inode *, void *, 
>> unsigned, size_t);
>>  
>>  int nilfs_dat_read(struct super_block *sb, size_t entry_size,
>>                 struct nilfs_inode *raw_inode, struct inode **inodep);
>> +void nilfs_dat_do_scan_dec(struct inode *, struct nilfs_palloc_req *, void 
>> *);
>> +void nilfs_dat_do_scan_inc(struct inode *, struct nilfs_palloc_req *, void 
>> *);
>> +
>> +/**
>> + * nilfs_dat_scan_dec_ss - scan all dat entries for a checkpoint dec suinfo
>> + * @dat: inode of dat file
>> + * @cno: snapshot number
>> + * @prev: previous snapshot number
>> + * @next: next snapshot number
>> + */
>> +static inline int nilfs_dat_scan_dec_ss(struct inode *dat, __u64 cno,
>> +                                    __u64 prev, __u64 next)
>> +{
>> +    __u64 data[3] = { cno, prev, next };
>> +    return nilfs_palloc_scan_entries(dat, nilfs_dat_do_scan_dec, data);
>> +}
>> +
>> +/**
>> + * nilfs_dat_scan_dec_ss - scan all dat entries for a checkpoint inc suinfo
>> + * @dat: inode of dat file
>> + * @cno: snapshot number
>> + */
>> +static inline int nilfs_dat_scan_inc_ss(struct inode *dat, __u64 cno)
>> +{
>> +    return nilfs_palloc_scan_entries(dat, nilfs_dat_do_scan_inc, &cno);
>> +}
>>  
>>  #endif      /* _NILFS_DAT_H */
>> diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
>> index ca269ad..ba9ebe02 100644
>> --- a/include/linux/nilfs2_fs.h
>> +++ b/include/linux/nilfs2_fs.h
>> @@ -475,13 +475,13 @@ struct nilfs_palloc_group_desc {
>>   * @de_blocknr: block number
>>   * @de_start: start checkpoint number
>>   * @de_end: end checkpoint number
>> - * @de_rsv: reserved for future use
>> + * @de_ss: one of the snapshots the block belongs to
>>   */
>>  struct nilfs_dat_entry {
>>      __le64 de_blocknr;
>>      __le64 de_start;
>>      __le64 de_end;
>> -    __le64 de_rsv;
>> +    __le64 de_ss;
>>  };
>>  
>>  #define NILFS_MIN_DAT_ENTRY_SIZE    32
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to