Add 'Decrement refcount for delete' in to the normal truncate process. So for a refcounted extent record, call refcount rec decrementation instead of cluster free.
Signed-off-by: Tao Ma <[email protected]> --- fs/ocfs2/alloc.c | 55 ++++++++++++-- fs/ocfs2/journal.h | 4 + fs/ocfs2/refcounttree.c | 195 +++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/refcounttree.h | 6 ++ 4 files changed, 253 insertions(+), 7 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index bbea71c..e8ff5f7 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6547,7 +6547,7 @@ out: */ static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, handle_t *handle, struct ocfs2_truncate_context *tc, - u32 clusters_to_del, u64 *delete_start) + u32 clusters_to_del, u64 *delete_start, u8 *flags) { int ret, i, index = path->p_tree_depth; u32 new_edge = 0; @@ -6557,6 +6557,7 @@ static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, struct ocfs2_extent_rec *rec; *delete_start = 0; + *flags = 0; while (index >= 0) { bh = path->p_node[index].bh; @@ -6644,6 +6645,7 @@ find_tail_record: *delete_start = le64_to_cpu(rec->e_blkno) + ocfs2_clusters_to_blocks(inode->i_sb, le16_to_cpu(rec->e_leaf_clusters)); + *flags = rec->e_flags; /* * If it's now empty, remove this record. @@ -6743,7 +6745,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, struct buffer_head *fe_bh, handle_t *handle, struct ocfs2_truncate_context *tc, - struct ocfs2_path *path) + struct ocfs2_path *path, + struct ocfs2_alloc_context *meta_ac) { int status; struct ocfs2_dinode *fe; @@ -6751,6 +6754,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, struct ocfs2_extent_list *el; struct buffer_head *last_eb_bh = NULL; u64 delete_blk = 0; + u8 rec_flags; fe = (struct ocfs2_dinode *) fe_bh->b_data; @@ -6806,7 +6810,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, inode->i_blocks = ocfs2_inode_sector_count(inode); status = ocfs2_trim_tree(inode, path, handle, tc, - clusters_to_del, &delete_blk); + clusters_to_del, &delete_blk, &rec_flags); if (status) { mlog_errno(status); goto bail; @@ -6838,8 +6842,16 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, } if (delete_blk) { - status = ocfs2_truncate_log_append(osb, handle, delete_blk, - clusters_to_del); + if (rec_flags & OCFS2_EXT_REFCOUNTED) + status = ocfs2_decrease_refcount(inode, fe_bh, handle, + ocfs2_blocks_to_clusters(osb->sb, + delete_blk), + clusters_to_del, meta_ac, + &tc->tc_dealloc); + else + status = ocfs2_truncate_log_append(osb, handle, + delete_blk, + clusters_to_del); if (status < 0) { mlog_errno(status); goto bail; @@ -7257,11 +7269,13 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, { int status, i, credits, tl_sem = 0; u32 clusters_to_del, new_highest_cpos, range; + u64 blkno = 0; struct ocfs2_extent_list *el; handle_t *handle = NULL; struct inode *tl_inode = osb->osb_tl_inode; struct ocfs2_path *path = NULL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; + struct ocfs2_alloc_context *meta_ac = NULL; mlog_entry_void(); @@ -7287,6 +7301,8 @@ start: goto bail; } + credits = 0; + /* * Truncate always works against the rightmost tree branch. */ @@ -7327,10 +7343,15 @@ start: clusters_to_del = 0; } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); + blkno = le64_to_cpu(el->l_recs[i].e_blkno); } else if (range > new_highest_cpos) { clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + le32_to_cpu(el->l_recs[i].e_cpos)) - new_highest_cpos; + blkno = le64_to_cpu(el->l_recs[i].e_blkno) + + ocfs2_clusters_to_blocks(inode->i_sb, + ocfs2_rec_clusters(el, &el->l_recs[i]) - + clusters_to_del); } else { status = 0; goto bail; @@ -7339,6 +7360,18 @@ start: mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); + if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) { + status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh, + blkno, + clusters_to_del, + &credits, + &meta_ac); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + mutex_lock(&tl_inode->i_mutex); tl_sem = 1; /* ocfs2_truncate_log_needs_flush guarantees us at least one @@ -7352,7 +7385,7 @@ start: } } - credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, + credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, (struct ocfs2_dinode *)fe_bh->b_data, el); handle = ocfs2_start_trans(osb, credits); @@ -7364,7 +7397,7 @@ start: } status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, - tc, path); + tc, path, meta_ac); if (status < 0) { mlog_errno(status); goto bail; @@ -7378,6 +7411,11 @@ start: ocfs2_reinit_path(path, 1); + if (meta_ac) { + ocfs2_free_alloc_context(meta_ac); + meta_ac = NULL; + } + /* * The check above will catch the case where we've truncated * away all allocation. @@ -7398,6 +7436,9 @@ bail: if (handle) ocfs2_commit_trans(osb, handle); + if (meta_ac) + ocfs2_free_alloc_context(meta_ac); + ocfs2_run_deallocs(osb, &tc->tc_dealloc); ocfs2_free_path(path); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index bfd5942..dbd7888 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -485,6 +485,10 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb) #define OCFS2_REFCOUNT_TREE_CREATE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \ + OCFS2_SUBALLOC_ALLOC + 1) #define OCFS2_REFCOUNT_TREE_SET_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) + +/* 2 metadata alloc, 2 new blocks and root refcount block */ +#define OCFS2_EXPAND_REFCOUNT_TREE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + 3) + /* * Please note that the caller must make sure that root_el is the root * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 8dc5f3c..b56d083 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -1550,3 +1550,198 @@ static int ocfs2_mark_extent_refcounted(struct inode *inode, out: return ret; } + +/* + * Given some contiguous physical clusters, calculate what we need + * for modifying their refcount. + */ +static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, + struct ocfs2_caching_info *ci, + struct buffer_head *ref_root_bh, + u64 start_cpos, + u32 clusters, + int *meta_add, + int *credits) +{ + int ret = 0, index, ref_blocks = 0, recs_add = 0; + u64 cpos = start_cpos; + struct ocfs2_refcount_block *rb; + struct ocfs2_refcount_rec rec; + struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL; + u32 len; + + mlog(0, "start_cpos %llu, clusters %u\n", + (unsigned long long)start_cpos, clusters); + while (clusters) { + ret = ocfs2_get_refcount_rec(ci, ref_root_bh, + cpos, clusters, &rec, + &index, &ref_leaf_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data; + + mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu," + "rec->r_clusters %u, rec->r_refcount %u, index %d\n", + recs_add, (unsigned long long)cpos, clusters, + (unsigned long long)le64_to_cpu(rec.r_cpos), + le32_to_cpu(rec.r_clusters), + le32_to_cpu(rec.r_refcount), index); + + len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + + le32_to_cpu(rec.r_clusters)) - cpos; + /* + * If the refcount rec already exist, cool. We just need + * to check whether there is a split. + * If we will insert one, check whether we have space. + * + * We record all the records which will be inserted to the + * same refcount block, so that we can tell exactly whether + * we need a new refcount block or not. + */ + if (rec.r_refcount) { + /* Check whether we need a split at the beginning. */ + if (cpos == start_cpos && + cpos != le64_to_cpu(rec.r_cpos)) { + recs_add++; + + if (le64_to_cpu(rb->rf_records.rl_used) + + recs_add > + le16_to_cpu(rb->rf_records.rl_count)) + ref_blocks++; + } + + /* Check whether we need a split in the end. */ + if (cpos + clusters > le64_to_cpu(rec.r_cpos) + + le32_to_cpu(rec.r_clusters)) { + recs_add++; + + if (le64_to_cpu(rb->rf_records.rl_used) + + recs_add > + le16_to_cpu(rb->rf_records.rl_count)) + ref_blocks++; + } + } else { + recs_add++; + + if (le64_to_cpu(rb->rf_records.rl_used) + recs_add > + le16_to_cpu(rb->rf_records.rl_count)) + ref_blocks++; + } + + if (ref_leaf_bh != prev_bh) { + mlog(0, "ref block %llu, rl_used %u, rl_count %u\n", + (unsigned long long)ref_leaf_bh->b_blocknr, + le16_to_cpu(rb->rf_records.rl_used), + le16_to_cpu(rb->rf_records.rl_count)); + recs_add = 0; + *credits += 1; + brelse(prev_bh); + prev_bh = ref_leaf_bh; + ref_leaf_bh = NULL; + } + + clusters -= len; + cpos += len; + } + + mlog(0, "we need ref_blocks %d\n", ref_blocks); + *meta_add += ref_blocks; + + if (!ref_blocks) + goto out; + + /* + * So we may need ref_blocks to insert into the tree. + * That also means we need to change the b-tree and add that number + * of records since we never merge them. + * We need one more block for expansion since the new created leaf + * block is also full and needs split. + */ + rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; + if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_LEAF_FL) { + *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; + *meta_add += 1; + } else { + struct ocfs2_extent_tree et; + + ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); + *credits += ocfs2_calc_extend_credits(sb, + et.et_root_el, + ref_blocks); + } + +out: + brelse(ref_leaf_bh); + brelse(prev_bh); + return ret; +} + +/* + * For refcount tree, we will decrease some contiguous clusters + * refcount count, so just go through it to see how many blocks + * we gonna touch and whether we need to create new blocks. + * + * Normally the refcount blocks store these refcount should be + * continguous also, so that we can get the number easily. + * As for meta_ac, we will at most add split 2 refcount record and + * 2 more refcount block, so just check it in a rough way. + */ +int ocfs2_prepare_refcount_change_for_del(struct inode *inode, + struct buffer_head *di_bh, + u64 phys_blkno, + u32 clusters, + int *credits, + struct ocfs2_alloc_context **meta_ac) +{ + int ret, ref_blocks = 0; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct buffer_head *ref_root_bh = NULL; + u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno); + + if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { + ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " + "tree, but the feature bit is not set in the " + "super block.", inode->i_ino); + ret = -EROFS; + goto out; + } + + BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); + BUG_ON(!di->i_refcount_loc); + + ret = ocfs2_read_refcount_block(INODE_CACHE(inode), + le64_to_cpu(di->i_refcount_loc), + &ref_root_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_calc_refcount_meta_credits(inode->i_sb, + INODE_CACHE(inode), + ref_root_bh, + start_cpos, clusters, + &ref_blocks, credits); + if (ret) { + mlog_errno(ret); + goto out; + } + + mlog(0, "reserve new metadata %d, credits = %d\n", + ref_blocks, *credits); + + if (ref_blocks) { + ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), + ref_blocks, meta_ac); + if (ret) + mlog_errno(ret); + } + +out: + brelse(ref_root_bh); + return ret; +} diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 92fe116..0fdf726 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -27,4 +27,10 @@ int ocfs2_decrease_refcount(struct inode *inode, struct buffer_head *di_bh, handle_t *handle, u32 cpos, u32 len, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc); +int ocfs2_prepare_refcount_change_for_del(struct inode *inode, + struct buffer_head *di_bh, + u64 phys_blkno, + u32 clusters, + int *credits, + struct ocfs2_alloc_context **meta_ac); #endif /* OCFS2_REFCOUNTTREE_H */ -- 1.6.2.rc2.16.gf474c _______________________________________________ Ocfs2-devel mailing list [email protected] http://oss.oracle.com/mailman/listinfo/ocfs2-devel
