On 10/29/13, 9:56 AM, Josef Bacik wrote: > Btrfs has always had these filler extent data items for holes in inodes. This > has made somethings very easy, like logging hole punches and sending hole > punches. However for large holey files these extent data items are pure > overhead. So add an incompatible feature to no longer add hole extents to > reduce the amount of metadata used by these sort of files. This has a few > changes for logging and send obviously since they will need to detect holes > and > log/send the holes if there are any. I've tested this thoroughly with > xfstests > and it doesn't cause any issues with and without the incompat format set. > Thanks,
This sounds like it could be a candidate for the online-enable mask in my feature ioctl patchset. -Jeff > Signed-off-by: Josef Bacik <[email protected]> > --- > V1->V2: my snapshot exerciser pointed out I don't understand how INLINE > extents > work, fixed this. > > fs/btrfs/ctree.h | 4 +- > fs/btrfs/file.c | 13 ++++-- > fs/btrfs/inode.c | 78 ++++++++++++++++++++------------- > fs/btrfs/send.c | 122 > +++++++++++++++++++++++++++++++++++++++++++++------- > fs/btrfs/tree-log.c | 56 +++++++++++++++++++++--- > 5 files changed, 217 insertions(+), 56 deletions(-) > > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h > index 9f5e1cf..0a008d8 100644 > --- a/fs/btrfs/ctree.h > +++ b/fs/btrfs/ctree.h > @@ -521,6 +521,7 @@ struct btrfs_super_block { > #define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) > #define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) > #define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) > +#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9) > > #define BTRFS_FEATURE_COMPAT_SUPP 0ULL > #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL > @@ -532,7 +533,8 @@ struct btrfs_super_block { > BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ > BTRFS_FEATURE_INCOMPAT_RAID56 | \ > BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ > - BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) > + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ > + BTRFS_FEATURE_INCOMPAT_NO_HOLES) > > /* > * A leaf is full of items. offset and size tell us where to find > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c > index 728f56f..ec0a95a 100644 > --- a/fs/btrfs/file.c > +++ b/fs/btrfs/file.c > @@ -1963,11 +1963,13 @@ static int fill_holes(struct btrfs_trans_handle > *trans, struct inode *inode, > struct btrfs_key key; > int ret; > > + if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) > + goto out; > + > key.objectid = btrfs_ino(inode); > key.type = BTRFS_EXTENT_DATA_KEY; > key.offset = offset; > > - > ret = btrfs_search_slot(trans, root, &key, path, 0, 1); > if (ret < 0) > return ret; > @@ -2064,8 +2066,10 @@ static int btrfs_punch_hole(struct inode *inode, > loff_t offset, loff_t len) > u64 drop_end; > int ret = 0; > int err = 0; > + int rsv_count; > bool same_page = ((offset >> PAGE_CACHE_SHIFT) == > ((offset + len - 1) >> PAGE_CACHE_SHIFT)); > + bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); > > btrfs_wait_ordered_range(inode, offset, len); > > @@ -2157,9 +2161,10 @@ static int btrfs_punch_hole(struct inode *inode, > loff_t offset, loff_t len) > /* > * 1 - update the inode > * 1 - removing the extents in the range > - * 1 - adding the hole extent > + * 1 - adding the hole extent if no_holes isn't set > */ > - trans = btrfs_start_transaction(root, 3); > + rsv_count = no_holes ? 2 : 3; > + trans = btrfs_start_transaction(root, rsv_count); > if (IS_ERR(trans)) { > err = PTR_ERR(trans); > goto out_free; > @@ -2196,7 +2201,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t > offset, loff_t len) > btrfs_end_transaction(trans, root); > btrfs_btree_balance_dirty(root); > > - trans = btrfs_start_transaction(root, 3); > + trans = btrfs_start_transaction(root, rsv_count); > if (IS_ERR(trans)) { > ret = PTR_ERR(trans); > trans = NULL; > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 30bdacd..38fa301 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -4204,6 +4204,49 @@ out: > return ret; > } > > +static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode, > + u64 offset, u64 len) > +{ > + struct btrfs_trans_handle *trans; > + int ret; > + > + /* > + * Still need to make sure the inode looks like it's been updated so > + * that any holes get logged if we fsync. > + */ > + if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) { > + BTRFS_I(inode)->last_trans = root->fs_info->generation; > + BTRFS_I(inode)->last_sub_trans = root->log_transid; > + BTRFS_I(inode)->last_log_commit = root->last_log_commit; > + return 0; > + } > + > + /* > + * 1 - for the one we're dropping > + * 1 - for the one we're adding > + * 1 - for updating the inode. > + */ > + trans = btrfs_start_transaction(root, 3); > + if (IS_ERR(trans)) > + return PTR_ERR(trans); > + > + ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1); > + if (ret) { > + btrfs_abort_transaction(trans, root, ret); > + btrfs_end_transaction(trans, root); > + return ret; > + } > + > + ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, > + 0, 0, len, 0, len, 0, 0, 0); > + if (ret) > + btrfs_abort_transaction(trans, root, ret); > + else > + btrfs_update_inode(trans, root, inode); > + btrfs_end_transaction(trans, root); > + return ret; > +} > + > /* > * This function puts in dummy file extents for the area we're creating a > hole > * for. So if we are truncating this file to a larger size we need to insert > @@ -4212,7 +4255,6 @@ out: > */ > int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) > { > - struct btrfs_trans_handle *trans; > struct btrfs_root *root = BTRFS_I(inode)->root; > struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; > struct extent_map *em = NULL; > @@ -4267,31 +4309,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t > oldsize, loff_t size) > struct extent_map *hole_em; > hole_size = last_byte - cur_offset; > > - trans = btrfs_start_transaction(root, 3); > - if (IS_ERR(trans)) { > - err = PTR_ERR(trans); > - break; > - } > - > - err = btrfs_drop_extents(trans, root, inode, > - cur_offset, > - cur_offset + hole_size, 1); > - if (err) { > - btrfs_abort_transaction(trans, root, err); > - btrfs_end_transaction(trans, root); > - break; > - } > - > - err = btrfs_insert_file_extent(trans, root, > - btrfs_ino(inode), cur_offset, 0, > - 0, hole_size, 0, hole_size, > - 0, 0, 0); > - if (err) { > - btrfs_abort_transaction(trans, root, err); > - btrfs_end_transaction(trans, root); > + err = maybe_insert_hole(root, inode, cur_offset, > + hole_size); > + if (err) > break; > - } > - > btrfs_drop_extent_cache(inode, cur_offset, > cur_offset + hole_size - 1, 0); > hole_em = alloc_extent_map(); > @@ -4310,7 +4331,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t > oldsize, loff_t size) > hole_em->ram_bytes = hole_size; > hole_em->bdev = root->fs_info->fs_devices->latest_bdev; > hole_em->compress_type = BTRFS_COMPRESS_NONE; > - hole_em->generation = trans->transid; > + hole_em->generation = root->fs_info->generation; > > while (1) { > write_lock(&em_tree->lock); > @@ -4323,17 +4344,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t > oldsize, loff_t size) > hole_size - 1, 0); > } > free_extent_map(hole_em); > -next: > - btrfs_update_inode(trans, root, inode); > - btrfs_end_transaction(trans, root); > } > +next: > free_extent_map(em); > em = NULL; > cur_offset = last_byte; > if (cur_offset >= block_end) > break; > } > - > free_extent_map(em); > unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, > GFP_NOFS); > diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c > index 5a3874c..2a2677b 100644 > --- a/fs/btrfs/send.c > +++ b/fs/btrfs/send.c > @@ -111,6 +111,7 @@ struct send_ctx { > int cur_inode_deleted; > u64 cur_inode_size; > u64 cur_inode_mode; > + u64 cur_inode_last_extent; > > u64 send_progress; > > @@ -146,6 +147,13 @@ struct name_cache_entry { > char name[]; > }; > > +static int need_send_hole(struct send_ctx *sctx) > +{ > + return (sctx->parent_root && !sctx->cur_inode_new && > + !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted && > + S_ISREG(sctx->cur_inode_mode)); > +} > + > static void fs_path_reset(struct fs_path *p) > { > if (p->reversed) { > @@ -3776,6 +3784,43 @@ out: > return ret; > } > > +static int send_hole(struct send_ctx *sctx, u64 end) > +{ > + struct fs_path *p = NULL; > + u64 offset = sctx->cur_inode_last_extent; > + u64 len; > + int ret; > + > + p = fs_path_alloc(); > + if (!p) > + return -ENOMEM; > + ret = open_cur_inode_file(sctx); > + if (ret < 0) > + goto out; > + memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); > + while (offset < end) { > + len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); > + > + ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); > + if (ret < 0) > + break; > + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); > + if (ret < 0) > + break; > + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); > + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); > + TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); > + ret = send_cmd(sctx); > + if (ret < 0) > + break; > + offset += len; > + } > +tlv_put_failure: > +out: > + fs_path_free(p); > + return ret; > +} > + > static int send_write_or_clone(struct send_ctx *sctx, > struct btrfs_path *path, > struct btrfs_key *key, > @@ -4008,26 +4053,32 @@ static int process_extent(struct send_ctx *sctx, > struct btrfs_key *key) > { > struct clone_root *found_clone = NULL; > + struct btrfs_file_extent_item *ei; > + u64 len; > + u8 type; > int ret = 0; > > if (S_ISLNK(sctx->cur_inode_mode)) > return 0; > > + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], > + struct btrfs_file_extent_item); > + type = btrfs_file_extent_type(path->nodes[0], ei); > + if (type == BTRFS_FILE_EXTENT_INLINE) > + len = ALIGN(btrfs_file_extent_inline_len(path->nodes[0], ei), > + sctx->send_root->sectorsize); > + else > + len = btrfs_file_extent_num_bytes(path->nodes[0], ei); > + > if (sctx->parent_root && !sctx->cur_inode_new) { > ret = is_extent_unchanged(sctx, path, key); > if (ret < 0) > goto out; > if (ret) { > ret = 0; > - goto out; > + goto out_hole; > } > } else { > - struct btrfs_file_extent_item *ei; > - u8 type; > - > - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], > - struct btrfs_file_extent_item); > - type = btrfs_file_extent_type(path->nodes[0], ei); > if (type == BTRFS_FILE_EXTENT_PREALLOC || > type == BTRFS_FILE_EXTENT_REG) { > /* > @@ -4055,7 +4106,12 @@ static int process_extent(struct send_ctx *sctx, > goto out; > > ret = send_write_or_clone(sctx, path, key, found_clone); > - > + if (ret) > + goto out; > +out_hole: > + if (need_send_hole(sctx) && sctx->cur_inode_last_extent != key->offset) > + ret = send_hole(sctx, key->offset); > + sctx->cur_inode_last_extent = key->offset + len; > out: > return ret; > } > @@ -4181,6 +4237,12 @@ static int finish_inode_if_needed(struct send_ctx > *sctx, int at_end) > } > > if (S_ISREG(sctx->cur_inode_mode)) { > + if (need_send_hole(sctx) && > + sctx->cur_inode_last_extent < sctx->cur_inode_size) { > + ret = send_hole(sctx, sctx->cur_inode_size); > + if (ret) > + goto out; > + } > ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, > sctx->cur_inode_size); > if (ret < 0) > @@ -4228,6 +4290,7 @@ static int changed_inode(struct send_ctx *sctx, > > sctx->cur_ino = key->objectid; > sctx->cur_inode_new_gen = 0; > + sctx->cur_inode_last_extent = 0; > > /* > * Set send_progress to current inode. This will tell all get_cur_xxx > @@ -4492,6 +4555,31 @@ out: > return ret; > } > > +static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, > + struct btrfs_key *key) > +{ > + struct btrfs_file_extent_item *fi; > + u64 len; > + u8 type; > + int ret = 0; > + > + if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) > + return 0; > + > + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], > + struct btrfs_file_extent_item); > + type = btrfs_file_extent_type(path->nodes[0], fi); > + if (type == BTRFS_FILE_EXTENT_INLINE) > + len = ALIGN(btrfs_file_extent_inline_len(path->nodes[0], fi), > + sctx->send_root->sectorsize); > + else > + len = btrfs_file_extent_num_bytes(path->nodes[0], fi); > + if (sctx->cur_inode_last_extent != key->offset) > + ret = send_hole(sctx, key->offset); > + sctx->cur_inode_last_extent = key->offset + len; > + return ret; > +} > + > /* > * Updates compare related fields in sctx and simply forwards to the actual > * changed_xxx functions. > @@ -4508,14 +4596,18 @@ static int changed_cb(struct btrfs_root *left_root, > struct send_ctx *sctx = ctx; > > if (result == BTRFS_COMPARE_TREE_SAME) { > - if (key->type != BTRFS_INODE_REF_KEY && > - key->type != BTRFS_INODE_EXTREF_KEY) > - return 0; > - ret = compare_refs(sctx, left_path, key); > - if (!ret) > + if (key->type == BTRFS_INODE_REF_KEY || > + key->type == BTRFS_INODE_EXTREF_KEY) { > + ret = compare_refs(sctx, left_path, key); > + if (!ret) > + return 0; > + if (ret < 0) > + return ret; > + } else if (key->type == BTRFS_EXTENT_DATA_KEY) { > + return maybe_send_hole(sctx, left_path, key); > + } else { > return 0; > - if (ret < 0) > - return ret; > + } > result = BTRFS_COMPARE_TREE_CHANGED; > ret = 0; > } > diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c > index 1134aa4..3c8564a 100644 > --- a/fs/btrfs/tree-log.c > +++ b/fs/btrfs/tree-log.c > @@ -3188,7 +3188,7 @@ static int log_inode_item(struct btrfs_trans_handle > *trans, > static noinline int copy_items(struct btrfs_trans_handle *trans, > struct inode *inode, > struct btrfs_path *dst_path, > - struct extent_buffer *src, > + struct extent_buffer *src, u64 *last_extent, > int start_slot, int nr, int inode_only) > { > unsigned long src_offset; > @@ -3203,6 +3203,7 @@ static noinline int copy_items(struct > btrfs_trans_handle *trans, > int i; > struct list_head ordered_sums; > int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; > + bool has_extents = false; > > INIT_LIST_HEAD(&ordered_sums); > > @@ -3242,6 +3243,8 @@ static noinline int copy_items(struct > btrfs_trans_handle *trans, > src_offset, ins_sizes[i]); > } > > + if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) > + has_extents = true; > /* take a reference on file data extents so that truncates > * or deletes of this inode don't have to relog the inode > * again > @@ -3306,6 +3309,46 @@ static noinline int copy_items(struct > btrfs_trans_handle *trans, > list_del(&sums->list); > kfree(sums); > } > + > + if (!has_extents) > + return ret; > + > + /* > + * Ok so here we need to go through and fill in any holes we may have > + * to make sure that holes are punched for those areas in case they had > + * extents previously. > + */ > + for (i = start_slot; i < start_slot + nr; i++) { > + struct btrfs_key key; > + u64 offset, len; > + u64 extent_end; > + > + btrfs_item_key_to_cpu(src, &key, i); > + if (key.type != BTRFS_EXTENT_DATA_KEY) > + continue; > + extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); > + if (btrfs_file_extent_type(src, extent) == > + BTRFS_FILE_EXTENT_INLINE) { > + len = btrfs_file_extent_inline_len(src, extent); > + extent_end = ALIGN(key.offset + len, log->sectorsize); > + } else { > + len = btrfs_file_extent_num_bytes(src, extent); > + extent_end = key.offset + len; > + } > + > + if (*last_extent == key.offset) { > + *last_extent = extent_end; > + continue; > + } > + offset = *last_extent; > + len = key.offset - *last_extent; > + ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), > + offset, 0, 0, len, 0, len, 0, > + 0, 0); > + if (ret) > + break; > + *last_extent = extent_end; > + } > return ret; > } > > @@ -3624,6 +3667,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle > *trans, > struct btrfs_key max_key; > struct btrfs_root *log = root->log_root; > struct extent_buffer *src = NULL; > + u64 last_extent = 0; > int err = 0; > int ret; > int nritems; > @@ -3738,8 +3782,8 @@ again: > goto next_slot; > } > > - ret = copy_items(trans, inode, dst_path, src, ins_start_slot, > - ins_nr, inode_only); > + ret = copy_items(trans, inode, dst_path, src, &last_extent, > + ins_start_slot, ins_nr, inode_only); > if (ret) { > err = ret; > goto out_unlock; > @@ -3757,7 +3801,7 @@ next_slot: > } > if (ins_nr) { > ret = copy_items(trans, inode, dst_path, src, > - ins_start_slot, > + &last_extent, ins_start_slot, > ins_nr, inode_only); > if (ret) { > err = ret; > @@ -3777,8 +3821,8 @@ next_slot: > } > } > if (ins_nr) { > - ret = copy_items(trans, inode, dst_path, src, ins_start_slot, > - ins_nr, inode_only); > + ret = copy_items(trans, inode, dst_path, src, &last_extent, > + ins_start_slot, ins_nr, inode_only); > if (ret) { > err = ret; > goto out_unlock; > -- Jeff Mahoney SUSE Labs
signature.asc
Description: OpenPGP digital signature
