2017-12-20 0:23 GMT+03:00 Darrick J. Wong <darrick.w...@oracle.com>: > On Tue, Dec 19, 2017 at 01:02:44PM +0300, Timofey Titovets wrote: >> At now btrfs_dedupe_file_range() restricted to 16MiB range for >> limit locking time and memory requirement for dedup ioctl() >> >> For too big input range code silently set range to 16MiB >> >> Let's remove that restriction by do iterating over dedup range. >> That's backward compatible and will not change anything for request >> less then 16MiB. >> >> Changes: >> v1 -> v2: >> - Refactor btrfs_cmp_data_prepare and btrfs_extent_same >> - Store memory of pages array between iterations >> - Lock inodes once, not on each iteration >> - Small inplace cleanups > > /me wonders if you could take advantage of vfs_clone_file_prep_inodes, > which takes care of the content comparison (and flushing files, and inode > checks, etc.) ? > > (ISTR Qu Wenruo(??) or someone remarking that this might not work well > with btrfs locking model, but I could be mistaken about all that...) > > --D
Sorry, not enough knowledge to give an authoritative answer. I can only say that, i try lightly test that, by add call before btrfs_extent_same() with inode_locks, at least that works. Thanks. >> >> Signed-off-by: Timofey Titovets <nefelim...@gmail.com> >> --- >> fs/btrfs/ioctl.c | 160 >> ++++++++++++++++++++++++++++++++----------------------- >> 1 file changed, 94 insertions(+), 66 deletions(-) >> >> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c >> index be5bd81b3669..45a47d0891fc 100644 >> --- a/fs/btrfs/ioctl.c >> +++ b/fs/btrfs/ioctl.c >> @@ -2965,8 +2965,8 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) >> put_page(pg); >> } >> } >> - kfree(cmp->src_pages); >> - kfree(cmp->dst_pages); >> + >> + cmp->num_pages = 0; >> } >> >> static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, >> @@ -2974,41 +2974,22 @@ static int btrfs_cmp_data_prepare(struct inode *src, >> u64 loff, >> u64 len, struct cmp_pages *cmp) >> { >> int ret; >> - int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT; >> - struct page **src_pgarr, **dst_pgarr; >> - >> - /* >> - * We must gather up all the pages before we initiate our >> - * extent locking. We use an array for the page pointers. Size >> - * of the array is bounded by len, which is in turn bounded by >> - * BTRFS_MAX_DEDUPE_LEN. >> - */ >> - src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); >> - dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); >> - if (!src_pgarr || !dst_pgarr) { >> - kfree(src_pgarr); >> - kfree(dst_pgarr); >> - return -ENOMEM; >> - } >> - cmp->num_pages = num_pages; >> - cmp->src_pages = src_pgarr; >> - cmp->dst_pages = dst_pgarr; >> >> /* >> * If deduping ranges in the same inode, locking rules make it >> mandatory >> * to always lock pages in ascending order to avoid deadlocks with >> * concurrent tasks (such as starting writeback/delalloc). >> */ >> - if (src == dst && dst_loff < loff) { >> - swap(src_pgarr, dst_pgarr); >> + if (src == dst && dst_loff < loff) >> swap(loff, dst_loff); >> - } >> >> - ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff); >> + cmp->num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT; >> + >> + ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); >> if (ret) >> goto out; >> >> - ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff); >> + ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, >> dst_loff); >> >> out: >> if (ret) >> @@ -3078,31 +3059,23 @@ static int extent_same_check_offsets(struct inode >> *inode, u64 off, u64 *plen, >> return 0; >> } >> >> -static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, >> - struct inode *dst, u64 dst_loff) >> +static int __btrfs_extent_same(struct inode *src, u64 loff, u64 olen, >> + struct inode *dst, u64 dst_loff, >> + struct cmp_pages *cmp) >> { >> int ret; >> u64 len = olen; >> - struct cmp_pages cmp; >> bool same_inode = (src == dst); >> u64 same_lock_start = 0; >> u64 same_lock_len = 0; >> >> - if (len == 0) >> - return 0; >> - >> - if (same_inode) >> - inode_lock(src); >> - else >> - btrfs_double_inode_lock(src, dst); >> - >> ret = extent_same_check_offsets(src, loff, &len, olen); >> if (ret) >> - goto out_unlock; >> + return ret; >> >> ret = extent_same_check_offsets(dst, dst_loff, &len, olen); >> if (ret) >> - goto out_unlock; >> + return ret; >> >> if (same_inode) { >> /* >> @@ -3119,32 +3092,21 @@ static int btrfs_extent_same(struct inode *src, u64 >> loff, u64 olen, >> * allow an unaligned length so long as it ends at >> * i_size. >> */ >> - if (len != olen) { >> - ret = -EINVAL; >> - goto out_unlock; >> - } >> + if (len != olen) >> + return -EINVAL; >> >> /* Check for overlapping ranges */ >> - if (dst_loff + len > loff && dst_loff < loff + len) { >> - ret = -EINVAL; >> - goto out_unlock; >> - } >> + if (dst_loff + len > loff && dst_loff < loff + len) >> + return -EINVAL; >> >> same_lock_start = min_t(u64, loff, dst_loff); >> same_lock_len = max_t(u64, loff, dst_loff) + len - >> same_lock_start; >> } >> >> - /* don't make the dst file partly checksummed */ >> - if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != >> - (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { >> - ret = -EINVAL; >> - goto out_unlock; >> - } >> - >> again: >> - ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); >> + ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp); >> if (ret) >> - goto out_unlock; >> + return ret; >> >> if (same_inode) >> ret = lock_extent_range(src, same_lock_start, same_lock_len, >> @@ -3165,7 +3127,7 @@ static int btrfs_extent_same(struct inode *src, u64 >> loff, u64 olen, >> * Ranges in the io trees already unlocked. Now unlock all >> * pages before waiting for all IO to complete. >> */ >> - btrfs_cmp_data_free(&cmp); >> + btrfs_cmp_data_free(cmp); >> if (same_inode) { >> btrfs_wait_ordered_range(src, same_lock_start, >> same_lock_len); >> @@ -3178,12 +3140,12 @@ static int btrfs_extent_same(struct inode *src, u64 >> loff, u64 olen, >> ASSERT(ret == 0); >> if (WARN_ON(ret)) { >> /* ranges in the io trees already unlocked */ >> - btrfs_cmp_data_free(&cmp); >> + btrfs_cmp_data_free(cmp); >> return ret; >> } >> >> /* pass original length for comparison so we stay within i_size */ >> - ret = btrfs_cmp_data(olen, &cmp); >> + ret = btrfs_cmp_data(olen, cmp); >> if (ret == 0) >> ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); >> >> @@ -3193,8 +3155,79 @@ static int btrfs_extent_same(struct inode *src, u64 >> loff, u64 olen, >> else >> btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); >> >> - btrfs_cmp_data_free(&cmp); >> -out_unlock: >> + btrfs_cmp_data_free(cmp); >> + >> + return ret; >> +} >> + >> +#define BTRFS_MAX_DEDUPE_LEN SZ_16M >> + >> +static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, >> + struct inode *dst, u64 dst_loff) >> +{ >> + int ret; >> + int num_pages; >> + bool same_inode = (src == dst); >> + u64 i, tail_len, chunk_count; >> + struct cmp_pages cmp; >> + >> + if (olen == 0) >> + return 0; >> + >> + /* don't make the dst file partly checksummed */ >> + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != >> + (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { >> + return -EINVAL; >> + } >> + >> + if (same_inode) >> + inode_lock(src); >> + else >> + btrfs_double_inode_lock(src, dst); >> + >> + tail_len = olen % BTRFS_MAX_DEDUPE_LEN; >> + chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN); >> + >> + if (chunk_count > 0) { >> + num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT; >> + } else { >> + num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT; >> + } >> + /* >> + * We must gather up all the pages before we initiate our >> + * extent locking. We use an array for the page pointers. Size >> + * of the array is bounded by len, which is in turn bounded by >> + * BTRFS_MAX_DEDUPE_LEN. >> + */ >> + ret = -ENOMEM; >> + cmp.src_pages = kcalloc(num_pages, sizeof(struct page *), >> + GFP_KERNEL); >> + if (!cmp.src_pages) >> + goto out; >> + cmp.dst_pages = kcalloc(num_pages, sizeof(struct page *), >> + GFP_KERNEL); >> + if (!cmp.dst_pages) >> + goto out; >> + >> + >> + for (i = 0; i < chunk_count; i++) { >> + ret = __btrfs_extent_same(src, loff, BTRFS_MAX_DEDUPE_LEN, >> + dst, dst_loff, &cmp); >> + if (ret) >> + goto out; >> + >> + loff += BTRFS_MAX_DEDUPE_LEN; >> + dst_loff += BTRFS_MAX_DEDUPE_LEN; >> + } >> + >> + if (tail_len > 0) >> + ret = __btrfs_extent_same(src, loff, tail_len, >> + dst, dst_loff, &cmp); >> + >> +out: >> + kfree(cmp.src_pages); >> + kfree(cmp.dst_pages); >> + >> if (same_inode) >> inode_unlock(src); >> else >> @@ -3203,8 +3236,6 @@ static int btrfs_extent_same(struct inode *src, u64 >> loff, u64 olen, >> return ret; >> } >> >> -#define BTRFS_MAX_DEDUPE_LEN SZ_16M >> - >> ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, >> struct file *dst_file, u64 dst_loff) >> { >> @@ -3213,9 +3244,6 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, >> u64 loff, u64 olen, >> u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; >> ssize_t res; >> >> - if (olen > BTRFS_MAX_DEDUPE_LEN) >> - olen = BTRFS_MAX_DEDUPE_LEN; >> - >> if (WARN_ON_ONCE(bs < PAGE_SIZE)) { >> /* >> * Btrfs does not support blocksize < page_size. As a >> -- >> 2.15.1 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in >> the body of a message to majord...@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html -- Have a nice day, Timofey. -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html