Hi Josef, are you fine with V2?
On Thu, Oct 13, 2016 at 05:31:25PM +0800, Wang Xiaoguang wrote: > Since commit b02441999efcc6152b87cd58e7970bb7843f76cf, we don't wait for all > ordered extents, but I ran into some enospc errors when doing large file > create and delete tests. It's because shrink_delalloc() does not write > enough delalloc bytes and wait for them to finish: > From: Miao Xie <mi...@cn.fujitsu.com> > Date: Mon, 4 Nov 2013 23:13:25 +0800 > Subject: [PATCH] Btrfs: don't wait for the completion of all the ordered > extents > > It is very likely that there are lots of ordered extents in the filesystem, > if we wait for the completion of all of them when we want to reclaim some > space for the metadata space reservation, we would be blocked for a long > time. The performance would drop down suddenly for a long time. > > Here we introduce a simple reclaim_priority variable, the lower the > value, the higher the priority, 0 is the maximum priority. The core > idea is: > delalloc_bytes = > percpu_counter_sum_positive(&root->fs_info->delalloc_bytes); > if (reclaim_priority) > to_reclaim = orig * (2 << (BTRFS_DEFAULT_FLUSH_PRIORITY - > reclaim_priority)); > else > to_reclaim = delalloc_bytes; > > Here 'orig' is the amount of metadata we want to reserve, and as the priority > increases, we will try to write more delalloc bytes, meanwhile if > "reclaim_priority == 0" returns true, we'll also wait for all current ordered > extents to finish. 
> > Signed-off-by: Wang Xiaoguang <wangxg.f...@cn.fujitsu.com> > --- > fs/btrfs/extent-tree.c | 63 > ++++++++++++++++++++++++++------------------ > include/trace/events/btrfs.h | 11 +++----- > 2 files changed, 42 insertions(+), 32 deletions(-) > > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c > index e08791d..7477c25 100644 > --- a/fs/btrfs/extent-tree.c > +++ b/fs/btrfs/extent-tree.c > @@ -4686,16 +4686,18 @@ static inline int calc_reclaim_items_nr(struct > btrfs_root *root, u64 to_reclaim) > } > > #define EXTENT_SIZE_PER_ITEM SZ_256K > +#define BTRFS_DEFAULT_FLUSH_PRIORITY 3 > > /* > * shrink metadata reservation for delalloc > */ > -static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 > orig, > - bool wait_ordered) > +static void shrink_delalloc(struct btrfs_root *root, u64 orig, > + bool wait_ordered, int reclaim_priority) > { > struct btrfs_block_rsv *block_rsv; > struct btrfs_space_info *space_info; > struct btrfs_trans_handle *trans; > + u64 to_reclaim; > u64 delalloc_bytes; > u64 max_reclaim; > long time_left; > @@ -4703,22 +4705,36 @@ static void shrink_delalloc(struct btrfs_root *root, > u64 to_reclaim, u64 orig, > int loops; > int items; > enum btrfs_reserve_flush_enum flush; > + int items_to_wait; > + > + delalloc_bytes = percpu_counter_sum_positive( > + &root->fs_info->delalloc_bytes); > + if (reclaim_priority < 0) > + reclaim_priority = 0; > + > + if (reclaim_priority) > + to_reclaim = orig * (2 << (BTRFS_DEFAULT_FLUSH_PRIORITY - > + reclaim_priority)); > + else > + to_reclaim = delalloc_bytes; > > /* Calc the number of the pages we need flush for space reservation */ > items = calc_reclaim_items_nr(root, to_reclaim); > to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM; > + if (reclaim_priority) > + items_to_wait = items; > + else > + items_to_wait = -1; > > trans = (struct btrfs_trans_handle *)current->journal_info; > block_rsv = &root->fs_info->delalloc_block_rsv; > space_info = block_rsv->space_info; > > - 
delalloc_bytes = percpu_counter_sum_positive( > - &root->fs_info->delalloc_bytes); > if (delalloc_bytes == 0) { > if (trans) > return; > if (wait_ordered) > - btrfs_wait_ordered_roots(root->fs_info, items, > + btrfs_wait_ordered_roots(root->fs_info, items_to_wait, > 0, (u64)-1); > return; > } > @@ -4763,7 +4779,7 @@ static void shrink_delalloc(struct btrfs_root *root, > u64 to_reclaim, u64 orig, > > loops++; > if (wait_ordered && !trans) { > - btrfs_wait_ordered_roots(root->fs_info, items, > + btrfs_wait_ordered_roots(root->fs_info, items_to_wait, > 0, (u64)-1); > } else { > time_left = schedule_timeout_killable(1); > @@ -4836,7 +4852,7 @@ struct reserve_ticket { > > static int flush_space(struct btrfs_root *root, > struct btrfs_space_info *space_info, u64 num_bytes, > - u64 orig_bytes, int state) > + int state, int reclaim_priority) > { > struct btrfs_trans_handle *trans; > int nr; > @@ -4860,8 +4876,8 @@ static int flush_space(struct btrfs_root *root, > break; > case FLUSH_DELALLOC: > case FLUSH_DELALLOC_WAIT: > - shrink_delalloc(root, num_bytes * 2, orig_bytes, > - state == FLUSH_DELALLOC_WAIT); > + shrink_delalloc(root, num_bytes, state == FLUSH_DELALLOC_WAIT, > + reclaim_priority); > break; > case ALLOC_CHUNK: > trans = btrfs_join_transaction(root); > @@ -4877,7 +4893,7 @@ static int flush_space(struct btrfs_root *root, > ret = 0; > break; > case COMMIT_TRANS: > - ret = may_commit_transaction(root, space_info, orig_bytes, 0); > + ret = may_commit_transaction(root, space_info, num_bytes, 0); > break; > default: > ret = -ENOSPC; > @@ -4885,7 +4901,7 @@ static int flush_space(struct btrfs_root *root, > } > > trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes, > - orig_bytes, state, ret); > + state, ret); > return ret; > } > > @@ -5008,8 +5024,8 @@ static void btrfs_async_reclaim_metadata_space(struct > work_struct *work) > struct btrfs_space_info *space_info; > u64 to_reclaim; > int flush_state; > - int commit_cycles = 0; > u64 last_tickets_id; > 
+ int reclaim_priority = BTRFS_DEFAULT_FLUSH_PRIORITY; > > fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); > space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); > @@ -5030,8 +5046,11 @@ static void btrfs_async_reclaim_metadata_space(struct > work_struct *work) > struct reserve_ticket *ticket; > int ret; > > + if (flush_state > COMMIT_TRANS) > + flush_state = FLUSH_DELAYED_ITEMS_NR; > ret = flush_space(fs_info->fs_root, space_info, to_reclaim, > - to_reclaim, flush_state); > + flush_state, reclaim_priority); > + > spin_lock(&space_info->lock); > if (!ret) > try_to_wake_tickets(fs_info->fs_root, space_info); > @@ -5049,21 +5068,15 @@ static void btrfs_async_reclaim_metadata_space(struct > work_struct *work) > } else { > last_tickets_id = space_info->tickets_id; > flush_state = FLUSH_DELAYED_ITEMS_NR; > - if (commit_cycles) > - commit_cycles--; > + reclaim_priority = BTRFS_DEFAULT_FLUSH_PRIORITY; > } > > - if (flush_state > COMMIT_TRANS) { > - commit_cycles++; > - if (commit_cycles > 2) { > - wake_all_tickets(&space_info->tickets); > - space_info->flush = 0; > - } else { > - flush_state = FLUSH_DELAYED_ITEMS_NR; > - } > + if (flush_state > COMMIT_TRANS && reclaim_priority == 0) { > + wake_all_tickets(&space_info->tickets); > + space_info->flush = 0; > } > spin_unlock(&space_info->lock); > - } while (flush_state <= COMMIT_TRANS); > + } while ((flush_state <= COMMIT_TRANS) || (--reclaim_priority >= 0)); > } > > void btrfs_init_async_reclaim_work(struct work_struct *work) > @@ -5089,7 +5102,7 @@ static void priority_reclaim_metadata_space(struct > btrfs_fs_info *fs_info, > > do { > flush_space(fs_info->fs_root, space_info, to_reclaim, > - to_reclaim, flush_state); > + flush_state, 1); > flush_state++; > spin_lock(&space_info->lock); > if (ticket->bytes == 0) { > diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h > index e030d6f..7d953a6 100644 > --- a/include/trace/events/btrfs.h > +++ 
b/include/trace/events/btrfs.h > @@ -857,15 +857,14 @@ TRACE_EVENT(btrfs_trigger_flush, > TRACE_EVENT(btrfs_flush_space, > > TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes, > - u64 orig_bytes, int state, int ret), > + int state, int ret), > > - TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret), > + TP_ARGS(fs_info, flags, num_bytes, state, ret), > > TP_STRUCT__entry( > __array( u8, fsid, BTRFS_UUID_SIZE ) > __field( u64, flags ) > __field( u64, num_bytes ) > - __field( u64, orig_bytes ) > __field( int, state ) > __field( int, ret ) > ), > @@ -874,19 +873,17 @@ TRACE_EVENT(btrfs_flush_space, > memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE); > __entry->flags = flags; > __entry->num_bytes = num_bytes; > - __entry->orig_bytes = orig_bytes; > __entry->state = state; > __entry->ret = ret; > ), > > TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, " > - "orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state, > + "ret = %d", __entry->fsid, __entry->state, > show_flush_state(__entry->state), > (unsigned long long)__entry->flags, > __print_flags((unsigned long)__entry->flags, "|", > BTRFS_GROUP_FLAGS), > - (unsigned long long)__entry->num_bytes, > - (unsigned long long)__entry->orig_bytes, __entry->ret) > + (unsigned long long)__entry->num_bytes, __entry->ret) > ); > > DECLARE_EVENT_CLASS(btrfs__reserved_extent, > -- > 2.7.4 > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html