On Wed, Apr 21, 2010 at 11:10:46AM +0800, Shaohua Li wrote:
> On Wed, Apr 21, 2010 at 10:11:01AM +0800, Josef Bacik wrote:
> > On Wed, Apr 21, 2010 at 09:48:17AM +0800, Shaohua Li wrote:
> > > On Tue, Apr 20, 2010 at 10:39:01PM +0800, Josef Bacik wrote:
> > > > On Tue, Apr 20, 2010 at 05:21:58PM +0800, Shaohua Li wrote:
> > > > > searching extent_io_tree is frequently used and tooks a lot of cpu 
> > > > > time.
> > > > > We could cache last found extent_state to skip some full search. In my
> > > > > test, the hit rate is from 30% to 70% depending on different workload,
> > > > > which can speed up the search.
> > > > > 
> > > > > Signed-off-by: Shaohua Li <[email protected]>
> > > > > 
> > > > > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> > > > > index d2d0368..645f00c 100644
> > > > > --- a/fs/btrfs/extent_io.c
> > > > > +++ b/fs/btrfs/extent_io.c
> > > > > @@ -110,6 +110,7 @@ void extent_io_tree_init(struct extent_io_tree 
> > > > > *tree,
> > > > >       spin_lock_init(&tree->lock);
> > > > >       spin_lock_init(&tree->buffer_lock);
> > > > >       tree->mapping = mapping;
> > > > > +     tree->cached_state = NULL;
> > > > >  }
> > > > >  
> > > > >  static struct extent_state *alloc_extent_state(gfp_t mask)
> > > > > @@ -135,6 +136,22 @@ static struct extent_state 
> > > > > *alloc_extent_state(gfp_t mask)
> > > > >       return state;
> > > > >  }
> > > > >  
> > > > > +static void remove_cached_extent(struct extent_io_tree *tree,
> > > > > +     struct extent_state *state)
> > > > > +{
> > > > > +     if (!tree->cached_state)
> > > > > +             return;
> > > > > +     if (tree->cached_state == state)
> > > > > +             tree->cached_state = NULL;
> > > > > +}
> > > > > +
> > > > > +static void merge_cached_extent(struct extent_io_tree *tree,
> > > > > +     struct extent_state *first, struct extent_state *last)
> > > > > +{
> > > > > +     if (tree->cached_state == first || tree->cached_state == last)
> > > > > +             tree->cached_state = first;
> > > > > +}
> > > > > +
> > > > >  static void free_extent_state(struct extent_state *state)
> > > > >  {
> > > > >       if (!state)
> > > > > @@ -188,6 +205,12 @@ static struct rb_node *__etree_search(struct 
> > > > > extent_io_tree *tree, u64 offset,
> > > > >       struct rb_node *orig_prev = NULL;
> > > > >       struct tree_entry *entry;
> > > > >       struct tree_entry *prev_entry = NULL;
> > > > > +     struct tree_entry *cached_entry =
> > > > > +                             (struct tree_entry *)tree->cached_state;
> > > > > +
> > > > > +     if (likely(cached_entry && offset >= cached_entry->start &&
> > > > > +             offset <= cached_entry->end))
> > > > > +             return &cached_entry->rb_node;
> > > > >  
> > > > >       while (n) {
> > > > >               entry = rb_entry(n, struct tree_entry, rb_node);
> > > > > @@ -198,8 +221,10 @@ static struct rb_node *__etree_search(struct 
> > > > > extent_io_tree *tree, u64 offset,
> > > > >                       n = n->rb_left;
> > > > >               else if (offset > entry->end)
> > > > >                       n = n->rb_right;
> > > > > -             else
> > > > > +             else {
> > > > > +                     tree->cached_state = (struct extent_state 
> > > > > *)entry;
> > > > >                       return n;
> > > > > +             }
> > > > >       }
> > > > >  
> > > > >       if (prev_ret) {
> > > > > @@ -313,6 +338,7 @@ static int merge_state(struct extent_io_tree 
> > > > > *tree,
> > > > >                       merge_cb(tree, state, other);
> > > > >                       state->start = other->start;
> > > > >                       other->tree = NULL;
> > > > > +                     merge_cached_extent(tree, state, other);
> > > > >                       rb_erase(&other->rb_node, &tree->state);
> > > > >                       free_extent_state(other);
> > > > >               }
> > > > > @@ -325,6 +351,7 @@ static int merge_state(struct extent_io_tree 
> > > > > *tree,
> > > > >                       merge_cb(tree, state, other);
> > > > >                       other->start = state->start;
> > > > >                       state->tree = NULL;
> > > > > +                     merge_cached_extent(tree, other, state);
> > > > >                       rb_erase(&state->rb_node, &tree->state);
> > > > >                       free_extent_state(state);
> > > > >                       state = NULL;
> > > > > @@ -473,6 +500,7 @@ static int clear_state_bit(struct extent_io_tree 
> > > > > *tree,
> > > > >               wake_up(&state->wq);
> > > > >       if (delete || state->state == 0) {
> > > > >               if (state->tree) {
> > > > > +                     remove_cached_extent(tree, state);
> > > > >                       clear_state_cb(tree, state, state->state);
> > > > >                       rb_erase(&state->rb_node, &tree->state);
> > > > >                       state->tree = NULL;
> > > > > diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> > > > > index bbab481..e60b367 100644
> > > > > --- a/fs/btrfs/extent_io.h
> > > > > +++ b/fs/btrfs/extent_io.h
> > > > > @@ -89,6 +89,7 @@ struct extent_io_tree {
> > > > >       spinlock_t lock;
> > > > >       spinlock_t buffer_lock;
> > > > >       struct extent_io_ops *ops;
> > > > > +     struct extent_state *cached_state;
> > > > >  };
> > > > >  
> > > > >  struct extent_state {
> > > > 
> > > > Sorry I saw this earlier but then forgot about it.  So instead of doing 
> > > > a
> > > > per-tree thing, which will end up with misses if somebody else tries to 
> > > > search
> > > > the tree for a different offset, you will want to do something like this
> > > > 
> > > > http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-unstable.git;a=commit;h=2ac55d41b5d6bf49e76bc85db5431240617e2f8f
> > > > 
> > > > So that way _anybody_ who does a search will have a cached state, and 
> > > > so all
> > > > subsequent searches won't be needed, instead of only working for the 
> > > > first guy
> > > > who gets their state cached.  Thanks,
> > > Hmm, the patch you pointed out is already in upstream but I still saw the 
> > > search
> > > takes a lot of CPU.
> > > 
> > 
> > I've probably missed some places where we could be using cached extent 
> > states, I
> > wasn't terribly thorough when I was checking.  It may be good to instrument 
> > the
> > cases where we come into test/clear/set bits and we not end up using the 
> > cached
> > state to see where the trouble spots are.  Thanks,
> My test basically is in the code path of .readpage/.readpages and
> .writepage/.writepages. In my first glance of the code, such places look not 
> easily
> to covert to use your scheme of cached extent states. But I need more check 
> anyway.
> 

Hmm I see your point.  I guess this doesn't mess with anybody that already uses
the cached state stuff, and if it helps out in these more complicated cases then
I guess thats good enough.

Acked-by: Josef Bacik <[email protected]>

Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to