On Oct 25, 2016, at 4:44 PM, Omar Sandoval <osan...@osandov.com> wrote:
> 
> On Tue, Oct 25, 2016 at 02:41:44PM -0400, Josef Bacik wrote:
>> With anything that populates the inode/dentry cache with a lot of one time 
>> use
>> inodes we can really put a lot of pressure on the system for things we don't
>> need to keep in cache.  It takes two runs through the LRU to evict these one 
>> use
>> entries, and if you have a lot of memory you can end up with 10's of 
>> millions of
>> entries in the dcache or icache that have never actually been touched since 
>> they
>> were first instantiated, and it will take a lot of CPU and a lot of pressure 
>> to
>> evict all of them.
>> 
>> So instead do what we do with pagecache, only set the *REFERENCED flags if we
>> are being used after we've been put onto the LRU.  This makes a significant
>> difference in the system's ability to evict these useless cache entries.  
>> With a
>> fs_mark workload that creates 40 million files we get the following results 
>> (all
>> in files/sec)
>> 
>> Btrfs                        Patched         Unpatched
>> Average Files/sec:   72209.3         63254.2
>> p50 Files/sec:               70850           57560
>> p90 Files/sec:               68757           53085
>> p99 Files/sec:               68757           53085
>> 
>> XFS                  Patched         Unpatched
>> Average Files/sec:   61025.5         60719.5
>> p50 Files/sec:               60107           59465
>> p90 Files/sec:               59300           57966
>> p99 Files/sec:               59227           57528
>> 
>> Ext4                 Patched         Unpatched
>> Average Files/sec:   121785.4        119248.0
>> p50 Files/sec:               120852          119274
>> p90 Files/sec:               116703          112783
>> p99 Files/sec:               114393          104934
>> 
>> The reason Btrfs has a much larger improvement is because it holds a lot more
>> things in memory so benefits more from faster slab reclaim, but across the 
>> board
>> is an improvement for each of the file systems.
>> 
>> Signed-off-by: Josef Bacik <jba...@fb.com>
>> ---
>> fs/dcache.c | 15 ++++++++++-----
>> fs/inode.c  |  5 ++++-
>> 2 files changed, 14 insertions(+), 6 deletions(-)
>> 
>> diff --git a/fs/dcache.c b/fs/dcache.c
>> index 5c7cc95..a558075 100644
>> --- a/fs/dcache.c
>> +++ b/fs/dcache.c
>> @@ -779,8 +779,6 @@ repeat:
>>                      goto kill_it;
>>      }
>> 
>> -    if (!(dentry->d_flags & DCACHE_REFERENCED))
>> -            dentry->d_flags |= DCACHE_REFERENCED;
>>      dentry_lru_add(dentry);
>> 
>>      dentry->d_lockref.count--;
>> @@ -803,6 +801,13 @@ static inline void __dget_dlock(struct dentry *dentry)
>>      dentry->d_lockref.count++;
>> }
>> 
>> +static inline void __dget_dlock_reference(struct dentry *dentry)
>> +{
>> +    if (dentry->d_flags & DCACHE_LRU_LIST)
>> +            dentry->d_flags |= DCACHE_REFERENCED;
>> +    dentry->d_lockref.count++;
>> +}
>> +
>> static inline void __dget(struct dentry *dentry)
>> {
>>      lockref_get(&dentry->d_lockref);
>> @@ -875,7 +880,7 @@ again:
>>                          (alias->d_flags & DCACHE_DISCONNECTED)) {
>>                              discon_alias = alias;
>>                      } else {
>> -                            __dget_dlock(alias);
>> +                            __dget_dlock_reference(alias);
>>                              spin_unlock(&alias->d_lock);
>>                              return alias;
>>                      }
>> @@ -886,7 +891,7 @@ again:
>>              alias = discon_alias;
>>              spin_lock(&alias->d_lock);
>>              if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
>> -                    __dget_dlock(alias);
>> +                    __dget_dlock_reference(alias);
>>                      spin_unlock(&alias->d_lock);
>>                      return alias;
>>              }
>> @@ -2250,7 +2255,7 @@ struct dentry *__d_lookup(const struct dentry *parent, 
>> const struct qstr *name)
>>              if (!d_same_name(dentry, parent, name))
>>                      goto next;
>> 
>> -            dentry->d_lockref.count++;
>> +            __dget_dlock_reference(dentry);
> 
> This misses dentries that we get through __d_lookup_rcu(), so I think
> your change made it so most things aren't getting DCACHE_REFERENCED set
> at all. Maybe something like this instead?
> 
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 5c7cc95..d7a56a8 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -412,15 +412,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, 
> struct dentry *dentry,
>       list_lru_isolate_move(lru, &dentry->d_lru, list);
> }
> 
> -/*
> - * dentry_lru_(add|del)_list) must be called with d_lock held.
> - */
> -static void dentry_lru_add(struct dentry *dentry)
> -{
> -     if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
> -             d_lru_add(dentry);
> -}
> -
> /**
>  * d_drop - drop a dentry
>  * @dentry: dentry to drop
> @@ -779,9 +770,12 @@ void dput(struct dentry *dentry)
>                       goto kill_it;
>       }
> 
> -     if (!(dentry->d_flags & DCACHE_REFERENCED))
> -             dentry->d_flags |= DCACHE_REFERENCED;
> -     dentry_lru_add(dentry);
> +     if (likely(dentry->d_flags & DCACHE_LRU_LIST)) {
> +             if (!(dentry->d_flags & DCACHE_REFERENCED))
> +                     dentry->d_flags |= DCACHE_REFERENCED;

There is no benefit to checking if DCACHE_REFERENCED is unset before
trying to set it.  You've already accessed the cacheline, so you can
avoid the branch here and just set it unconditionally.

Cheers, Andreas

> +     } else {
> +             d_lru_add(dentry);
> +     }
> 
>       dentry->d_lockref.count--;
>       spin_unlock(&dentry->d_lock);
> diff --git a/fs/inode.c b/fs/inode.c
> index 88110fd..16faca3 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -798,6 +798,8 @@ static struct inode *find_inode(struct super_block *sb,
>                       __wait_on_freeing_inode(inode);
>                       goto repeat;
>               }
> +             if (!list_empty(&inode->i_lru))
> +                     inode->i_state |= I_REFERENCED;
>               __iget(inode);
>               spin_unlock(&inode->i_lock);
>               return inode;
> @@ -825,6 +827,8 @@ static struct inode *find_inode_fast(struct super_block 
> *sb,
>                       __wait_on_freeing_inode(inode);
>                       goto repeat;
>               }
> +             if (!list_empty(&inode->i_lru))
> +                     inode->i_state |= I_REFERENCED;
>               __iget(inode);
>               spin_unlock(&inode->i_lock);
>               return inode;
> @@ -1492,7 +1496,6 @@ static void iput_final(struct inode *inode)
>               drop = generic_drop_inode(inode);
> 
>       if (!drop && (sb->s_flags & MS_ACTIVE)) {
> -             inode->i_state |= I_REFERENCED;
>               inode_add_lru(inode);
>               spin_unlock(&inode->i_lock);
>               return;
> 
> --
> Omar
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Cheers, Andreas





Attachment: signature.asc
Description: Message signed with OpenPGP using GPGMail

Reply via email to