On Thu, Jul 06, 2017 at 08:41:00AM -0500, Eric W. Biederman wrote:
> Andrei Vagin <ava...@gmail.com> writes:
> 
> > I did a few experiments and found that the bug is reproduced for 6-12
> > hours on the our test server. Then I reverted two patches and the server
> > is working normally for more than 24 hours already, so the bug is
> > probably in one of these patches.
> >
> > commit e3d0065ab8535cbeee69a4c46a59f4d7360803ae
> > Author: Andrei Vagin <ava...@openvz.org>
> > Date:   Sun Jul 2 07:41:25 2017 +0200
> >
> >     Revert "proc/sysctl: prune stale dentries during unregistering"
> >     
> >     This reverts commit d6cffbbe9a7e51eb705182965a189457c17ba8a3.
> >
> > commit 2d3c50dac81011c1da4d2f7a63b84bd75287e320
> > Author: Andrei Vagin <ava...@openvz.org>
> > Date:   Sun Jul 2 07:40:08 2017 +0200
> >
> >     Revert "proc/sysctl: Don't grab i_lock under sysctl_lock."
> >     
> >     This reverts commit ace0c791e6c3cf5ef37cad2df69f0d90ccc40ffb.
> >
> >
> > FYI: This bug has been reproduced on 4.11.7
> 
> Instead of the revert could you test the patch below?

Our CI server are working with this patch for more than one day without
any problem.

Tested-by: Andrei Vagin <ava...@openvz.org>

Thanks,
Andrei
> 
> This should fix the issue by grabbing a s_active reference
> to the proc super block for every inode we flush.
> 
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index c5ae09b6c726..18694598bebf 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -67,7 +67,7 @@ struct proc_inode {
>       struct proc_dir_entry *pde;
>       struct ctl_table_header *sysctl;
>       struct ctl_table *sysctl_entry;
> -     struct list_head sysctl_inodes;
> +     struct hlist_node sysctl_inodes;
>       const struct proc_ns_operations *ns_ops;
>       struct inode vfs_inode;
>  };
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index 67985a7233c2..9bf06e2b1284 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head,
>       head->set = set;
>       head->parent = NULL;
>       head->node = node;
> -     INIT_LIST_HEAD(&head->inodes);
> +     INIT_HLIST_HEAD(&head->inodes);
>       if (node) {
>               struct ctl_table *entry;
>               for (entry = table; entry->procname; entry++, node++)
> @@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p)
>                       complete(p->unregistering);
>  }
>  
> -/* called under sysctl_lock */
>  static void proc_sys_prune_dcache(struct ctl_table_header *head)
>  {
> -     struct inode *inode, *prev = NULL;
> +     struct inode *inode;
>       struct proc_inode *ei;
> +     struct hlist_node *node;
> +     struct super_block *sb;
>  
>       rcu_read_lock();
> -     list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
> -             inode = igrab(&ei->vfs_inode);
> -             if (inode) {
> -                     rcu_read_unlock();
> -                     iput(prev);
> -                     prev = inode;
> -                     d_prune_aliases(inode);
> +     for (;;) {
> +             node = hlist_first_rcu(&head->inodes);
> +             if (!node)
> +                     break;
> +             ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
> +             spin_lock(&sysctl_lock);
> +             hlist_del_init_rcu(&ei->sysctl_inodes);
> +             spin_unlock(&sysctl_lock);
> +
> +             inode = &ei->vfs_inode;
> +             sb = inode->i_sb;
> +             if (!atomic_inc_not_zero(&sb->s_active))
> +                     continue;
> +             inode = igrab(inode);
> +             rcu_read_unlock();
> +             if (unlikely(!inode)) {
> +                     deactivate_super(sb);
>                       rcu_read_lock();
> +                     continue;
>               }
> +
> +             d_prune_aliases(inode);
> +             iput(inode);
> +             deactivate_super(sb);
> +
> +             rcu_read_lock();
>       }
>       rcu_read_unlock();
> -     iput(prev);
>  }
>  
>  /* called under sysctl_lock, will reacquire if has to wait */
> @@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct 
> super_block *sb,
>       }
>       ei->sysctl = head;
>       ei->sysctl_entry = table;
> -     list_add_rcu(&ei->sysctl_inodes, &head->inodes);
> +     hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
>       head->count++;
>       spin_unlock(&sysctl_lock);
>  
> @@ -489,7 +506,7 @@ static struct inode *proc_sys_make_inode(struct 
> super_block *sb,
>  void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
>  {
>       spin_lock(&sysctl_lock);
> -     list_del_rcu(&PROC_I(inode)->sysctl_inodes);
> +     hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
>       if (!--head->count)
>               kfree_rcu(head, rcu);
>       spin_unlock(&sysctl_lock);
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index 80d07816def0..1c04a26bfd2f 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -143,7 +143,7 @@ struct ctl_table_header
>       struct ctl_table_set *set;
>       struct ctl_dir *parent;
>       struct ctl_node *node;
> -     struct list_head inodes; /* head for proc_inode->sysctl_inodes */
> +     struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
>  };
>  
>  struct ctl_dir {
> 

Reply via email to