Re: [RFC PATCH 27/27] epoll: take epitem list out of struct file

2020-10-05 Thread Al Viro
On Mon, Oct 05, 2020 at 04:37:18PM -0400, Qian Cai wrote:
> On Sun, 2020-10-04 at 03:39 +0100, Al Viro wrote:
> >  /*
> >   * Must be called with "mtx" held.
> >   */
> > @@ -1367,19 +1454,21 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
> > epi->event = *event;
> > epi->next = EP_UNACTIVE_PTR;
> >  
> > -   atomic_long_inc(&ep->user->epoll_watches);
> > -
> > if (tep)
> > mutex_lock(&tep->mtx);
> > /* Add the current item to the list of active epoll hook for this file */
> > -   spin_lock(&tfile->f_lock);
> > -   hlist_add_head_rcu(&epi->fllink, &tfile->f_ep_links);
> > -   spin_unlock(&tfile->f_lock);
> > -   if (full_check && !tep) {
> > -   get_file(tfile);
> > -   list_add(&tfile->f_tfile_llink, &tfile_check_list);
> > +   if (unlikely(attach_epitem(tfile, epi) < 0)) {
> > +   kmem_cache_free(epi_cache, epi);
> > +   if (tep)
> > +   mutex_lock(&tep->mtx);
> 
> Shouldn't this be mutex_unlock() instead?

It should.  Fixed and force-pushed...

> > +   return -ENOMEM;
> > }
> >  
> 
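
For reference, with that fix folded in, the error path presumably ends up
looking like this (a sketch of the force-pushed variant, which is not
quoted in this thread):

        if (unlikely(attach_epitem(tfile, epi) < 0)) {
                kmem_cache_free(epi_cache, epi);
                if (tep)
                        mutex_unlock(&tep->mtx);        /* was mutex_lock() - the bug above */
                return -ENOMEM;
        }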


Re: [RFC PATCH 27/27] epoll: take epitem list out of struct file

2020-10-05 Thread Qian Cai
On Sun, 2020-10-04 at 03:39 +0100, Al Viro wrote:
>  /*
>   * Must be called with "mtx" held.
>   */
> @@ -1367,19 +1454,21 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
>   epi->event = *event;
>   epi->next = EP_UNACTIVE_PTR;
>  
> - atomic_long_inc(&ep->user->epoll_watches);
> -
>   if (tep)
>   mutex_lock(&tep->mtx);
>   /* Add the current item to the list of active epoll hook for this file */
> - spin_lock(&tfile->f_lock);
> - hlist_add_head_rcu(&epi->fllink, &tfile->f_ep_links);
> - spin_unlock(&tfile->f_lock);
> - if (full_check && !tep) {
> - get_file(tfile);
> - list_add(&tfile->f_tfile_llink, &tfile_check_list);
> + if (unlikely(attach_epitem(tfile, epi) < 0)) {
> + kmem_cache_free(epi_cache, epi);
> + if (tep)
> + mutex_lock(&tep->mtx);

Shouldn't this be mutex_unlock() instead?

> + return -ENOMEM;
>   }
>  



[RFC PATCH 27/27] epoll: take epitem list out of struct file

2020-10-03 Thread Al Viro
From: Al Viro 

Move the head of the epitem list out of struct file; for epoll files it's
moved into struct eventpoll (->refs there), for non-epoll files into
a new object (struct epitems_head).  In place of ->f_ep_links we
leave a pointer to the list head (->f_ep).

->f_ep is protected by ->f_lock and is zeroed as soon as the list
of epitems becomes empty (currently that can happen only in
ep_remove()).
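
In data-structure terms the new layout is roughly the following (a simplified
sketch for orientation only; the hunks below and in the rest of the patch are
authoritative):

struct epitems_head {
        struct hlist_head epitems;      /* every epitem watching this file */
        struct epitems_head *next;      /* reverse path check list linkage */
};

struct eventpoll {
        /* ... */
        struct hlist_head refs;         /* epitems watching this epoll file */
};

struct file {
        /* ... */
        struct hlist_head *f_ep;        /* &eventpoll->refs for epoll files,
                                         * &epitems_head->epitems otherwise;
                                         * protected by ->f_lock, zeroed once
                                         * the set of epitems goes empty */
};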

The list of files for the reverse path check is *not* going through
struct file now - it's a singly linked list going through epitems_head
instances.  It's terminated by ERR_PTR(-1) (== EP_UNACTIVE_PTR),
so the elements of the list can be distinguished by head->next != NULL.

epitems_head instances are allocated at ep_insert() time (by
attach_epitem()) and freed either by ep_remove() (if it empties
the set of epitems *and* the epitems_head is not on the
reverse path check list) or by clear_tfile_check_list() when
that list is emptied (if the set of epitems is empty by that
point).  Allocations are done from a separate slab - the minimal
kmalloc() size is too large on some architectures.
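
attach_epitem() itself is further down in the patch (past the quoted hunks);
from the description above it has to look roughly like this - allocate an
epitems_head only for non-epoll files that don't have one yet, publish it
under ->f_lock, then hook the epitem in (details, e.g. the handling of a
racing removal, may differ in the actual patch):

static int attach_epitem(struct file *file, struct epitem *epi)
{
        struct epitems_head *to_free = NULL;
        struct hlist_head *head = NULL;
        struct eventpoll *ep = NULL;

        if (is_file_epoll(file))
                ep = file->private_data;

        if (ep) {
                head = &ep->refs;       /* epoll files keep the list in eventpoll */
        } else if (!READ_ONCE(file->f_ep)) {
allocate:
                to_free = kmem_cache_zalloc(ephead_cache, GFP_KERNEL);
                if (!to_free)
                        return -ENOMEM;
                head = &to_free->epitems;
        }
        spin_lock(&file->f_lock);
        if (!file->f_ep) {
                if (unlikely(!head)) {
                        /* ->f_ep went away under us; retry with an allocation */
                        spin_unlock(&file->f_lock);
                        goto allocate;
                }
                file->f_ep = head;
                to_free = NULL;
        }
        hlist_add_head_rcu(&epi->fllink, file->f_ep);
        spin_unlock(&file->f_lock);
        free_ephead(to_free);           /* unused if somebody else set ->f_ep first */
        return 0;
}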

As a result, we trim struct file _and_ get rid of the games with
temporary file references.

Locking and barriers are interesting (aren't they always); see unlist_file()
and ep_remove() for details.  The non-obvious part is that ep_remove() needs
to decide whether it will be the one to free the damn thing *before* actually
storing NULL to head->epitems.first - that's what smp_load_acquire is for
in there.  The unlist_file() lockless path is safe, since we hit it only if
we observe NULL in head->epitems.first, and whoever had done that store is
guaranteed to have observed non-NULL in head->next.  IOW, their last access
had been the store of NULL into ->epitems.first and we can safely free
the sucker.  OTOH, we are under rcu_read_lock() and both the epitem and
epitem->file have their freeing RCU-delayed.  So if we see non-NULL
->epitems.first, we can grab ->f_lock (all epitems in there share the
same struct file) and safely recheck the emptiness of ->epitems; again,
->next is still non-NULL, so ep_remove() couldn't have freed head yet.
->f_lock serializes us wrt ep_remove(); the rest is trivial.

Note that once head->epitems becomes empty (and the file's ->f_ep is zeroed),
nothing can get inserted into it - the only remaining reference to head after
that point is from the reverse path check list.
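
The consumer of the reverse path check list (clear_tfile_check_list(), also
past the quoted portion of the patch) then presumably drains it along these
lines, handing each head to unlist_file():

static void clear_tfile_check_list(void)
{
        rcu_read_lock();
        while (tfile_check_list != EP_UNACTIVE_PTR) {
                struct epitems_head *head = tfile_check_list;

                tfile_check_list = head->next;
                unlist_file(head);      /* clears ->next; frees head if its
                                         * set of epitems is already empty */
        }
        rcu_read_unlock();
}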

Signed-off-by: Al Viro 
---
 fs/eventpoll.c            | 168 ++
 fs/file_table.c           |   1 -
 include/linux/eventpoll.h |  11 +--
 include/linux/fs.h        |   5 +-
 4 files changed, 129 insertions(+), 56 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index eea269670168..d3fdabf6fd34 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -215,6 +215,7 @@ struct eventpoll {
 
/* used to optimize loop detection check */
u64 gen;
+   struct hlist_head refs;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
/* used to track busy poll napi_id */
@@ -259,7 +260,45 @@ static struct kmem_cache *pwq_cache __read_mostly;
  * List of files with newly added links, where we may need to limit the number
  * of emanating paths. Protected by the epmutex.
  */
-static LIST_HEAD(tfile_check_list);
+struct epitems_head {
+   struct hlist_head epitems;
+   struct epitems_head *next;
+};
+static struct epitems_head *tfile_check_list = EP_UNACTIVE_PTR;
+
+static struct kmem_cache *ephead_cache __read_mostly;
+
+static inline void free_ephead(struct epitems_head *head)
+{
+   if (head)
+   kmem_cache_free(ephead_cache, head);
+}
+
+static void list_file(struct file *file)
+{
+   struct epitems_head *head;
+
+   head = container_of(file->f_ep, struct epitems_head, epitems);
+   if (!head->next) {
+   head->next = tfile_check_list;
+   tfile_check_list = head;
+   }
+}
+
+static void unlist_file(struct epitems_head *head)
+{
+   struct epitems_head *to_free = head;
+   struct hlist_node *p = rcu_dereference(hlist_first_rcu(&head->epitems));
+   if (p) {
+   struct epitem *epi= container_of(p, struct epitem, fllink);
+   spin_lock(&epi->ffd.file->f_lock);
+   if (!hlist_empty(&head->epitems))
+   to_free = NULL;
+   head->next = NULL;
+   spin_unlock(&epi->ffd.file->f_lock);
+   }
+   free_ephead(to_free);
+}
 
 #ifdef CONFIG_SYSCTL
 
@@ -632,6 +671,8 @@ static void epi_rcu_free(struct rcu_head *head)
 static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 {
struct file *file = epi->ffd.file;
+   struct epitems_head *to_free;
+   struct hlist_head *head;
 
lockdep_assert_irqs_enabled();
 
@@ -642,8 +683,20 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 
/* Remove the current item from the list of epoll hooks */
spin_lock(&file->f_lock);
+   to_free = NULL;
+   head = file->f_ep;
+   if (head->first == &epi