Mateusz Guzik <[email protected]> writes:
> Strictly speaking more error prone than the seq approach, but should be
> faster on weaker-ordered archs thanks to avoided fences.
>
> I'm definitely not going to protest the seqc route.

Linus probably wouldn't be thrilled if I broke epoll again, so let's
stick with the simpler seqcount route.
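
For anyone curious how the read side avoids a torn snapshot, here is a
minimal userspace analogue of the seqcount read-retry pattern, written
with C11 atomics. This is only a sketch: write_both()/read_both() and
the two fields are illustrative stand-ins, not the kernel API, and the
writer is assumed to be serialized externally, the way ep->lock
serializes writers in the patch below.

#include <stdatomic.h>

static _Atomic unsigned int seq;
static _Atomic int field_a, field_b;    /* stand-ins for rdllist/ovflist */

/* Single writer at a time, serialized by an external lock. */
static void write_both(int a, int b)
{
        unsigned int s = atomic_load_explicit(&seq, memory_order_relaxed);

        /* Bump to odd: readers starting now will retry. */
        atomic_store_explicit(&seq, s + 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_release);

        atomic_store_explicit(&field_a, a, memory_order_relaxed);
        atomic_store_explicit(&field_b, b, memory_order_relaxed);

        /* Back to even: publish the update. */
        atomic_store_explicit(&seq, s + 2, memory_order_release);
}

/* Lockless reader: loop until both fields come from one snapshot. */
static void read_both(int *a, int *b)
{
        unsigned int s1, s2;

        do {
                s1 = atomic_load_explicit(&seq, memory_order_acquire);
                *a = atomic_load_explicit(&field_a, memory_order_relaxed);
                *b = atomic_load_explicit(&field_b, memory_order_relaxed);
                atomic_thread_fence(memory_order_acquire);
                s2 = atomic_load_explicit(&seq, memory_order_relaxed);
        } while (s1 != s2 || (s1 & 1));
}

In the kernel proper, read_seqcount_begin()/read_seqcount_retry() hide
the odd/even bookkeeping, and seqcount_spinlock_t associates the write
side with ep->lock so lockdep can verify that writers hold it.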

Nam

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a3090b446af1..22c3f0186476 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -38,6 +38,7 @@
 #include <linux/compat.h>
 #include <linux/rculist.h>
 #include <linux/capability.h>
+#include <linux/seqlock.h>
 #include <net/busy_poll.h>
 
 /*
@@ -190,6 +191,9 @@ struct eventpoll {
        /* Lock which protects rdllist and ovflist */
        spinlock_t lock;
 
+       /* Protect switching between rdllist and ovflist */
+       seqcount_spinlock_t seq;
+
        /* RB tree root used to store monitored fd structs */
        struct rb_root_cached rbr;
 
@@ -382,8 +386,17 @@ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
  */
 static inline int ep_events_available(struct eventpoll *ep)
 {
-       return !list_empty_careful(&ep->rdllist) ||
-               READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
+       bool events_available;
+       unsigned int seq;
+
+       do {
+               seq = read_seqcount_begin(&ep->seq);
+
+               events_available = !list_empty_careful(&ep->rdllist) ||
+                                  READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
+       } while (read_seqcount_retry(&ep->seq, seq));
+
+       return events_available;
 }
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -735,8 +748,12 @@ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
         */
        lockdep_assert_irqs_enabled();
        spin_lock_irq(&ep->lock);
+       write_seqcount_begin(&ep->seq);
+
        list_splice_init(&ep->rdllist, txlist);
        WRITE_ONCE(ep->ovflist, NULL);
+
+       write_seqcount_end(&ep->seq);
        spin_unlock_irq(&ep->lock);
 }
 
@@ -768,6 +785,9 @@ static void ep_done_scan(struct eventpoll *ep,
                        ep_pm_stay_awake(epi);
                }
        }
+
+       write_seqcount_begin(&ep->seq);
+
        /*
         * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
         * releasing the lock, events will be queued in the normal way inside
@@ -779,6 +799,9 @@ static void ep_done_scan(struct eventpoll *ep,
         * Quickly re-inject items left on "txlist".
         */
        list_splice(txlist, &ep->rdllist);
+
+       write_seqcount_end(&ep->seq);
+
        __pm_relax(ep->ws);
 
        if (!list_empty(&ep->rdllist)) {
@@ -1155,6 +1178,7 @@ static int ep_alloc(struct eventpoll **pep)
 
        mutex_init(&ep->mtx);
        spin_lock_init(&ep->lock);
+       seqcount_spinlock_init(&ep->seq, &ep->lock);
        init_waitqueue_head(&ep->wq);
        init_waitqueue_head(&ep->poll_wait);
        INIT_LIST_HEAD(&ep->rdllist);
