irqfd interrupt injection had divergent seqcount snapshot scaffolding in
three places, and inconsistent validity checks between the fast and slow
assert paths:

1. mshv_irqfd_wakeup() snapshotted irqfd_lapic_irq under seqcount, then on
fast-path failure called mshv_assert_irq_slow(), which re-snapshotted both
irqfd_girq_ent and irqfd_lapic_irq under seqcount again — wasteful and
duplicative.

2. The girq_entry_valid check only existed in the slow path.  The fast path
would happily accept a zero-initialised mshv_lapic_irq when routing was not
yet configured for the GSI, potentially injecting vector 0 to VP 0.

3. The slow path's validity predicate was 'guest_irq_num &&
!girq_entry_valid', which short-circuits for GSI 0 (guest_irq_num == 0) and
so bypasses the validity check entirely for that GSI.

4. mshv_irqfd_resampler_ack() read irqfd_lapic_irq.lapic_control without
seqcount protection, which could observe a stale or transient value while
mshv_irqfd_update() was concurrently rewriting irqfd_lapic_irq via
mshv_copy_girq_info()'s memset-and-fill sequence.

Introduce mshv_irqfd_snapshot() that takes a consistent snapshot of both
irqfd_girq_ent and irqfd_lapic_irq inside the seqcount loop; girq_ent is
optional so the resampler ack path can snapshot only the LAPIC IRQ.

Use the helper from mshv_irqfd_resampler_ack() (closes 4),
mshv_irqfd_wakeup() and mshv_irqfd_assign()'s EPOLLIN replay path,
replacing the three ad-hoc seqcount loops.

Move the validity check into mshv_irqfd_wakeup() before either injection
path runs, so the fast path no longer accepts an unrouted irqfd (closes
2).  Use !girq_entry_valid as the condition (closes 3).  Change
mshv_assert_irq_slow() to take a pre-snapshotted const struct
mshv_lapic_irq pointer, eliminating its internal seqcount and SRCU
scaffolding (closes 1).

Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose 
/dev/mshv to VMMs")
Signed-off-by: Stanislav Kinsburskii <[email protected]>
---
 drivers/hv/mshv_eventfd.c |   90 ++++++++++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 38 deletions(-)

diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index 25bdc5e678849..c24069dff9702 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -74,6 +74,27 @@ static inline bool hv_should_clear_interrupt(enum 
hv_interrupt_type type)
 }
 #endif
 
+/*
+ * Snapshot per-irqfd routing state under seqcount protection so callers
+ * see a consistent point-in-time view of irqfd_girq_ent and
+ * irqfd_lapic_irq even if mshv_irqfd_update() runs concurrently.
+ *
+ * @girq_ent may be NULL when the caller only needs the LAPIC IRQ.
+ */
+static void mshv_irqfd_snapshot(struct mshv_irqfd *irqfd,
+                               struct mshv_guest_irq_ent *girq_ent,
+                               struct mshv_lapic_irq *irq)
+{
+       unsigned int seq;
+
+       do {
+               seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+               if (girq_ent)
+                       *girq_ent = irqfd->irqfd_girq_ent;
+               *irq = irqfd->irqfd_lapic_irq;
+       } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+}
+
 static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)
 {
        struct mshv_irqfd_resampler *resampler;
@@ -90,7 +111,11 @@ static void mshv_irqfd_resampler_ack(struct 
mshv_irq_ack_notifier *mian)
        hlist_for_each_entry_srcu(irqfd, &resampler->rsmplr_irqfd_list,
                                 irqfd_resampler_hnode,
                                 srcu_read_lock_held(&partition->pt_irq_srcu)) {
-               if 
(hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
+               struct mshv_lapic_irq irq;
+
+               mshv_irqfd_snapshot(irqfd, NULL, &irq);
+
+               if (hv_should_clear_interrupt(irq.lapic_control.interrupt_type))
                        hv_call_clear_virtual_interrupt(partition->pt_id);
 
                eventfd_signal(irqfd->irqfd_resamplefd);
@@ -198,37 +223,14 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd 
*irqfd,
 }
 #endif
 
-static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd)
+static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd,
+                                const struct mshv_lapic_irq *irq)
 {
        struct mshv_partition *partition = irqfd->irqfd_partn;
-       struct mshv_guest_irq_ent girq_ent;
-       struct mshv_lapic_irq irq;
-       unsigned int seq;
-       int idx;
-
-       idx = srcu_read_lock(&partition->pt_irq_srcu);
-
-       do {
-               seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
-               girq_ent = irqfd->irqfd_girq_ent;
-               irq = irqfd->irqfd_lapic_irq;
-       } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
-
-#if IS_ENABLED(CONFIG_X86)
-       WARN_ON(irqfd->irqfd_resampler &&
-               !irq.lapic_control.level_triggered);
-#endif
-
-       if (girq_ent.guest_irq_num && !girq_ent.girq_entry_valid) {
-               srcu_read_unlock(&partition->pt_irq_srcu, idx);
-               return;
-       }
 
        hv_call_assert_virtual_interrupt(partition->pt_id,
-                                        irq.lapic_vector, irq.lapic_apic_id,
-                                        irq.lapic_control);
-
-       srcu_read_unlock(&partition->pt_irq_srcu, idx);
+                                        irq->lapic_vector, irq->lapic_apic_id,
+                                        irq->lapic_control);
 }
 
 static void mshv_irqfd_resampler_shutdown(struct mshv_irqfd *irqfd)
@@ -308,26 +310,31 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, 
unsigned int mode,
                                                irqfd_wait);
        __poll_t flags = key_to_poll(key);
        int idx;
-       unsigned int seq;
        struct mshv_partition *pt = irqfd->irqfd_partn;
        int ret = 0;
 
        if (flags & EPOLLIN) {
+               struct mshv_guest_irq_ent girq_ent;
                struct mshv_lapic_irq irq;
                u64 cnt;
 
                eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
                idx = srcu_read_lock(&pt->pt_irq_srcu);
-               do {
-                       seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
-                       irq = irqfd->irqfd_lapic_irq;
-               } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+               mshv_irqfd_snapshot(irqfd, &girq_ent, &irq);
+
+               if (!girq_ent.girq_entry_valid)
+                       goto out_unlock;
+
+#if IS_ENABLED(CONFIG_X86)
+               WARN_ON(irqfd->irqfd_resampler &&
+                       !irq.lapic_control.level_triggered);
+#endif
 
                /* An event has been signaled, raise an interrupt */
-               ret = mshv_try_assert_irq_fast(irqfd, &irq);
-               if (ret)
-                       mshv_assert_irq_slow(irqfd);
+               if (mshv_try_assert_irq_fast(irqfd, &irq))
+                       mshv_assert_irq_slow(irqfd, &irq);
 
+out_unlock:
                srcu_read_unlock(&pt->pt_irq_srcu, idx);
 
                ret = 1;
@@ -517,8 +524,15 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,
         */
        events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl);
 
-       if (events & EPOLLIN)
-               mshv_assert_irq_slow(irqfd);
+       if (events & EPOLLIN) {
+               struct mshv_guest_irq_ent girq_ent;
+               struct mshv_lapic_irq irq;
+
+               mshv_irqfd_snapshot(irqfd, &girq_ent, &irq);
+
+               if (girq_ent.girq_entry_valid)
+                       mshv_assert_irq_slow(irqfd, &irq);
+       }
 
        srcu_read_unlock(&pt->pt_irq_srcu, idx);
        return 0;



Reply via email to