The irqfd interrupt injection had duplicated seqcount reads and
inconsistent validity checks between the fast and slow paths:

1. The wakeup handler snapshots irqfd_lapic_irq under seqcount, then on
   fast-path failure calls mshv_assert_irq_slow() which re-reads both
   girq_ent and lapic_irq under seqcount again — wasteful and confusing.

2. The validity check (girq_entry_valid) only existed in the slow path.
   The fast path would blindly accept a zero-initialized structure when
   routing was not configured, potentially injecting vector 0 to VP 0.

3. The condition 'girq_ent.guest_irq_num && !girq_ent.girq_entry_valid'
   short-circuits for GSI 0 (guest_irq_num == 0), bypassing the
   validity check entirely.

4. mshv_irqfd_resampler_ack() reads irqfd_lapic_irq.lapic_control
   without seqcount protection, allowing torn reads when racing with
   mshv_irqfd_update().

Consolidate by:
- Moving the seqcount snapshot and validity check into the wakeup
  handler, performed once before either injection path.
- Changing mshv_assert_irq_slow() to accept a pre-snapshotted
  const struct mshv_lapic_irq pointer, eliminating its internal
  seqcount read and SRCU lock/unlock.
- Using !girq_entry_valid alone as the validity condition, fixing the
  GSI 0 bypass.
- Adding seqcount protection in mshv_irqfd_resampler_ack() to prevent
  torn reads of interrupt_type.

Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose 
/dev/mshv to VMMs")
Signed-off-by: Stanislav Kinsburskii <[email protected]>
---
 drivers/hv/mshv_eventfd.c |   62 +++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index 7275b9eaa7541..b92e7f05aa9cd 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -90,7 +90,15 @@ static void mshv_irqfd_resampler_ack(struct 
mshv_irq_ack_notifier *mian)
        hlist_for_each_entry_srcu(irqfd, &resampler->rsmplr_irqfd_list,
                                 irqfd_resampler_hnode,
                                 srcu_read_lock_held(&partition->pt_irq_srcu)) {
-               if 
(hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
+               struct mshv_lapic_irq irq;
+               unsigned int seq;
+
+               do {
+                       seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+                       irq = irqfd->irqfd_lapic_irq;
+               } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+
+               if (hv_should_clear_interrupt(irq.lapic_control.interrupt_type))
                        hv_call_clear_virtual_interrupt(partition->pt_id);
 
                eventfd_signal(irqfd->irqfd_resamplefd);
@@ -198,36 +206,19 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd 
*irqfd,
 }
 #endif
 
-static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd)
+static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd,
+                                const struct mshv_lapic_irq *irq)
 {
        struct mshv_partition *partition = irqfd->irqfd_partn;
-       struct mshv_guest_irq_ent girq_ent;
-       struct mshv_lapic_irq irq;
-       unsigned int seq;
-       int idx;
-
-       idx = srcu_read_lock(&partition->pt_irq_srcu);
-
-       do {
-               seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
-               girq_ent = irqfd->irqfd_girq_ent;
-               irq = irqfd->irqfd_lapic_irq;
-       } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
-
-       if (girq_ent.guest_irq_num && !girq_ent.girq_entry_valid) {
-               srcu_read_unlock(&partition->pt_irq_srcu, idx);
-               return;
-       }
 
 #if IS_ENABLED(CONFIG_X86)
        WARN_ON(irqfd->irqfd_resampler &&
-               !irq.lapic_control.level_triggered);
+               !irq->lapic_control.level_triggered);
 #endif
 
        hv_call_assert_virtual_interrupt(partition->pt_id,
-                                        irq.lapic_vector, irq.lapic_apic_id,
-                                        irq.lapic_control);
-       srcu_read_unlock(&partition->pt_irq_srcu, idx);
+                                        irq->lapic_vector, irq->lapic_apic_id,
+                                        irq->lapic_control);
 }
 
 static void mshv_irqfd_resampler_shutdown(struct mshv_irqfd *irqfd)
@@ -316,6 +307,7 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, 
unsigned int mode,
        int ret = 0;
 
        if (flags & EPOLLIN) {
+               struct mshv_guest_irq_ent girq_ent;
                struct mshv_lapic_irq irq;
                u64 cnt;
 
@@ -323,14 +315,18 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, 
unsigned int mode,
                idx = srcu_read_lock(&pt->pt_irq_srcu);
                do {
                        seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+                       girq_ent = irqfd->irqfd_girq_ent;
                        irq = irqfd->irqfd_lapic_irq;
                } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
 
+               if (!girq_ent.girq_entry_valid)
+                       goto out_unlock;
+
                /* An event has been signaled, raise an interrupt */
-               ret = mshv_try_assert_irq_fast(irqfd, &irq);
-               if (ret)
-                       mshv_assert_irq_slow(irqfd);
+               if (mshv_try_assert_irq_fast(irqfd, &irq))
+                       mshv_assert_irq_slow(irqfd, &irq);
 
+out_unlock:
                srcu_read_unlock(&pt->pt_irq_srcu, idx);
 
                ret = 1;
@@ -520,8 +516,18 @@ static int mshv_irqfd_assign(struct mshv_partition *pt,
         */
        events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl);
 
-       if (events & EPOLLIN)
-               mshv_assert_irq_slow(irqfd);
+       if (events & EPOLLIN) {
+               struct mshv_lapic_irq irq;
+               unsigned int seq;
+
+               do {
+                       seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+                       irq = irqfd->irqfd_lapic_irq;
+               } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+
+               if (irqfd->irqfd_girq_ent.girq_entry_valid)
+                       mshv_assert_irq_slow(irqfd, &irq);
+       }
 
        srcu_read_unlock(&pt->pt_irq_srcu, idx);
        return 0;



Reply via email to