mshv_irqfd_update() writes both irqfd_girq_ent and irqfd_lapic_irq as a
logical unit under seqcount write protection. Readers must snapshot these
fields inside the seqcount begin/retry loop to obtain a consistent
point-in-time view — otherwise a concurrent update can produce a torn
read where one field comes from the old state and the other from the new.

Both mshv_assert_irq_slow() and mshv_irqfd_wakeup() get this wrong: the
seqcount loop bodies are empty (just spinning until a stable sequence is
observed), and all reads of the protected fields happen after the loop
with no protection from concurrent writes. If mshv_irqfd_update() races
with interrupt assertion, the caller may use a stale or mixed
vector/apic_id/control combination — delivering an interrupt to the
wrong vCPU, with the wrong vector, or with the wrong trigger mode. This
can cause spurious or lost interrupts in the guest, or a stuck interrupt
line in the level-triggered case.

Fix mshv_assert_irq_slow() by snapshotting both irqfd_girq_ent and
irqfd_lapic_irq into local variables inside the seqcount loop, then
using those locals for the validity check and the hypercall.

Fix mshv_irqfd_wakeup() by snapshotting irqfd_lapic_irq inside its
seqcount loop and passing the snapshot to mshv_try_assert_irq_fast(),
so the fast path operates on the consistent copy rather than reading
the field directly outside seqcount protection.

Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose 
/dev/mshv to VMMs")
Signed-off-by: Stanislav Kinsburskii <[email protected]>
---
 drivers/hv/mshv_eventfd.c |   47 +++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index 509911ffcbeee..7275b9eaa7541 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -151,10 +151,10 @@ static int mshv_vp_irq_set_vector(struct mshv_vp *vp, u32 
vector)
  * Try to raise irq for guest via shared vector array. hyp does the actual
  * inject of the interrupt.
  */
-static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
+static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd,
+                                   const struct mshv_lapic_irq *irq)
 {
        struct mshv_partition *partition = irqfd->irqfd_partn;
-       struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
        struct mshv_vp *vp;
 
        if (!(ms_hyperv.ext_features &
@@ -191,7 +191,8 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd 
*irqfd)
        return 0;
 }
 #else /* CONFIG_X86_64 */
-static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
+static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd,
+                                   const struct mshv_lapic_irq *irq)
 {
        return -EOPNOTSUPP;
 }
@@ -200,30 +201,32 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd 
*irqfd)
 static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd)
 {
        struct mshv_partition *partition = irqfd->irqfd_partn;
-       struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
+       struct mshv_guest_irq_ent girq_ent;
+       struct mshv_lapic_irq irq;
        unsigned int seq;
        int idx;
 
-#if IS_ENABLED(CONFIG_X86)
-       WARN_ON(irqfd->irqfd_resampler &&
-               !irq->lapic_control.level_triggered);
-#endif
-
        idx = srcu_read_lock(&partition->pt_irq_srcu);
-       if (irqfd->irqfd_girq_ent.guest_irq_num) {
-               if (!irqfd->irqfd_girq_ent.girq_entry_valid) {
-                       srcu_read_unlock(&partition->pt_irq_srcu, idx);
-                       return;
-               }
 
-               do {
-                       seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
-               } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+       do {
+               seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+               girq_ent = irqfd->irqfd_girq_ent;
+               irq = irqfd->irqfd_lapic_irq;
+       } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+
+       if (girq_ent.guest_irq_num && !girq_ent.girq_entry_valid) {
+               srcu_read_unlock(&partition->pt_irq_srcu, idx);
+               return;
        }
 
-       hv_call_assert_virtual_interrupt(irqfd->irqfd_partn->pt_id,
-                                        irq->lapic_vector, irq->lapic_apic_id,
-                                        irq->lapic_control);
+#if IS_ENABLED(CONFIG_X86)
+       WARN_ON(irqfd->irqfd_resampler &&
+               !irq.lapic_control.level_triggered);
+#endif
+
+       hv_call_assert_virtual_interrupt(partition->pt_id,
+                                        irq.lapic_vector, irq.lapic_apic_id,
+                                        irq.lapic_control);
        srcu_read_unlock(&partition->pt_irq_srcu, idx);
 }
 
@@ -313,16 +316,18 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, 
unsigned int mode,
        int ret = 0;
 
        if (flags & EPOLLIN) {
+               struct mshv_lapic_irq irq;
                u64 cnt;
 
                eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
                idx = srcu_read_lock(&pt->pt_irq_srcu);
                do {
                        seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+                       irq = irqfd->irqfd_lapic_irq;
                } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
 
                /* An event has been signaled, raise an interrupt */
-               ret = mshv_try_assert_irq_fast(irqfd);
+               ret = mshv_try_assert_irq_fast(irqfd, &irq);
                if (ret)
                        mshv_assert_irq_slow(irqfd);
 



Reply via email to