mshv_irqfd_update() writes both irqfd_girq_ent and irqfd_lapic_irq as a
logical unit under seqcount write protection. Readers must snapshot these
fields inside the seqcount begin/retry loop to obtain a consistent
point-in-time view — otherwise a concurrent update can produce a torn
read where one field comes from the old state and the other from the new.
Both mshv_assert_irq_slow() and mshv_irqfd_wakeup() get this wrong: the
seqcount loop bodies are empty (just spinning until a stable sequence is
observed), and all reads of the protected fields happen after the loop
with no protection from concurrent writes. If mshv_irqfd_update() races
with interrupt assertion, the caller may use a stale or mixed
vector/apic_id/control combination — delivering an interrupt to the
wrong vCPU, with the wrong vector, or with the wrong trigger mode. This
can cause spurious or lost interrupts in the guest, or a stuck interrupt
line in the level-triggered case.
Fix mshv_assert_irq_slow() by snapshotting both irqfd_girq_ent and
irqfd_lapic_irq into local variables inside the seqcount loop, then
using those locals for the validity check and the hypercall.
Fix mshv_irqfd_wakeup() by snapshotting irqfd_lapic_irq inside its
seqcount loop and passing the snapshot to mshv_try_assert_irq_fast(),
so the fast path operates on the consistent copy rather than reading
the field directly outside seqcount protection.
Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose
/dev/mshv to VMMs")
Signed-off-by: Stanislav Kinsburskii <[email protected]>
---
drivers/hv/mshv_eventfd.c | 47 +++++++++++++++++++++++++--------------------
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c
index 509911ffcbeee..7275b9eaa7541 100644
--- a/drivers/hv/mshv_eventfd.c
+++ b/drivers/hv/mshv_eventfd.c
@@ -151,10 +151,10 @@ static int mshv_vp_irq_set_vector(struct mshv_vp *vp, u32
vector)
* Try to raise irq for guest via shared vector array. hyp does the actual
* inject of the interrupt.
*/
-static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
+static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd,
+ const struct mshv_lapic_irq *irq)
{
struct mshv_partition *partition = irqfd->irqfd_partn;
- struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
struct mshv_vp *vp;
if (!(ms_hyperv.ext_features &
@@ -191,7 +191,8 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd
*irqfd)
return 0;
}
#else /* CONFIG_X86_64 */
-static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
+static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd,
+ const struct mshv_lapic_irq *irq)
{
return -EOPNOTSUPP;
}
@@ -200,30 +201,32 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd
*irqfd)
static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd)
{
struct mshv_partition *partition = irqfd->irqfd_partn;
- struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
+ struct mshv_guest_irq_ent girq_ent;
+ struct mshv_lapic_irq irq;
unsigned int seq;
int idx;
-#if IS_ENABLED(CONFIG_X86)
- WARN_ON(irqfd->irqfd_resampler &&
- !irq->lapic_control.level_triggered);
-#endif
-
idx = srcu_read_lock(&partition->pt_irq_srcu);
- if (irqfd->irqfd_girq_ent.guest_irq_num) {
- if (!irqfd->irqfd_girq_ent.girq_entry_valid) {
- srcu_read_unlock(&partition->pt_irq_srcu, idx);
- return;
- }
- do {
- seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
- } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+ do {
+ seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+ girq_ent = irqfd->irqfd_girq_ent;
+ irq = irqfd->irqfd_lapic_irq;
+ } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
+
+ if (girq_ent.guest_irq_num && !girq_ent.girq_entry_valid) {
+ srcu_read_unlock(&partition->pt_irq_srcu, idx);
+ return;
}
- hv_call_assert_virtual_interrupt(irqfd->irqfd_partn->pt_id,
- irq->lapic_vector, irq->lapic_apic_id,
- irq->lapic_control);
+#if IS_ENABLED(CONFIG_X86)
+ WARN_ON(irqfd->irqfd_resampler &&
+ !irq.lapic_control.level_triggered);
+#endif
+
+ hv_call_assert_virtual_interrupt(partition->pt_id,
+ irq.lapic_vector, irq.lapic_apic_id,
+ irq.lapic_control);
srcu_read_unlock(&partition->pt_irq_srcu, idx);
}
@@ -313,16 +316,18 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait,
unsigned int mode,
int ret = 0;
if (flags & EPOLLIN) {
+ struct mshv_lapic_irq irq;
u64 cnt;
eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
idx = srcu_read_lock(&pt->pt_irq_srcu);
do {
seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
+ irq = irqfd->irqfd_lapic_irq;
} while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
/* An event has been signaled, raise an interrupt */
- ret = mshv_try_assert_irq_fast(irqfd);
+ ret = mshv_try_assert_irq_fast(irqfd, &irq);
if (ret)
mshv_assert_irq_slow(irqfd);