From: Jan Kiszka <[email protected]> Sent: Monday, February 16, 2026 8:25 
AM
> 
> Resolves the following lockdep report when booting PREEMPT_RT on Hyper-V
> with related guest support enabled:
> 
> [    1.127941] hv_vmbus: registering driver hyperv_drm
> 
> [    1.132518] =============================
> [    1.132519] [ BUG: Invalid wait context ]
> [    1.132521] 6.19.0-rc8+ #9 Not tainted
> [    1.132524] -----------------------------
> [    1.132525] swapper/0/0 is trying to lock:
> [    1.132526] ffff8b9381bb3c90 (&channel->sched_lock){....}-{3:3}, at: 
> vmbus_chan_sched+0xc4/0x2b0
> [    1.132543] other info that might help us debug this:
> [    1.132544] context-{2:2}
> [    1.132545] 1 lock held by swapper/0/0:
> [    1.132547]  #0: ffffffffa010c4c0 (rcu_read_lock){....}-{1:3}, at: 
> vmbus_chan_sched+0x31/0x2b0
> [    1.132557] stack backtrace:
> [    1.132560] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.19.0-rc8+ 
> #9 PREEMPT_{RT,(lazy)}
> [    1.132565] Hardware name: Microsoft Corporation Virtual Machine/Virtual 
> Machine, BIOS Hyper-V UEFI Release v4.1 09/25/2025
> [    1.132567] Call Trace:
> [    1.132570]  <IRQ>
> [    1.132573]  dump_stack_lvl+0x6e/0xa0
> [    1.132581]  __lock_acquire+0xee0/0x21b0
> [    1.132592]  lock_acquire+0xd5/0x2d0
> [    1.132598]  ? vmbus_chan_sched+0xc4/0x2b0
> [    1.132606]  ? lock_acquire+0xd5/0x2d0
> [    1.132613]  ? vmbus_chan_sched+0x31/0x2b0
> [    1.132619]  rt_spin_lock+0x3f/0x1f0
> [    1.132623]  ? vmbus_chan_sched+0xc4/0x2b0
> [    1.132629]  ? vmbus_chan_sched+0x31/0x2b0
> [    1.132634]  vmbus_chan_sched+0xc4/0x2b0
> [    1.132641]  vmbus_isr+0x2c/0x150
> [    1.132648]  __sysvec_hyperv_callback+0x5f/0xa0
> [    1.132654]  sysvec_hyperv_callback+0x88/0xb0
> [    1.132658]  </IRQ>
> [    1.132659]  <TASK>
> [    1.132660]  asm_sysvec_hyperv_callback+0x1a/0x20
> 
> As code paths that handle vmbus IRQs use sleeping locks under PREEMPT_RT,
> the vmbus_isr execution needs to be moved into thread context. Open-
> coding this allows skipping the IPI that irq_work would additionally
> bring and which we do not need, given that we are invoked from an IRQ,
> never from an NMI.
> 
> This affects both x86 and arm64, therefore hook into the common driver
> logic.
> 
> Signed-off-by: Jan Kiszka <[email protected]>

Tested this patch in combination with the related SCSI driver patch.
Tested three configurations with a recent linux-next kernel, either
20260128 or 20260205.

1) Normal Linux kernel
2) Normal Linux kernel plus CONFIG_PROVE_LOCKING
3) PREEMPT_RT kernel plus CONFIG_PROVE_LOCKING

Tested these three configurations in an x86/x64 VM on a local Hyper-V
and again in an ARM64 VM in the Azure public cloud. With all
combinations, ran the "stress-ng" command provided by Florian
Bezdeka for several minutes. Saw no issues related to these patches.
Presumably the lockdep report that Saurabh Sengar saw came from the
normal kernel with CONFIG_PROVE_LOCKING; that report also appears to be
fixed in this version of the patch due to adding lockdep_hardirq_threaded().

However, I noted one additional locking problem in the ARM64 Azure
VM, which has multiple PCI pass-thru devices -- one Mellanox NIC VF and
two NVMe controllers. The first PCI device to be brought online gets
this lockdep report, though Linux continues to run without problems:

[    8.128629] hv_vmbus: registering driver hv_pci
[    8.132276] hv_pci ad26ad39-fa5e-4d12-9825-fa62e9c88483: PCI VMBus probing: 
Using version 0x10004
[    8.142956] hv_pci ad26ad39-fa5e-4d12-9825-fa62e9c88483: PCI host bridge to 
bus fa5e:00
[    8.143231] pci_bus fa5e:00: root bus resource [mem 0xfc0000000-0xfc00fffff 
window]
[    8.143272] pci_bus fa5e:00: No busn resource found for root bus, will use 
[bus 00-ff]
[    8.154069] =============================
[    8.156609] [ BUG: Invalid wait context ]
[    8.159209] 6.19.0-rc7rt-next-20260128+ #9 Tainted: G            E
[    8.163582] -----------------------------
[    8.166323] systemd-udevd/575 is trying to lock:
[    8.169163] ffff00011fb62260 (&hbus->device_list_lock){+.+.}-{3:3}, at: 
get_pcichild_wslot+0x30/0xe0 [pci_hyperv]
[    8.175792] other info that might help us debug this:
[    8.179187] context-{5:5}
[    8.180954] 3 locks held by systemd-udevd/575:
[    8.183048]  #0: ffff000116e50100 (&dev->mutex){....}-{4:4}, at: 
__device_driver_lock+0x4c/0xb0
[    8.193285]  #1: ffff00011fb62118 (&hbus->state_lock){+.+.}-{4:4}, at: 
hv_pci_probe+0x32c/0x590 [pci_hyperv]
[    8.199565]  #2: ffffa40f7caa61e0 (pci_lock){....}-{2:2}, at: 
pci_bus_read_config_dword+0x64/0xf8
[    8.205112] stack backtrace:
[    8.207037] CPU: 0 UID: 0 PID: 575 Comm: systemd-udevd Tainted: G            
E       6.19.0-rc7rt-next-20260128+ #9 PREEMPT_RT
[    8.209134] Tainted: [E]=UNSIGNED_MODULE
[    8.219505] Hardware name: Microsoft Corporation Virtual Machine/Virtual 
Machine, BIOS Hyper-V UEFI Release v4.1 06/10/2025
[    8.226029] Call trace:
[    8.227433]  show_stack+0x20/0x38 (C)
[    8.229541]  dump_stack_lvl+0x9c/0x158
[    8.231698]  dump_stack+0x18/0x28
[    8.233799]  __lock_acquire+0x488/0x1e20
[    8.236373]  lock_acquire+0x11c/0x388
[    8.238783]  rt_spin_lock+0x54/0x230
[    8.241138]  get_pcichild_wslot+0x30/0xe0 [pci_hyperv]
[    8.244550]  hv_pcifront_read_config+0x3c/0x98 [pci_hyperv]
[    8.248323]  pci_bus_read_config_dword+0x88/0xf8
[    8.250419]  pci_bus_generic_read_dev_vendor_id+0x3c/0x1c0
[    8.252517]  pci_bus_read_dev_vendor_id+0x54/0x80
[    8.263922]  pci_scan_single_device+0x88/0x100
[    8.266903]  pci_scan_slot+0x74/0x1e0
[    8.269208]  pci_scan_child_bus_extend+0x50/0x328
[    8.271978]  pci_scan_root_bus_bridge+0xc4/0xf8
[    8.274705]  hv_pci_probe+0x390/0x590 [pci_hyperv]
[    8.277584]  vmbus_probe+0x4c/0xb0 [hv_vmbus]
[    8.279688]  really_probe+0xd4/0x3d8
[    8.285954]  __driver_probe_device+0x90/0x1a0
[    8.288645]  driver_probe_device+0x44/0x148
[    8.291011]  __driver_attach+0x154/0x290
[    8.293201]  bus_for_each_dev+0x80/0xf0
[    8.295407]  driver_attach+0x2c/0x40
[    8.297478]  bus_add_driver+0x128/0x270
[    8.299607]  driver_register+0x68/0x138
[    8.302179]  __vmbus_driver_register+0x98/0xc0 [hv_vmbus]
[    8.305535]  init_hv_pci_drv+0x198/0xff8 [pci_hyperv]
[    8.308566]  do_one_initcall+0x70/0x400
[    8.310957]  do_init_module+0x60/0x280
[    8.313393]  load_module+0x2308/0x2680
[    8.315535]  init_module_from_file+0xe0/0x110
[    8.318432]  idempotent_init_module+0x194/0x280
[    8.321141]  __arm64_sys_finit_module+0x74/0xf8
[    8.323874]  invoke_syscall+0x6c/0xf8
[    8.326213]  el0_svc_common.constprop.0+0xe0/0xf0
[    8.329068]  do_el0_svc+0x24/0x38
[    8.331070]  el0_svc+0x164/0x3c8
[    8.333137]  el0t_64_sync_handler+0xd0/0xe8
[    8.335599]  el0t_64_sync+0x1b0/0x1b8
[    8.338598] pci fa5e:00:00.0: [1414:b111] type 00 class 0x010802 PCIe 
Endpoint
[    8.340646] pci fa5e:00:00.0: BAR 0 [mem 0xfc0000000-0xfc00fffff 64bit]
[    8.357759] pci_bus fa5e:00: busn_res: [bus 00-ff] end is updated to 00

The lockdep report would also be seen in an x86/x64 VM in Azure, though I
did not explicitly test that combination. I have not looked at what it would
take to fix this for PREEMPT_RT. But the fix would be a separate patch that
does not affect the validity of this patch.

So for this patch,
Reviewed-by: Michael Kelley <[email protected]>
Tested-by: Michael Kelley <[email protected]>

> ---
> 
> Changes in v3:
>  - move logic to generic vmbus driver, targeting arm64 as well
>  - annotate non-RT path with lockdep_hardirq_threaded
>  - only teardown if setup ran
> 
> Changes in v2:
>  - reorder vmbus_irq_pending clearing to fix a race condition
> 
>  drivers/hv/vmbus_drv.c | 66 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 65 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 6785ad63a9cb..749a2e68af05 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -25,6 +25,7 @@
>  #include <linux/cpu.h>
>  #include <linux/sched/isolation.h>
>  #include <linux/sched/task_stack.h>
> +#include <linux/smpboot.h>
> 
>  #include <linux/delay.h>
>  #include <linux/panic_notifier.h>
> @@ -1350,7 +1351,7 @@ static void vmbus_message_sched(struct
> hv_per_cpu_context *hv_cpu, void *message
>       }
>  }
> 
> -void vmbus_isr(void)
> +static void __vmbus_isr(void)
>  {
>       struct hv_per_cpu_context *hv_cpu
>               = this_cpu_ptr(hv_context.cpu_context);
> @@ -1363,6 +1364,53 @@ void vmbus_isr(void)
> 
>       add_interrupt_randomness(vmbus_interrupt);
>  }
> +
> +static DEFINE_PER_CPU(bool, vmbus_irq_pending);
> +static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd);
> +
> +static void vmbus_irqd_wake(void)
> +{
> +     struct task_struct *tsk = __this_cpu_read(vmbus_irqd);
> +
> +     __this_cpu_write(vmbus_irq_pending, true);
> +     wake_up_process(tsk);
> +}
> +
> +static void vmbus_irqd_setup(unsigned int cpu)
> +{
> +     sched_set_fifo(current);
> +}
> +
> +static int vmbus_irqd_should_run(unsigned int cpu)
> +{
> +     return __this_cpu_read(vmbus_irq_pending);
> +}
> +
> +static void run_vmbus_irqd(unsigned int cpu)
> +{
> +     __this_cpu_write(vmbus_irq_pending, false);
> +     __vmbus_isr();
> +}
> +
> +static bool vmbus_irq_initialized;
> +
> +static struct smp_hotplug_thread vmbus_irq_threads = {
> +     .store                  = &vmbus_irqd,
> +     .setup                  = vmbus_irqd_setup,
> +     .thread_should_run      = vmbus_irqd_should_run,
> +     .thread_fn              = run_vmbus_irqd,
> +     .thread_comm            = "vmbus_irq/%u",
> +};
> +
> +void vmbus_isr(void)
> +{
> +     if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
> +             vmbus_irqd_wake();
> +     } else {
> +             lockdep_hardirq_threaded();
> +             __vmbus_isr();
> +     }
> +}
>  EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl");
> 
>  static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
> @@ -1462,6 +1510,13 @@ static int vmbus_bus_init(void)
>        * the VMbus interrupt handler.
>        */
> 
> +     if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) {
> +             ret = smpboot_register_percpu_thread(&vmbus_irq_threads);
> +             if (ret)
> +                     goto err_kthread;
> +             vmbus_irq_initialized = true;
> +     }
> +
>       if (vmbus_irq == -1) {
>               hv_setup_vmbus_handler(vmbus_isr);
>       } else {
> @@ -1507,6 +1562,11 @@ static int vmbus_bus_init(void)
>               free_percpu(vmbus_evt);
>       }
>  err_setup:
> +     if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
> +             smpboot_unregister_percpu_thread(&vmbus_irq_threads);
> +             vmbus_irq_initialized = false;
> +     }
> +err_kthread:
>       bus_unregister(&hv_bus);
>       return ret;
>  }
> @@ -2976,6 +3036,10 @@ static void __exit vmbus_exit(void)
>               free_percpu_irq(vmbus_irq, vmbus_evt);
>               free_percpu(vmbus_evt);
>       }
> +     if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
> +             smpboot_unregister_percpu_thread(&vmbus_irq_threads);
> +             vmbus_irq_initialized = false;
> +     }
>       for_each_online_cpu(cpu) {
>               struct hv_per_cpu_context *hv_cpu
>                       = per_cpu_ptr(hv_context.cpu_context, cpu);
> --
> 2.47.3

Reply via email to