On Tue, Sep 09, 2025 at 05:10:08PM -0700, Mukesh Rathor wrote:

<snip>

> +static noinline __noclone void crash_nmi_callback(struct pt_regs *regs)
> +{
> +     struct hv_input_disable_hyp_ex *input;
> +     u64 status;
> +     int msecs = 1000, ccpu = smp_processor_id();
> +
> +     if (ccpu == 0) {
> +             /* crash_save_cpu() will be done in the kexec path */
> +             cpu_emergency_stop_pt();        /* disable performance trace */
> +             atomic_inc(&crash_cpus_wait);
> +     } else {
> +             crash_save_cpu(regs, ccpu);
> +             cpu_emergency_stop_pt();        /* disable performance trace */
> +             atomic_inc(&crash_cpus_wait);
> +             for (;;);                       /* cause no vmexits */
> +     }
> +
> +     while (atomic_read(&crash_cpus_wait) < num_online_cpus() && msecs--)
> +             mdelay(1);
> +
> +     stop_nmi();
> +     if (!hv_has_crashed)
> +             hv_notify_prepare_hyp();
> +
> +     if (crashing_cpu == -1)
> +             crashing_cpu = ccpu;            /* crash cmd uses this */
> +
> +     hv_hvcrash_ctxt_save();
> +     hv_mark_tss_not_busy();
> +     hv_crash_fixup_kernpt();
> +
> +     input = *this_cpu_ptr(hyperv_pcpu_input_arg);
> +     memset(input, 0, sizeof(*input));
> +     input->rip = trampoline_pa;     /* PA of hv_crash_asm32 */
> +     input->arg = devirt_cr3arg;     /* PA of trampoline page table L4 */
> +
> +     status = hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL);
> +
> +     /* Devirt failed, just reboot as things are in very bad state now */
> +     native_wrmsrq(HV_X64_MSR_RESET, 1);    /* get hv to reboot */

AFAIU here ...

> +}
> +
> +/*
> + * Generic nmi callback handler: could be called without any crash also.
> + *   hv crash: hypervisor injects nmi's into all cpus
> + *   lx crash: panicing cpu sends nmi to all but self via crash_stop_other_cpus
> + */
> +static int hv_crash_nmi_local(unsigned int cmd, struct pt_regs *regs)
> +{
> +     int ccpu = smp_processor_id();
> +
> +     if (!hv_has_crashed && hv_cda && hv_cda->cda_valid)
> +             hv_has_crashed = 1;
> +
> +     if (!hv_has_crashed && !lx_has_crashed)
> +             return NMI_DONE;        /* ignore the nmi */
> +
> +     if (hv_has_crashed) {
> +             if (!kexec_crash_loaded() || !hv_crash_enabled) {
> +                     if (ccpu == 0) {
> +                             native_wrmsrq(HV_X64_MSR_RESET, 1); /* reboot */

and here the machine will be reset, which in both cases prevents
collecting the VMRS file and thus makes it impossible to debug nested
hypervisor failures.

Perhaps it is worth keeping the state in every case (not just nested),
but at least the nested state should be preserved.

Thanks,
Stanislav

> -- 
> 2.36.1.vfs.0.0
> 

Reply via email to