David Woodhouse <dw...@infradead.org> writes: > As discussed at length already, one potential 'workaround' for KVM > brokenness in old kernels (<3.9) with old CPUs (without 'unrestricted > guest' support), is to properly reset the PAM registers in the chipset. > > I say 'workaround' but this would be a proper fix in its own right, and > as a side-effect would help to work around the problem we're actually > suffering. > > To do it properly, we need to distinguish between a 'hard' reset > triggered by the PIIX3 RCR register, which resets the PAM configuration, > and a 'soft' reset triggered for example by the keyboard controller, > which doesn't. > > This patch attempts to introduce a ResetType into the code logic. Rather > than propagating that ResetType through the entire stack of device reset > functions, I've added a 'qemu_last_reset_get()' function so that the > devices which *do* care can look for it. > > Comments? There are a whole bunch more qemu_system_reset_request() calls > which will need a ResetType added, if I do it this way...
The existing API does suck, but there also isn't a universal "hard" and "soft" reset. If we're going to touch a bunch of code, we should try to come up with the right interface. qemu_system_reset_request() today does the following: 1) indicate that a reset is pending 2) break the vcpus out of their loops and wait for them to all quiesce 3) invoke qemu_devices_reset() (or a machine specific reset handler) a) this also resets the cpus 4) resume all vcpus I think what we really need is a way to have finer control over what happens in step (3). Instead of passing a ResetType, I think we should introduce a new function that takes a function pointer/opaque that gets invoked for step (3). The existing qemu_system_reset_request() would call this function with a function pointer that invokes qemu_devices_reset(). But you could also invoke this new function with your own callback that let you control the behavior of reset. It's a bit ugly, but you would still need to set some sort of global flag to tell the PIIX to do a soft reset. However, over time, as we eliminate the list of reset notifiers, we could call a different reset method. Plus, with this approach, you don't have to touch a bunch of different devices... Regards, Anthony Liguori > > diff --git a/hw/apb_pci.c b/hw/apb_pci.c > index 7eb0c2b..79d7466 100644 > --- a/hw/apb_pci.c > +++ b/hw/apb_pci.c > @@ -135,9 +135,9 @@ static void apb_config_writel (void *opaque, hwaddr addr, > s->reset_control |= val & RESET_WMASK; > if (val & SOFT_POR) { > s->nr_resets = 0; > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_SOFT); > } else if (val & SOFT_XIR) { > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_SOFT); > } > } > break; > diff --git a/hw/arm_sysctl.c b/hw/arm_sysctl.c > index 7ecb7da..b553539 100644 > --- a/hw/arm_sysctl.c > +++ b/hw/arm_sysctl.c > @@ -239,7 +239,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, > if (s->lockval == LOCK_VALUE) { > s->resetlevel = val; > if (val & 0x100) { > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_HARD); > } > } > break; > @@ -248,7 +248,7 @@ static void arm_sysctl_write(void *opaque, hwaddr offset, > if (s->lockval == LOCK_VALUE) { > s->resetlevel = val; > if (val & 0x04) { > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_HARD); > } > } > break; > diff --git a/hw/cuda.c b/hw/cuda.c > index b36c535..20c0894 100644 > --- a/hw/cuda.c > +++ b/hw/cuda.c > @@ -529,7 +529,7 @@ static void cuda_receive_packet(CUDAState *s, > obuf[0] = CUDA_PACKET; > obuf[1] = 0; > cuda_send_packet_to_host(s, obuf, 2); > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_HARD); > break; > default: > break; > diff --git a/hw/m48t59.c b/hw/m48t59.c > index 427d95b..5a21701 100644 > --- a/hw/m48t59.c > +++ b/hw/m48t59.c > @@ -166,7 +166,7 @@ static void watchdog_cb (void *opaque) > NVRAM->buffer[0x1FF7] = 0x00; > NVRAM->buffer[0x1FFC] &= ~0x40; > /* May it be a hw CPU Reset instead ? */ > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_HARD); > } else { > qemu_set_irq(NVRAM->IRQ, 1); > qemu_set_irq(NVRAM->IRQ, 0); > diff --git a/hw/pc.c b/hw/pc.c > index 53cc173..d138a92 100644 > --- a/hw/pc.c > +++ b/hw/pc.c > @@ -440,7 +440,7 @@ static void port92_write(void *opaque, hwaddr addr, > uint64_t val, > s->outport = val; > qemu_set_irq(*s->a20_out, (val >> 1) & 1); > if (val & 1) { > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_SOFT); > } > } > > diff --git a/hw/pckbd.c b/hw/pckbd.c > index 3bad09b..b55d61e 100644 > --- a/hw/pckbd.c > +++ b/hw/pckbd.c > @@ -220,7 +220,8 @@ static void outport_write(KBDState *s, uint32_t val) > qemu_set_irq(*s->a20_out, (val >> 1) & 1); > } > if (!(val & 1)) { > - qemu_system_reset_request(); > + /* This should be a soft reset, not full chipset reset. */ > + qemu_system_reset_request(QEMU_RESET_SOFT); > } > } > > @@ -299,7 +300,7 @@ static void kbd_write_command(void *opaque, hwaddr addr, > s->outport &= ~KBD_OUT_A20; > break; > case KBD_CCMD_RESET: > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_SOFT); > break; > case KBD_CCMD_NO_OP: > /* ignore that */ > diff --git a/hw/piix_pci.c b/hw/piix_pci.c > index 6c77e49..55654c0 100644 > --- a/hw/piix_pci.c > +++ b/hw/piix_pci.c > @@ -171,6 +171,27 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, > int version_id) > return 0; > } > > +static void i440fx_reset(DeviceState *ds) > +{ > + PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, ds); > + PCII440FXState *d = DO_UPCAST(PCII440FXState, dev, dev); > + uint8_t *pci_conf = d->dev.config; > + > + if (qemu_last_reset_get() < QEMU_RESET_HARD) > + return; > + > + pci_conf[0x59] = 0x00; // Reset PAM setup > + pci_conf[0x5a] = 0x00; > + pci_conf[0x5b] = 0x00; > + pci_conf[0x5c] = 0x00; > + pci_conf[0x5d] = 0x00; > + pci_conf[0x5e] = 0x00; > + pci_conf[0x5f] = 0x00; > + pci_conf[0x72] = 0x02; // And SMM > + > + i440fx_update_memory_mappings(d); > +} > + > static int i440fx_post_load(void *opaque, int version_id) > { > PCII440FXState *d = opaque; > @@ -521,7 +542,10 @@ static void rcr_write(void *opaque, hwaddr addr, > uint64_t val, unsigned len) > PIIX3State *d = opaque; > > if (val & 4) { > - qemu_system_reset_request(); > + if (val & 2) > + qemu_system_reset_request(QEMU_RESET_HARD); > + else > + qemu_system_reset_request(QEMU_RESET_SOFT); > return; > } > d->rcr = val & 2; /* keep System Reset type only */ > @@ -615,6 +639,7 @@ static void i440fx_class_init(ObjectClass *klass, void > *data) > dc->desc = "Host bridge"; > dc->no_user = 1; > dc->vmsd = &vmstate_i440fx; > + dc->reset = i440fx_reset; > } > > static const TypeInfo i440fx_info = { > diff --git a/hw/watchdog.c b/hw/watchdog.c > index 072d256..ba5aa7e 100644 > --- a/hw/watchdog.c > +++ b/hw/watchdog.c > @@ -117,7 +117,7 @@ void watchdog_perform_action(void) > switch(watchdog_action) { > case WDT_RESET: /* same as 'system_reset' in monitor */ > watchdog_mon_event("reset"); > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_HARD); > break; > > case WDT_SHUTDOWN: /* same as 'system_powerdown' in monitor */ > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > index 1d9599e..90635a5 100644 > --- a/include/sysemu/sysemu.h > +++ b/include/sysemu/sysemu.h > @@ -33,6 +33,14 @@ void vm_state_notify(int running, RunState state); > #define VMRESET_SILENT false > #define VMRESET_REPORT true > > +typedef enum ResetType { > + QEMU_RESET_NONE = 0, > + QEMU_RESET_WAKEUP, > + QEMU_RESET_SOFT, > + QEMU_RESET_HARD, > + QEMU_RESET_POWERON, > +} ResetType; > + > void vm_start(void); > void vm_stop(RunState state); > void vm_stop_force_state(RunState state); > @@ -43,7 +51,7 @@ typedef enum WakeupReason { > QEMU_WAKEUP_REASON_PMTIMER, > } WakeupReason; > > -void qemu_system_reset_request(void); > +void qemu_system_reset_request(ResetType type); > void qemu_system_suspend_request(void); > void qemu_register_suspend_notifier(Notifier *notifier); > void qemu_system_wakeup_request(WakeupReason reason); > @@ -55,10 +63,11 @@ void qemu_register_powerdown_notifier(Notifier *notifier); > void qemu_system_debug_request(void); > void qemu_system_vmstop_request(RunState reason); > int qemu_shutdown_requested_get(void); > -int qemu_reset_requested_get(void); > +ResetType qemu_reset_requested_get(void); > +ResetType qemu_last_reset_get(void); > void qemu_system_killed(int signal, pid_t pid); > void qemu_devices_reset(void); > -void qemu_system_reset(bool report); > +void qemu_system_reset(ResetType type, bool report); > > void qemu_add_exit_notifier(Notifier *notify); > void qemu_remove_exit_notifier(Notifier *notify); > diff --git a/kvm-all.c b/kvm-all.c > index 04ec2d5..1177ca1 100644 > --- a/kvm-all.c > +++ b/kvm-all.c > @@ -1599,7 +1599,7 @@ int kvm_cpu_exec(CPUArchState *env) > break; > case KVM_EXIT_SHUTDOWN: > DPRINTF("shutdown\n"); > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_HARD); > ret = EXCP_INTERRUPT; > break; > case KVM_EXIT_UNKNOWN: > diff --git a/qmp.c b/qmp.c > index 55b056b..34c0429 100644 > --- a/qmp.c > +++ b/qmp.c > @@ -95,7 +95,7 @@ void qmp_stop(Error **errp) > > void qmp_system_reset(Error **errp) > { > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_HARD); > } > > void qmp_system_powerdown(Error **erp) > diff --git a/qtest.c b/qtest.c > index 4663a38..61190f4 100644 > --- a/qtest.c > +++ b/qtest.c > @@ -391,7 +391,7 @@ static void qtest_event(void *opaque, int event) > > switch (event) { > case CHR_EVENT_OPENED: > - qemu_system_reset(false); > + qemu_system_reset(QEMU_RESET_POWERON, false); > for (i = 0; i < ARRAY_SIZE(irq_levels); i++) { > irq_levels[i] = 0; > } > diff --git a/savevm.c b/savevm.c > index a8a53ef..035901b 100644 > --- a/savevm.c > +++ b/savevm.c > @@ -2299,7 +2299,7 @@ int load_vmstate(const char *name) > return -EINVAL; > } > > - qemu_system_reset(VMRESET_SILENT); > + qemu_system_reset(QEMU_RESET_WAKEUP, VMRESET_SILENT); > ret = qemu_loadvm_state(f); > > qemu_fclose(f); > diff --git a/target-i386/excp_helper.c b/target-i386/excp_helper.c > index 179ea82..2faf53f 100644 > --- a/target-i386/excp_helper.c > +++ b/target-i386/excp_helper.c > @@ -64,7 +64,7 @@ static int check_exception(CPUX86State *env, int intno, int > *error_code) > > qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); > > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_SOFT); > return EXCP_HLT; > } > #endif > diff --git a/target-i386/helper.c b/target-i386/helper.c > index d1cb4e2..cc0de0e 100644 > --- a/target-i386/helper.c > +++ b/target-i386/helper.c > @@ -1147,7 +1147,7 @@ static void do_inject_x86_mce(void *data) > " triple fault\n", > cpu->cpu_index); > qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_SOFT); > return; > } > if (banks[1] & MCI_STATUS_VAL) { > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > index 9ebf181..f52a3d6 100644 > --- a/target-i386/kvm.c > +++ b/target-i386/kvm.c > @@ -1846,7 +1846,7 @@ int kvm_arch_process_async_events(CPUState *cs) > > if (env->exception_injected == EXCP08_DBLE) { > /* this means triple fault */ > - qemu_system_reset_request(); > + qemu_system_reset_request(QEMU_RESET_SOFT); > env->exit_request = 1; > return 0; > } > diff --git a/vl.c b/vl.c > index c5b0eea..948eb99 100644 > --- a/vl.c > +++ b/vl.c > @@ -1696,7 +1696,8 @@ typedef struct QEMUResetEntry { > > static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers = > QTAILQ_HEAD_INITIALIZER(reset_handlers); > -static int reset_requested; > +static ResetType reset_requested; > +static ResetType last_reset = QEMU_RESET_POWERON; > static int shutdown_requested, shutdown_signal = -1; > static pid_t shutdown_pid; > static int powerdown_requested; > @@ -1717,11 +1718,16 @@ int qemu_shutdown_requested_get(void) > return shutdown_requested; > } > > -int qemu_reset_requested_get(void) > +ResetType qemu_reset_requested_get(void) > { > return reset_requested; > } > > +ResetType qemu_last_reset_get(void) > +{ > + return last_reset; > +} > + > static int qemu_shutdown_requested(void) > { > int r = shutdown_requested; > @@ -1745,10 +1751,10 @@ static void qemu_kill_report(void) > } > } > > -static int qemu_reset_requested(void) > +static ResetType qemu_reset_requested(void) > { > - int r = reset_requested; > - reset_requested = 0; > + ResetType r = reset_requested; > + reset_requested = QEMU_RESET_NONE; > return r; > } > > @@ -1824,8 +1830,9 @@ void qemu_devices_reset(void) > } > } > > -void qemu_system_reset(bool report) > +void qemu_system_reset(ResetType type, bool report) > { > + last_reset = type; > if (current_machine && current_machine->reset) { > current_machine->reset(); > } else { > @@ -1837,12 +1844,12 @@ void qemu_system_reset(bool report) > cpu_synchronize_all_post_reset(); > } > > -void qemu_system_reset_request(void) > +void qemu_system_reset_request(ResetType type) > { > if (no_reboot) { > shutdown_requested = 1; > } else { > - reset_requested = 1; > + reset_requested = type; > } > cpu_stop_current(); > qemu_notify_event(); > @@ -1945,6 +1952,7 @@ void qemu_system_vmstop_request(RunState state) > static bool main_loop_should_exit(void) > { > RunState r; > + ResetType rt; > if (qemu_debug_requested()) { > vm_stop(RUN_STATE_DEBUG); > } > @@ -1960,10 +1968,11 @@ static bool main_loop_should_exit(void) > return true; > } > } > - if (qemu_reset_requested()) { > + rt = qemu_reset_requested(); > + if (rt) { > pause_all_vcpus(); > cpu_synchronize_all_states(); > - qemu_system_reset(VMRESET_REPORT); > + qemu_system_reset(rt, VMRESET_REPORT); > resume_all_vcpus(); > if (runstate_check(RUN_STATE_INTERNAL_ERROR) || > runstate_check(RUN_STATE_SHUTDOWN)) { > @@ -1973,7 +1982,7 @@ static bool main_loop_should_exit(void) > if (qemu_wakeup_requested()) { > pause_all_vcpus(); > cpu_synchronize_all_states(); > - qemu_system_reset(VMRESET_SILENT); > + qemu_system_reset(QEMU_RESET_WAKEUP, VMRESET_SILENT); > resume_all_vcpus(); > monitor_protocol_event(QEVENT_WAKEUP, NULL); > } > @@ -4308,7 +4317,7 @@ int main(int argc, char **argv, char **envp) > qemu_register_reset(qbus_reset_all_fn, sysbus_get_default()); > qemu_run_machine_init_done_notifiers(); > > - qemu_system_reset(VMRESET_SILENT); > + qemu_system_reset(QEMU_RESET_POWERON, VMRESET_SILENT); > if (loadvm) { > if (load_vmstate(loadvm) < 0) { > autostart = 0; > diff --git a/xen-all.c b/xen-all.c > index 110f958..4be465e 100644 > --- a/xen-all.c > +++ b/xen-all.c > @@ -883,11 +883,13 @@ static void cpu_handle_ioreq(void *opaque) > * causes Xen to powerdown the domain. > */ > if (runstate_is_running()) { > + ResetType rt; > if (qemu_shutdown_requested_get()) { > destroy_hvm_domain(false); > } > - if (qemu_reset_requested_get()) { > - qemu_system_reset(VMRESET_REPORT); > + rt = qemu_reset_requested_get(); > + if (rt) { > + qemu_system_reset(rt, VMRESET_REPORT); > destroy_hvm_domain(true); > } > } > > > -- > David Woodhouse Open Source Technology Centre > david.woodho...@intel.com Intel Corporation