Re: [PATCH v2] KVM: nVMX: Reset RFLAGS on VM-exit
On Sun, Mar 03, 2013 at 08:47:11PM +0100, Jan Kiszka wrote: From: Jan Kiszka jan.kis...@siemens.com Ouch, how could this work so well that far? We need to clear RFLAGS to the reset value as specified by the SDM. Particularly, IF must be off after VM-exit! Signed-off-by: Jan Kiszka jan.kis...@siemens.com Reviewed-by: Gleb Natapov g...@redhat.com --- Changes in v2: - use symbolic constant for rflags reset value arch/x86/kvm/vmx.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 486078d..eece4c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7454,6 +7454,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12-host_rsp); kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12-host_rip); + vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); /* * Note that calling vmx_set_cr0 is important, even if cr0 hasn't * actually changed, because it depends on the current state of -- 1.7.3.4 -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Where to download kvm forum 2012 presentations? Thanks
- 本邮件及其附件含有杭州华三通信技术有限公司的保密信息,仅限于发送给上面地址中列出 的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、 或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本 邮件! This e-mail and its attachments contain confidential information from H3C, which is intended only for the person or entity whose address is listed above. Any use of the information contained herein in any way (including, but not limited to, total or partial disclosure, reproduction, or dissemination) by persons other than the intended recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender by phone or email immediately and delete it!
Re: [PATCH v13 1/8] save/load cpu runstate
Il 28/02/2013 13:13, Hu Tao ha scritto: This patch enables preservation of cpu runstate during save/load vm. So when a vm is restored from snapshot, the cpu runstate is restored, too. I don't think this feature is worth breaking backwards migration compatibility. It is usually handled at a higher-level (management, like libvirt). Please make this a separate patch. Paolo See following example: # save two vms: one is running, the other is paused (qemu) info status VM status: running (qemu) savevm running (qemu) stop (qemu) info status VM status: paused (qemu) savevm paused # restore the one running (qemu) info status VM status: paused (qemu) loadvm running (qemu) info status VM status: running # restore the one paused (qemu) loadvm paused (qemu) info status VM status: paused (qemu) cont (qemu)info status VM status: running Signed-off-by: Hu Tao hu...@cn.fujitsu.com --- include/sysemu/sysemu.h | 2 ++ migration.c | 6 +- monitor.c | 5 ++--- savevm.c| 1 + vl.c| 34 ++ 5 files changed, 40 insertions(+), 8 deletions(-) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index b19ec95..f121213 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -19,6 +19,8 @@ extern uint8_t qemu_uuid[]; int qemu_uuid_parse(const char *str, uint8_t *uuid); #define UUID_FMT %02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx +void save_run_state(void); +void load_run_state(void); bool runstate_check(RunState state); void runstate_set(RunState new_state); int runstate_is_running(void); diff --git a/migration.c b/migration.c index 11725ae..c29830e 100644 --- a/migration.c +++ b/migration.c @@ -107,11 +107,7 @@ static void process_incoming_migration_co(void *opaque) /* Make sure all file formats flush their mutable metadata */ bdrv_invalidate_cache_all(); -if (autostart) { -vm_start(); -} else { -runstate_set(RUN_STATE_PAUSED); -} +load_run_state(); } void process_incoming_migration(QEMUFile *f) diff --git a/monitor.c 
b/monitor.c index 32a6e74..bf974b4 100644 --- a/monitor.c +++ b/monitor.c @@ -2059,13 +2059,12 @@ void qmp_closefd(const char *fdname, Error **errp) static void do_loadvm(Monitor *mon, const QDict *qdict) { -int saved_vm_running = runstate_is_running(); const char *name = qdict_get_str(qdict, name); vm_stop(RUN_STATE_RESTORE_VM); -if (load_vmstate(name) == 0 saved_vm_running) { -vm_start(); +if (load_vmstate(name) == 0) { +load_run_state(); } } diff --git a/savevm.c b/savevm.c index a8a53ef..aa631eb 100644 --- a/savevm.c +++ b/savevm.c @@ -2143,6 +2143,7 @@ void do_savevm(Monitor *mon, const QDict *qdict) } saved_vm_running = runstate_is_running(); +save_run_state(); vm_stop(RUN_STATE_SAVE_VM); memset(sn, 0, sizeof(*sn)); diff --git a/vl.c b/vl.c index febd2ea..7991f2e 100644 --- a/vl.c +++ b/vl.c @@ -523,6 +523,7 @@ static int default_driver_check(QemuOpts *opts, void *opaque) /* QEMU state */ static RunState current_run_state = RUN_STATE_PRELAUNCH; +static RunState saved_run_state = RUN_STATE_RUNNING; typedef struct { RunState from; @@ -546,6 +547,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING }, +{ RUN_STATE_POSTMIGRATE, RUN_STATE_PAUSED }, { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, @@ -556,6 +558,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE }, { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, +{ RUN_STATE_RESTORE_VM, RUN_STATE_PAUSED }, { RUN_STATE_RUNNING, RUN_STATE_DEBUG }, { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR }, @@ -585,11 +588,39 @@ static const RunStateTransition runstate_transitions_def[] = { static bool runstate_valid_transitions[RUN_STATE_MAX][RUN_STATE_MAX]; +void save_run_state(void) +{ +saved_run_state = current_run_state; +} + +void load_run_state(void) +{ +if (saved_run_state == RUN_STATE_RUNNING) { 
+vm_start(); +} else if (!runstate_check(saved_run_state)) { +runstate_set(saved_run_state); +} else { +; /* leave unchanged */ +} +} + bool runstate_check(RunState state) { return current_run_state == state; } +static void runstate_save(QEMUFile *f, void *opaque) +{ +qemu_put_byte(f, saved_run_state); +} + +static int runstate_load(QEMUFile *f, void *opaque, int version_id) +{ +saved_run_state = qemu_get_byte(f); + +return 0; +} +
Re: [PATCH v13 2/8] start vm after resetting it
Il 28/02/2013 13:13, Hu Tao ha scritto: From: Wen Congyang we...@cn.fujitsu.com The guest should run after resetting it, but it does not run if its old state is RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED. We don't set runstate to RUN_STATE_PAUSED when resetting the guest, so the runstate will be changed from RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED to RUN_STATE_RUNNING (not RUN_STATE_PAUSED). This is also debatable. In particular, restarting an INTERNAL_ERROR guest makes it harder to inspect the state at the time of the failure. INTERNAL_ERROR should never happen, let's separate this patch too. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 4/8] add a new runstate: RUN_STATE_GUEST_PANICKED
Il 28/02/2013 13:13, Hu Tao ha scritto: The guest will be in this state when it is panicked. Signed-off-by: Wen Congyang we...@cn.fujitsu.com Signed-off-by: Hu Tao hu...@cn.fujitsu.com --- migration.c | 1 + qapi-schema.json | 6 +- qmp.c| 3 ++- vl.c | 11 ++- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/migration.c b/migration.c index c29830e..fa17b82 100644 --- a/migration.c +++ b/migration.c @@ -698,6 +698,7 @@ static void *buffered_file_thread(void *opaque) int64_t start_time, end_time; DPRINTF(done iterating\n); +save_run_state(); start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); if (old_vm_running) { diff --git a/qapi-schema.json b/qapi-schema.json index 28b070f..8f1d138 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -174,11 +174,15 @@ # @suspended: guest is suspended (ACPI S3) # # @watchdog: the watchdog action is configured to pause and has been triggered +# +# @guest-panicked: the panicked action is configured to pause and has been +# triggered. 
## { 'enum': 'RunState', 'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused', 'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm', -'running', 'save-vm', 'shutdown', 'suspended', 'watchdog' ] } +'running', 'save-vm', 'shutdown', 'suspended', 'watchdog', +'guest-panicked' ] } ## # @SnapshotInfo diff --git a/qmp.c b/qmp.c index 5f1bed1..f5027f6 100644 --- a/qmp.c +++ b/qmp.c @@ -150,7 +150,8 @@ void qmp_cont(Error **errp) Error *local_err = NULL; if (runstate_check(RUN_STATE_INTERNAL_ERROR) || - runstate_check(RUN_STATE_SHUTDOWN)) { +runstate_check(RUN_STATE_SHUTDOWN) || +runstate_check(RUN_STATE_GUEST_PANICKED)) { error_set(errp, QERR_RESET_REQUIRED); return; } else if (runstate_check(RUN_STATE_SUSPENDED)) { diff --git a/vl.c b/vl.c index 3d08e1a..51d4922 100644 --- a/vl.c +++ b/vl.c @@ -536,6 +536,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING }, { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED }, +{ RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, Is this a consequence of the first patch? { RUN_STATE_INTERNAL_ERROR, RUN_STATE_RUNNING }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, @@ -549,6 +550,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING }, { RUN_STATE_POSTMIGRATE, RUN_STATE_PAUSED }, { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE }, +{ RUN_STATE_POSTMIGRATE, RUN_STATE_GUEST_PANICKED }, Impossible. GUEST_PANICKED requires an instruction to be executed in the guest, so it should first go to RUNNING. { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE }, @@ -559,6 +561,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, { RUN_STATE_RESTORE_VM, RUN_STATE_PAUSED }, +{ RUN_STATE_RESTORE_VM, RUN_STATE_GUEST_PANICKED }, Is it also for the first patch? 
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG }, { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR }, @@ -569,6 +572,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_RUNNING, RUN_STATE_SAVE_VM }, { RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN }, { RUN_STATE_RUNNING, RUN_STATE_WATCHDOG }, +{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED }, This one is obviously ok. { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING }, @@ -583,6 +587,10 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING }, { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE }, +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING }, +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_PAUSED }, +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE }, Like SHUTDOWN, it should go first to PAUSED and then to RUNNING. A GUEST_PANICKED - RUNNING transition is not possible. You're seeing it because you lack the addition of GUEST_PANICKED here: if (runstate_check(RUN_STATE_INTERNAL_ERROR) || runstate_check(RUN_STATE_SHUTDOWN)) { runstate_set(RUN_STATE_PAUSED); } I think you should first move the INTERNAL_ERROR || SHUTDOWN checks to a separate function, so that you can then add GUEST_PANICKED. Paolo { RUN_STATE_MAX, RUN_STATE_MAX }, }; @@ -2001,7 +2009,8 @@ static bool main_loop_should_exit(void) qemu_system_reset(VMRESET_REPORT); resume_all_vcpus(); if
Re: [PATCH v13 5/8] add a new qevent: QEVENT_GUEST_PANICKED
Il 28/02/2013 13:13, Hu Tao ha scritto: This event will be emitted when the guest is panicked. Signed-off-by: Wen Congyang we...@cn.fujitsu.com --- include/monitor/monitor.h | 1 + monitor.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index 87fb49c..4006905 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -45,6 +45,7 @@ typedef enum MonitorEvent { QEVENT_WAKEUP, QEVENT_BALLOON_CHANGE, QEVENT_SPICE_MIGRATE_COMPLETED, +QEVENT_GUEST_PANICKED, /* Add to 'monitor_event_names' array in monitor.c when * defining new events here */ diff --git a/monitor.c b/monitor.c index bf974b4..d65218d 100644 --- a/monitor.c +++ b/monitor.c @@ -463,6 +463,7 @@ static const char *monitor_event_names[] = { [QEVENT_WAKEUP] = WAKEUP, [QEVENT_BALLOON_CHANGE] = BALLOON_CHANGE, [QEVENT_SPICE_MIGRATE_COMPLETED] = SPICE_MIGRATE_COMPLETED, +[QEVENT_GUEST_PANICKED] = GUEST_PANICKED, }; QEMU_BUILD_BUG_ON(ARRAY_SIZE(monitor_event_names) != QEVENT_MAX) Reviewed-by: Paolo Bonzini pbonz...@redhat.com Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 0/8] pv event interface between host and guest
Il 03/03/2013 10:17, Gleb Natapov ha scritto: On Thu, Feb 28, 2013 at 08:13:10PM +0800, Hu Tao wrote: This series implements a new interface, kvm pv event, to notify host when some events happen in guest. Right now there is one supported event: guest panic. What other event do you have in mind? Is interface generic enough to accommodate future, yet unknown, events. It allows to pass only one integer specifying even type, what if additional info is needed? My be stop pretending that device is generic and make it do once thing but do it well? For generic even passing interface (whatever it may be needed for) much more powerful virtio should be used. On implementation itself I do not understand why is this kvm specific. The only thing that makes it so is that you hook device initialization into guest kvm initialization code, but this is obviously incorrect. What stops QEMU tcg or Xen from reusing the same device for the same purpose except the artificial limitation in a guest. Agreed. Reading data from a random ioports is not how you discover platform devices in 21 century (and the data you read from unassigned port is not guarantied to be zero, it may depend on QEMU version), you use ACPI for that and Marcelo already pointed that to you. Having little knowledge of ACPI (we all do) is not a good reason to not doing it. We probably need to reserve QEMU specific ACPI Plug and Play hardware ID to define our own devices. After that you will be able to create device with _HID(QEMU0001) in DSDT that supplies address information (ioport to use) and capability supported. Please also document this HID in a new file in the QEMU docs/ directory. Guest uses acpi_get_devices() to discover a platform device by its name (QEMU0001). Then you put the driver for the platform device into drivers/platform/x86/ and QEMU/kvm/Xen all will be able to use it. Just to clarify it for Hu Tao, the read from a random ioport is how the ACPI code will detect presence of the device. 
Something like this should work (in SeaBIOS's src/acpi-dsdt-isa.dsl): Device(PEVT) { Name(_HID, EisaId(QEMU0001)) OperationRegion(PEOR, SystemIO, 0x505, 0x01) Field(PEOR, ByteAcc, NoLock, Preserve) { PEPT, 8, } Method(_STA, 0, NotSerialized) { Store(PEPT, Local0) If (LEqual(Local0, Zero)) { Return (0x00) } Else { Return (0x0F) } } Name(_CRS, ResourceTemplate() { IO(Decode16, 0x505, 0x505, 0x01, 0x01) }) } Please test this with a QEMU option like -M pc-1.4. The device should _not_ be detected if you're doing it right. On QEMU side of things I cannot comment much on how QOMified the device is (it should be), Please make the device target-independent. It can be used on non-x86 architectures that have I/O ports. You should make the port configurable using a property (DEFINE_PROPERTY_INT16 or something like that), with a default of 0x505. All the panicked_action is not necessary in my opinion. We have it for watchdogs, but that's really a legacy thing. Let libvirt do it, and always make the guest panic perform the PANICKED_PAUSE action. If you do it properly, a lot (really a lot) of code will go away. I hope other reviews will verify it, but I noticed that device is only initialized for PIIX, what about Q35? Yup. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 0/8] pv event interface between host and guest
On Mon, Mar 04, 2013 at 11:28:05AM +0100, Paolo Bonzini wrote: Il 04/03/2013 11:21, Gleb Natapov ha scritto: Just to clarify it for Hu Tao, the read from a random ioport is how the ACPI code will detect presence of the device. Actually no (at least in the long run, for the first version it may be OK). Agreed. Since we want to move DSDT generation into QEMU if device will not be present QEMU will not generate corresponded Device() in DSDT, or it will generate it with _STA() { Return (0x00)} hard coded. Yes, this would be good. Seabios can do the same if we will pass it info about device presence via fw_cfg. True, but I don't like this a lot. I don't like splitting decisions between SeaBIOS and the DSDT, you end up touching code all over the place and writing ASL is simpler than patching---even with all the machinery that we have. That's the main argument in favor of moving DSDT into QEMU regardless of this patch series, but as long as we have it in Seabios, have infrastructure for patching and use it for many things already I do not see why avoiding it. It is also simpler to move ASL from SeaBIOS to OVMF and/or viceversa. I don't recall what was the opposition to a fw_cfg driver directly in the DSDT, but I think this would be a good usage for it. Basically fw_cfg was not designed with this in mind. It was really meant to be simple interface for FW running one one CPU to use. You probably may do locking with AML too to guaranty atomic access, but things get complicated. Also may option that was added lately use file interface (since this is what Kevin prefers) and manipulating strings in AML is probably not what we want. Splitting it between QEMU and DSDT is a bit better, since you have to touch QEMU anyway to implement the feature. Anyhow, this does not apply to the next submission of this series. I think we can agree to the compromise of using ACPI but still read the port in _STA. If you want to make ioport configurable I do not see how can we avoid patching. 
-- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 0/8] pv event interface between host and guest
On Mon, Mar 04, 2013 at 11:49:07AM +0100, Paolo Bonzini wrote: Il 04/03/2013 11:43, Gleb Natapov ha scritto: Anyhow, this does not apply to the next submission of this series. I think we can agree to the compromise of using ACPI but still read the port in _STA. If you want to make ioport configurable I do not see how we can avoid patching. I want to make the ioport configurable in the device, but the PIIX and ICH9 (which are what the DSDT is written for) will always use port 0x505. But the device is not part of PIIX or ICH9. It is additional device that may or may not be present depending on a command line. So what if someone configures debugcon or debugexit to use this port? We can always blame the users, but I feel that we are making unnecessary compromises. You can configure a different iobase for your serial ports, the guest can still use them but not discover them via ACPI. This is the same thing. Probably we should patch DSDT too when it will be in QEMU :) or force iobase to spec values if device is used as part of a chipset. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH v7 07/11] hw/arm_gic: Convert ARM GIC classes to use init/realize
Am 26.02.2013 18:40, schrieb Peter Maydell: Convert the ARM GIC classes to use init/realize rather than SysBusDevice::init. (We have to do them all in one patch to avoid unconverted subclasses calling a nonexistent SysBusDevice init function in the base class and crashing.) Signed-off-by: Peter Maydell peter.mayd...@linaro.org --- hw/arm_gic.c | 23 +-- hw/arm_gic_common.c | 26 +++--- hw/arm_gic_internal.h |2 +- hw/armv7m_nvic.c | 15 --- 4 files changed, 37 insertions(+), 29 deletions(-) diff --git a/hw/arm_gic.c b/hw/arm_gic.c index 90e43d0..250e720 100644 --- a/hw/arm_gic.c +++ b/hw/arm_gic.c @@ -659,14 +659,18 @@ void gic_init_irqs_and_distributor(GICState *s, int num_irq) memory_region_init_io(s-iomem, gic_dist_ops, s, gic_dist, 0x1000); } -static int arm_gic_init(SysBusDevice *dev) +static void arm_gic_realize(DeviceState *dev, Error **errp) { -/* Device instance init function for the GIC sysbus device */ +/* Device instance realize function for the GIC sysbus device */ int i; -GICState *s = FROM_SYSBUS(GICState, dev); +GICState *s = ARM_GIC(dev); +SysBusDevice *sbd = SYS_BUS_DEVICE(dev); ARMGICClass *agc = ARM_GIC_GET_CLASS(s); -agc-parent_init(dev); +agc-parent_realize(dev, errp); +if (error_is_set(errp)) { +return; +} gic_init_irqs_and_distributor(s, s-num_irq); @@ -686,22 +690,21 @@ static int arm_gic_init(SysBusDevice *dev) gic_cpu, 0x100); } /* Distributor */ -sysbus_init_mmio(dev, s-iomem); +sysbus_init_mmio(sbd, s-iomem); /* cpu interfaces (one for current cpu plus one per cpu) */ for (i = 0; i = NUM_CPU(s); i++) { -sysbus_init_mmio(dev, s-cpuiomem[i]); +sysbus_init_mmio(sbd, s-cpuiomem[i]); } -return 0; } static void arm_gic_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); -SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass); ARMGICClass *agc = ARM_GIC_CLASS(klass); -agc-parent_init = sbc-init; -sbc-init = arm_gic_init; + dc-no_user = 1; +agc-parent_realize = dc-realize; +dc-realize = arm_gic_realize; } static const 
TypeInfo arm_gic_info = { diff --git a/hw/arm_gic_common.c b/hw/arm_gic_common.c index 2947622..3b2955c 100644 --- a/hw/arm_gic_common.c +++ b/hw/arm_gic_common.c @@ -104,31 +104,35 @@ static int gic_load(QEMUFile *f, void *opaque, int version_id) return 0; } -static int arm_gic_common_init(SysBusDevice *dev) +static void arm_gic_common_realize(DeviceState *dev, Error **errp) { -GICState *s = FROM_SYSBUS(GICState, dev); +GICState *s = ARM_GIC_COMMON(dev); int num_irq = s-num_irq; if (s-num_cpu NCPU) { -hw_error(requested %u CPUs exceeds GIC maximum %d\n, - s-num_cpu, NCPU); +error_setg(errp, requested %u CPUs exceeds GIC maximum %d\n, Please drop \n for error_setg(). Probably would be worth adding to a convert-to-realize section on the Wiki. + s-num_cpu, NCPU); +return; } s-num_irq += GIC_BASE_IRQ; if (s-num_irq GIC_MAXIRQ) { -hw_error(requested %u interrupt lines exceeds GIC maximum %d\n, - num_irq, GIC_MAXIRQ); +error_setg(errp, + requested %u interrupt lines exceeds GIC maximum %d\n, + num_irq, GIC_MAXIRQ); +return; } /* ITLinesNumber is represented as (N / 32) - 1 (see * gic_dist_readb) so this is an implementation imposed * restriction, not an architectural one: */ if (s-num_irq 32 || (s-num_irq % 32)) { -hw_error(%d interrupt lines unsupported: not divisible by 32\n, - num_irq); +error_setg(errp, + %d interrupt lines unsupported: not divisible by 32\n, + num_irq); +return; } register_savevm(NULL, arm_gic, -1, 3, gic_save, gic_load, s); -return 0; } static void arm_gic_common_reset(DeviceState *dev) @@ -173,12 +177,12 @@ static Property arm_gic_common_properties[] = { static void arm_gic_common_class_init(ObjectClass *klass, void *data) { -SysBusDeviceClass *sc = SYS_BUS_DEVICE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); + dc-reset = arm_gic_common_reset; +dc-realize = arm_gic_common_realize; dc-props = arm_gic_common_properties; dc-no_user = 1; -sc-init = arm_gic_common_init; } static const TypeInfo arm_gic_common_type = { diff --git 
a/hw/arm_gic_internal.h b/hw/arm_gic_internal.h index 3640be0..3ba37f3 100644 --- a/hw/arm_gic_internal.h +++ b/hw/arm_gic_internal.h @@ -132,7 +132,7 @@ typedef struct ARMGICCommonClass { typedef struct ARMGICClass {
Re: [Qemu-devel] [PATCH v7 03/11] target-arm: Drop CPUARMState* argument from bank_number()
Am 26.02.2013 18:40, schrieb Peter Maydell: Drop the CPUARMState* argument from bank_number(), since we only use it for passing to cpu_abort(). Use hw_error() instead. This avoids propagating further interfaces using env pointers. In the long term this function's callers need auditing to fix problems where badly behaved guests can pass invalid bank numbers. Signed-off-by: Peter Maydell peter.mayd...@linaro.org Reviewed-by: Andreas Färber afaer...@suse.de Thanks, Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 0/8] pv event interface between host and guest
Il 04/03/2013 11:59, Gleb Natapov ha scritto: I want to make the ioport configurable in the device, but the PIIX and ICH9 (which are what the DSDT is written for) will always use port 0x505. But the device is not part of PIIX or ICH9. So is kvmclock, or kvmvapic. I think it makes sense to add this device to PIIX or ICH9 since it is an ISA device. It is additional device that may or may not be present depending on a command line. So what if someone configures debugcon or debugexit to use this port? I haven't checked if debug{con,exit} will pass the _STA test, but if they do, the user will get a Ctrl-A or respectively an exit of QEMU when the guest panics. What if someone configures debugcon on port 0x3f8? Some guest will use it, some will not. We can always blame the users, but I fill that we are making unnecessary compromises. Once we choose an ISA device, where the user has full control of the address space, we already know we'll have to accept compromises. I don't think this compromise is particularly bad: do discovery via ACPI (nice), accept that the user can trick the AML (ugly). Paolo You can configure a different iobase for your serial ports, the guest can still use them but not discover them via ACPI. This is the same thing. Probably we should patch DSDT too when it will be in QEMU :) of force iobase to spec values if device is used as part of a chipset. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 0/8] pv event interface between host and guest
On Mon, Mar 04, 2013 at 11:05:37AM +0100, Paolo Bonzini wrote: Guest uses acpi_get_devices() to discover a platform device by its name (QEMU0001). Then you put the driver for the platform device into drivers/platform/x86/ and QEMU/kvm/Xen all will be able to use it. Just to clarify it for Hu Tao, the read from a random ioport is how the ACPI code will detect presence of the device. Actually no (at least in the long run, for the first version it may be OK). Since we want to move DSDT generation into QEMU if device will not be present QEMU will not generate corresponded Device() in DSDT, or it will generate it with _STA() { Return (0x00)} hard coded. Seabios can do the same if we will pass it info about device presence via fw_cfg. Not sure Kevin will like it now when we plan to move DSDT into QEMU anyway :) Something like this should work (in SeaBIOS's src/acpi-dsdt-isa.dsl): Device(PEVT) { Name(_HID, EisaId(QEMU0001)) OperationRegion(PEOR, SystemIO, 0x505, 0x01) Field(PEOR, ByteAcc, NoLock, Preserve) { PEPT, 8, } Method(_STA, 0, NotSerialized) { Store(PEPT, Local0) If (LEqual(Local0, Zero)) { Return (0x00) } Else { Return (0x0F) } } Name(_CRS, ResourceTemplate() { IO(Decode16, 0x505, 0x505, 0x01, 0x01) }) } Please test this with a QEMU option like -M pc-1.4. The device should _not_ be detected if you're doing it right. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 0/8] pv event interface between host and guest
On Mon, Mar 04, 2013 at 12:10:58PM +0100, Paolo Bonzini wrote: Il 04/03/2013 11:59, Gleb Natapov ha scritto: I want to make the ioport configurable in the device, but the PIIX and ICH9 (which are what the DSDT is written for) will always use port 0x505. But the device is not part of PIIX or ICH9. So is kvmclock, or kvmvapic. I think it makes sense to add this device to PIIX or ICH9 since it is an ISA device. Those are CPU interfaces, not chipset. fw_cfg or our PIIX ACPI additions would be better examples, but since they are always present and non configurable they are in a different category. It is additional device that may or may not be present depending on a command line. So what if someone configures debugcon or debugexit to use this port? I haven't checked if debug{con,exit} will pass the _STA test, but if they do, the user will get a Ctrl-A or respectively an exit of QEMU when the guest panics. What if someone configures debugcon on port 0x3f8? Some guest will use it, some will not. Qemu should fail to start since conflict will be detected during initialization. We can always blame the users, but I feel that we are making unnecessary compromises. Once we choose an ISA device, where the user has full control of the address space, we already know we'll have to accept compromises. I don't think this compromise is particularly bad: do discovery via ACPI (nice), accept that the user can trick the AML (ugly). Why would we have to accept compromises? We may, but I disagree that it is necessary. If user configures conflicting ports QEMU will detect it during init, if configuration is correct DSDT should provide enough information for guests to use configured devices. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 0/8] pv event interface between host and guest
Il 04/03/2013 12:20, Gleb Natapov ha scritto: On Mon, Mar 04, 2013 at 12:10:58PM +0100, Paolo Bonzini wrote: It is additional device that may or may not be present depending on a command line. So what if someone configures debugcon or debugexit to use this port? I haven't checked if debug{con,exit} will pass the _STA test, but if they do, the user will get a Ctrl-A or respectively an exit of QEMU when the guest panics. What if someone configures debugcon on port 0x3f8? Some guest will use it, some will not. Qemu should fail to start since conflict will be detected during initialization. Not if you _remove_ the serial port and place debugcon at 0x3f8. Same here, you can remove the panic event port and add debugcon at 0x505. That's the problematic case. But if the user goes to that length, I think we can honestly say we don't care. Paolo We can always blame the users, but I fill that we are making unnecessary compromises. Once we choose an ISA device, where the user has full control of the address space, we already know we'll have to accept compromises. I don't think this compromise is particularly bad: do discovery via ACPI (nice), accept that the user can trick the AML (ugly). Why would have we accept compromises, we may, but I disagree that it is necessary? If user configures conflicting ports QEMU will detect it during init, if configuration is correct DSDT should provide enough information for guests to use configured devices. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH v7 07/11] hw/arm_gic: Convert ARM GIC classes to use init/realize
On 4 March 2013 19:10, Andreas Färber afaer...@suse.de wrote: Am 26.02.2013 18:40, schrieb Peter Maydell: if (s-num_cpu NCPU) { -hw_error(requested %u CPUs exceeds GIC maximum %d\n, - s-num_cpu, NCPU); +error_setg(errp, requested %u CPUs exceeds GIC maximum %d\n, Please drop \n for error_setg(). Probably would be worth adding to a convert-to-realize section on the Wiki. Doh. That's such a trivial change I intend to just make it in passing when I put these changes into target-arm.next rather than sending out an entire fresh round of patches, unless you object. Otherwise looks fine, thanks. Should I mark such a fixed-up patch with your reviewed-by tag? thanks -- PMM -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH 00/12] KVM Support for MIPS32 Processors
Hello, Am 02.03.2013 16:18, schrieb Sanjay Lal: The following patchset implements KVM support for MIPS32 processors, using Trap Emulate, with basic runtime binary translation to improve performance. [snip] Please see http://wiki.qemu.org/Contribute/SubmitAPatch for some hints on how to improve submission of your QEMU patchset. In particular we require Signed-off-bys just like Linux, subjects should use target-mips: or similar based on file/directory names, subject line should be one short statement and commit message should give further explanations of what the patch is doing and why, where appropriate. Also a fair warning: I am refactoring the core CPU code, so you should be tracking qemu.git and/or mailing list for possible conflicts and rebasing necessary. In that context please prefer use of MIPSCPU over CPUMIPSState (e.g., in GIC state and functions). Please adopt our Coding Style, which among other things asks for CamelCase struct naming (e.g., MIPSGICState rather than gic_t). Please learn about QOM usage and its conventions. Your GIC should probably be a SysBusDevice, not a pre-qdev collection of manually allocated state. http://wiki.qemu.org/QOMConventions There's also an ongoing discussion about DPRINTF()s defined as no-op do {} while(0) leading to format string breakages over time. Recommended replacement is a macro using do { if (FOO) { ... } } while (0), with FOO evaluating to 0 in the no-debug case, so that everything gets compile-tested but optimized out. Regards, Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 0/8] pv event interface between host and guest
Il 04/03/2013 12:52, Gleb Natapov ha scritto: Same here, you can remove the panic event port and add debugcon at 0x505. That's the problematic case. But if the user goes to that length, I think we can honestly say we don't care. IMO there is a big difference between well know serial ISA ports and PIO ports we allocate for our devices. Later have to be discoverable without resorting to probing. On CPU level we do the same with CPUID bits instead of relaying on MSRs #GP. On KVM API level we do the same with capabilities instead of relying on ioctls returning errors. This is not different. Ok, I see your point now. Yes, this is a good reason why patching is better in the long run. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode
On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote: The logic for calculating the value with which we call kvm_set_cr0/4 was broken (will definitely be visible with nested unrestricted guest mode support). Also, we performed the check regarding CR0_ALWAYSON too early when in guest mode. What really needs to be done on both CR0 and CR4 is to mask out L1-owned bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus, are not suited as input. For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON and refuse the update if it fails. To be fully consistent, we implement this check now also for CR4. Finally, we have to set the shadow to the value L2 wanted to write originally. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Found while making unrestricted guest mode working. Not sure what impact the bugs had on current feature level, if any. For interested folks, I've pushed my nEPT environment here: git://git.kiszka.org/linux-kvm.git nept-hacking arch/x86/kvm/vmx.c | 49 ++--- 1 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cc566b..d1dac08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { - if (to_vmx(vcpu)-nested.vmxon - ((val VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) - return 1; - if (is_guest_mode(vcpu)) { - /* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. 
- */ Can't say I understand this patch yet, but it looks like the comment is still valid. Why have you removed it? - if (kvm_set_cr0(vcpu, (val vcpu-arch.cr0_guest_owned_bits) | - (vcpu-arch.cr0 ~vcpu-arch.cr0_guest_owned_bits))) + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + + val = (val ~vmcs12-cr0_guest_host_mask) | + (vmcs_read64(GUEST_CR0) vmcs12-cr0_guest_host_mask); + if ((val VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) + return 1; + + if (kvm_set_cr0(vcpu, val)) return 1; - vmcs_writel(CR0_READ_SHADOW, val); + vmcs_writel(CR0_READ_SHADOW, orig_val); return 0; - } else + } else { + if (to_vmx(vcpu)-nested.vmxon + ((val VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) + return 1; return kvm_set_cr0(vcpu, val); + } } static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) { if (is_guest_mode(vcpu)) { - if (kvm_set_cr4(vcpu, (val vcpu-arch.cr4_guest_owned_bits) | - (vcpu-arch.cr4 ~vcpu-arch.cr4_guest_owned_bits))) + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned long orig_val = val; + + val = (val ~vmcs12-cr4_guest_host_mask) | + (vmcs_readl(GUEST_CR4) vmcs12-cr4_guest_host_mask); + if ((val VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON) + return 1; + + if (kvm_set_cr4(vcpu, val)) return 1; - vmcs_writel(CR4_READ_SHADOW, val); + vmcs_writel(CR4_READ_SHADOW, orig_val); return 0; - } else + } else { + if (to_vmx(vcpu)-nested.vmxon + ((val VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) + return 1; return kvm_set_cr4(vcpu, val); + } } /* called to set cr0 as approriate for clts instruction exit. */ -- 1.7.3.4 -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS
Il 03/03/2013 13:04, Jan Kiszka ha scritto: From: Jan Kiszka jan.kis...@siemens.com Properly set those bits to 1 that the spec demands in case bit 55 of VMX_BASIC is 0 - like in our case. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- arch/x86/kvm/vmx.c | 13 ++--- 1 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 631cdb3..c204f0d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2050,21 +2050,28 @@ static __init void nested_vmx_setup_ctls_msrs(void) PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; - /* exit controls */ - nested_vmx_exit_ctls_low = 0; + /* + * Exit controls + * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and + * 17 must be 1. + */ + nested_vmx_exit_ctls_low = 0x36dff; /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ #ifdef CONFIG_X86_64 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; #else nested_vmx_exit_ctls_high = 0; #endif + nested_vmx_exit_ctls_high |= 0x36dff; Can you use nested_vmx_exit_ctls_low on the RHS, or define a constant? /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - nested_vmx_entry_ctls_low = 0; + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ + nested_vmx_entry_ctls_low = 0x11ff; nested_vmx_entry_ctls_high = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; + nested_vmx_entry_ctls_high |= 0x11ff; Same here. Paolo /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: x86: Racy mp_state manipulations
Il 03/03/2013 17:48, Jan Kiszka ha scritto: Hi all, KVM's mp_state on x86 is usually manipulated over the context of the VCPU. Therefore, no locking is required. There are unfortunately two exceptions, and one of them is definitely broken: INIT and SIPI delivery. The lapic may set mp_state over the context of the sending VCPU. For SIPI, it first checks if the mp_state is INIT_RECEIVED before updating it to SIPI_RECEIVED. We can only race here with user space setting the state in parallel, I suppose. Probably harmless in practice. Still it would be better to add an smp_wmb/smp_rmb pair between accesses of mp_state and sipi_vector. Also, Io What is critical is the update on INIT. That signal is asynchronous to the target VCPU state. And we can loose it: vcpu 1vcpu 2 ---- hlt; vmexit __apic_accept_irq(APIC_DM_INIT) mp_state = KVM_MP_STATE_INIT_RECEIVED mp_state = KVM_MP_STATE_HALTED And there it goes, our INIT state. I've triggered this under heavy INIT load and my nVMX patch for processing it while in VMXON. I'm currently considering options to fix this: - through a lock at mp_state manipulations, check under the lock that we don't perform invalid state transitions (e.g. INIT-HLT) - signal the INIT via some KVM_REQ_INIT to the target VCPU, fully localizing mp_state updates, the same could be done with SIPI, just to play safe I'm leaning toward the latter ATM, Any thoughts or other idea? The latter makes sense since it's not a fast path, but the only transition that is acceptable to KVM_MP_STATE_HALTED is from KVM_MP_STATE_RUNNABLE: from \ toRUNNABLE UNINIT INIT HALTED SIPI RUNNABLE n/a yesyes yes NO UNINIT NO n/ayes NO NO INIT NO yesn/a NO yes HALTED yes yesyes n/a NO SIPI yes yesyes NO n/a so for this particular bug it should also work to use a cmpxchg when setting KVM_MP_STATE_HALTED. Same for INIT-SIPI, since writes to sipi_vector are harmless. BTW, what happens when you send an INIT IPI to the bootstrap processor? 
This may be interesting if we want to emulate soft resets correctly in QEMU; KVM makes it go to wait-for-SIPI state if I read the code correctly, but that is wrong. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests
Il 03/03/2013 21:21, Jan Kiszka ha scritto: From: Jan Kiszka jan.kis...@siemens.com A VCPU sending INIT or SIPI to some other VCPU races for setting the remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED was overwritten by kvm_emulate_halt and, thus, got lost. Fix this by raising requests on the sender side that will then be handled synchronously over the target VCPU context. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Turned out to be simpler than expected. I'm no longer able to reproduce the race I saw before. arch/x86/kvm/lapic.c |9 - arch/x86/kvm/x86.c | 16 +++- include/linux/kvm_host.h |2 ++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd..be1e37a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; - vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_make_request(KVM_REQ_INIT, vcpu); kvm_vcpu_kick(vcpu); } else { apic_debug(Ignoring de-assert INIT to vcpu %d\n, @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug(SIPI to vcpu %d vector 0x%02x\n, vcpu-vcpu_id, vector); - if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || + test_bit(KVM_REQ_INIT, vcpu-requests)) { result = 1; vcpu-arch.sipi_vector = vector; - vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_make_request(KVM_REQ_SIPI, vcpu); kvm_vcpu_kick(vcpu); } break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d0cf737..8c8843c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap); } 
+static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) +{ + if (kvm_check_request(KVM_REQ_INIT, vcpu)) + vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + if (kvm_check_request(KVM_REQ_SIPI, vcpu) + vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore? Perhaps you can call kvm_check_init_and_sipi from __vcpu_run, before the call to kvm_vcpu_block (and move the reset from __vcpu_run to kvm_check_init_and_sipi too)? Then you do not even need to touch kvm_arch_vcpu_runnable. + return true; + } + return false; +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = 0; if (vcpu-requests) { + kvm_check_init_and_sipi(vcpu); Does this need to return 1 if kvm_check_init_and_sipi returns 1? Otherwise the guest is entered in INIT state. I think. Paolo if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + if (kvm_check_init_and_sipi(vcpu)) + return 1; return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE !vcpu-arch.apf.halted) || !list_empty_careful(vcpu-async_pf.done) - || vcpu-arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED || atomic_read(vcpu-arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) kvm_cpu_has_interrupt(vcpu)); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 722cae7..1a191c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -124,6 +124,8 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_MCLOCK_INPROGRESS 20 #define KVM_REQ_EPR_EXIT 21 #define KVM_REQ_EOIBITMAP 22 +#define KVM_REQ_INIT 23 +#define KVM_REQ_SIPI 24 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to
Re: KVM: x86: Racy mp_state manipulations
On 2013-03-04 15:12, Paolo Bonzini wrote: Il 03/03/2013 17:48, Jan Kiszka ha scritto: Hi all, KVM's mp_state on x86 is usually manipulated over the context of the VCPU. Therefore, no locking is required. There are unfortunately two exceptions, and one of them is definitely broken: INIT and SIPI delivery. The lapic may set mp_state over the context of the sending VCPU. For SIPI, it first checks if the mp_state is INIT_RECEIVED before updating it to SIPI_RECEIVED. We can only race here with user space setting the state in parallel, I suppose. Probably harmless in practice. Still it would be better to add an smp_wmb/smp_rmb pair between accesses of mp_state and sipi_vector. Do we need a mb between sipi_vector assignment and kvm_make_request (see my patch to fix this issue)? Also, Io What is critical is the update on INIT. That signal is asynchronous to the target VCPU state. And we can loose it: vcpu 1 vcpu 2 -- -- hlt; vmexit __apic_accept_irq(APIC_DM_INIT) mp_state = KVM_MP_STATE_INIT_RECEIVED mp_state = KVM_MP_STATE_HALTED And there it goes, our INIT state. I've triggered this under heavy INIT load and my nVMX patch for processing it while in VMXON. I'm currently considering options to fix this: - through a lock at mp_state manipulations, check under the lock that we don't perform invalid state transitions (e.g. INIT-HLT) - signal the INIT via some KVM_REQ_INIT to the target VCPU, fully localizing mp_state updates, the same could be done with SIPI, just to play safe I'm leaning toward the latter ATM, Any thoughts or other idea? The latter makes sense since it's not a fast path, but the only transition that is acceptable to KVM_MP_STATE_HALTED is from KVM_MP_STATE_RUNNABLE: from \ toRUNNABLE UNINIT INIT HALTED SIPI RUNNABLE n/a yesyes yes NO UNINIT NO n/ayes NO NO INIT NO yesn/a NO yes HALTED yes yesyes n/a NO SIPI yes yesyes NO n/a so for this particular bug it should also work to use a cmpxchg when setting KVM_MP_STATE_HALTED. 
Same for INIT-SIPI, since writes to sipi_vector are harmless. OK, but I already went for request bits. :) BTW, what happens when you send an INIT IPI to the bootstrap processor? This may be interesting if we want to emulate soft resets correctly in QEMU; KVM makes it go to wait-for-SIPI state if I read the code correctly, but that is wrong. Where is this restriction specified? How do you reset the BP without resetting the whole system then? Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests
On 2013-03-04 15:28, Paolo Bonzini wrote: Il 03/03/2013 21:21, Jan Kiszka ha scritto: From: Jan Kiszka jan.kis...@siemens.com A VCPU sending INIT or SIPI to some other VCPU races for setting the remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED was overwritten by kvm_emulate_halt and, thus, got lost. Fix this by raising requests on the sender side that will then be handled synchronously over the target VCPU context. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Turned out to be simpler than expected. I'm no longer able to reproduce the race I saw before. arch/x86/kvm/lapic.c |9 - arch/x86/kvm/x86.c | 16 +++- include/linux/kvm_host.h |2 ++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd..be1e37a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; -vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; -kvm_make_request(KVM_REQ_EVENT, vcpu); +kvm_make_request(KVM_REQ_INIT, vcpu); kvm_vcpu_kick(vcpu); } else { apic_debug(Ignoring de-assert INIT to vcpu %d\n, @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug(SIPI to vcpu %d vector 0x%02x\n, vcpu-vcpu_id, vector); -if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { +if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || +test_bit(KVM_REQ_INIT, vcpu-requests)) { result = 1; vcpu-arch.sipi_vector = vector; -vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; -kvm_make_request(KVM_REQ_EVENT, vcpu); +kvm_make_request(KVM_REQ_SIPI, vcpu); kvm_vcpu_kick(vcpu); } break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d0cf737..8c8843c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) 
kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap); } +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) +{ +if (kvm_check_request(KVM_REQ_INIT, vcpu)) +vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; +if (kvm_check_request(KVM_REQ_SIPI, vcpu) +vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { +vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore? Perhaps you can call kvm_check_init_and_sipi from __vcpu_run, before the call to kvm_vcpu_block (and move the reset from __vcpu_run to kvm_check_init_and_sipi too)? Then you do not even need to touch kvm_arch_vcpu_runnable. Haven't thought about this in details yet as I first wanted to fix within the existing infrastructure. But maybe the change below requires more refactoring anyway. Let's see. +return true; +} +return false; +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = 0; if (vcpu-requests) { +kvm_check_init_and_sipi(vcpu); Does this need to return 1 if kvm_check_init_and_sipi returns 1? Otherwise the guest is entered in INIT state. I think. Hmm, true... Need to refactor things a bit more as kvm_check_init_and_sipi is designed to return true only for wait-on-sipi-runnable transition. Thanks, Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode
On Mon, Mar 04, 2013, Jan Kiszka wrote about Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode: if (is_guest_mode(vcpu)) { -/* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. - */ Can't say I understand this patch yet, but it looks like the comment is still valid. Why have you removed it? L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think the comment was always misleading. I do not see how it is misleading. For everything but TS we will not get here (if L1 is kvm). For TS we will get here if L1 allows L2 to change it, but L0 does not. For everything *but guest-owned* we will get here, thus for most CR0 accesses (bit-wise, not regarding frequency). For most CR0 bits, L1 (at least, a KVM one) will shadow (trap) them, so we won't get to this point you modified at all... Instead, nested_vmx_exit_handled_cr() would notice that a shadowed-by-L1 bit was modified so an exit to L1 is required. We only get to that code you changed if a bit was modified that L1 did *not* want to trap, but L0 did. This is definitely not the bit-wise majority of the cases - unless you have an L1 that does not trap most of the CR0 bits. But I'm more worried about the actual code change :-) I didn't understand if there's a situation where the existing code did something wrong, or why it was wrong. Did you check the lazy-FPU-loading (TS bit) aspect of your new code? To effectively check this, what I had to do is to run on all of L0, L1, and L2, long runs of parallel make (make -j3) - concurrently. 
Even code which doesn't do floating-point calculations uses the FPU sometimes for its wide registers, so all these processes, guests and guest's guests, compete for the FPU, exercising very well this code path. If the TS bit is handled wrongly, some of these make processes will die, when one of the compilations dies of SIGSEGV (forgetting to set the FPU registers leads to some uninitialized pointers being used), so it's quite easy to exercise this. -- Nadav Har'El| Monday, Mar 4 2013, 22 Adar 5773 n...@math.technion.ac.il |- Phone +972-523-790466, ICQ 13349191 |A witty saying proves nothing. -- http://nadav.harel.org.il |Voltaire -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS
On Sun, Mar 03, 2013, Jan Kiszka wrote about [PATCH] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS: /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ #ifdef CONFIG_X86_64 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; #else nested_vmx_exit_ctls_high = 0; #endif + nested_vmx_exit_ctls_high |= 0x36dff; Can you please compose this 0x36dff out of constants? Is VM_EXIT_HOST_ADDR_SPACE_SIZE one of them? It's important to verify that we actually support all these bits - even if we *should* support them, it doesn't mean we actually do (but if we do, we should say we do). - nested_vmx_entry_ctls_low = 0; + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ + nested_vmx_entry_ctls_low = 0x11ff; Setting nested_vmx_entry_ctls_low = 0 just means that although the spec says only 1 setting is supported, we *also* support 0 setting. I'm not sure why this is a bad thing. Our VMX will be even better than the real processors' ;-) -- Nadav Har'El| Monday, Mar 4 2013, 22 Adar 5773 n...@math.technion.ac.il |- Phone +972-523-790466, ICQ 13349191 |My opinions may have changed, but not the http://nadav.harel.org.il |fact that I am right. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS
Properly set those bits to 1 that the spec demands in case bit 55 of VMX_BASIC is 0 - like in our case. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Changes in v2: - use symbolic constants arch/x86/include/asm/vmx.h |4 arch/x86/kvm/vmx.c | 13 ++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f6b599b..a0c2ad2 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -169,6 +169,8 @@ #define VM_EXIT_LOAD_IA32_EFER 0x0020 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x0040 +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + #define VM_ENTRY_LOAD_DEBUG_CONTROLS0x0002 #define VM_ENTRY_IA32E_MODE 0x0200 #define VM_ENTRY_SMM0x0400 @@ -177,6 +179,8 @@ #define VM_ENTRY_LOAD_IA32_PAT 0x4000 #define VM_ENTRY_LOAD_IA32_EFER 0x8000 +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x11ff + #define VMX_MISC_SAVE_EFER_LMA 0x0020 /* VMCS Encodings */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 631cdb3..1f917de 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2050,21 +2050,28 @@ static __init void nested_vmx_setup_ctls_msrs(void) PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; - /* exit controls */ - nested_vmx_exit_ctls_low = 0; + /* +* Exit controls +* If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and +* 17 must be 1. +*/ + nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ #ifdef CONFIG_X86_64 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; #else nested_vmx_exit_ctls_high = 0; #endif + nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - nested_vmx_entry_ctls_low = 0; + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. 
*/ + nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; + nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, -- 1.7.3.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] KVM: nVMX: Fix content of MSR_IA32_VMX_ENTRY/EXIT_CTLS
Il 04/03/2013 17:15, Jan Kiszka ha scritto: Properly set those bits to 1 that the spec demands in case bit 55 of VMX_BASIC is 0 - like in our case. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Reviewed-by: Paolo Bonzini pbonz...@redhat.com --- Changes in v2: - use symbolic constants arch/x86/include/asm/vmx.h |4 arch/x86/kvm/vmx.c | 13 ++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index f6b599b..a0c2ad2 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -169,6 +169,8 @@ #define VM_EXIT_LOAD_IA32_EFER 0x0020 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x0040 +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR0x00036dff + #define VM_ENTRY_LOAD_DEBUG_CONTROLS0x0002 #define VM_ENTRY_IA32E_MODE 0x0200 #define VM_ENTRY_SMM0x0400 @@ -177,6 +179,8 @@ #define VM_ENTRY_LOAD_IA32_PAT 0x4000 #define VM_ENTRY_LOAD_IA32_EFER 0x8000 +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x11ff + #define VMX_MISC_SAVE_EFER_LMA 0x0020 /* VMCS Encodings */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 631cdb3..1f917de 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2050,21 +2050,28 @@ static __init void nested_vmx_setup_ctls_msrs(void) PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; - /* exit controls */ - nested_vmx_exit_ctls_low = 0; + /* + * Exit controls + * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and + * 17 must be 1. + */ + nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. 
*/ #ifdef CONFIG_X86_64 nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; #else nested_vmx_exit_ctls_high = 0; #endif + nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - nested_vmx_entry_ctls_low = 0; + /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ + nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; + nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC 0/2] kvm: Better yield_to candidate using preemption notifiers
This patch series further filters better vcpu candidate to yield to in PLE handler. The main idea is to record the preempted vcpus using preempt notifiers and iterate only those preempted vcpus in the handler. Note that the vcpus which were in spinloop during pause loop exit are already filtered. Thanks Jiannan, Avi for bringing the idea and Gleb, PeterZ for precious suggestions during the discussion. Thanks Srikar for suggesting to avoid rcu lock while checking task state that has improved overcommit cases. There are basically two approches for the implementation. Method 1: Uses per vcpu preempt flag (this series). Method 2: We keep a bitmap of preempted vcpus. using this we can easily iterate over preempted vcpus. Note that method 2 needs an extra index variable to identify/map bitmap to vcpu and it also needs static vcpu allocation. I am also posting Method 2 approach for reference in case it interests. Result: decent improvement for kernbench and ebizzy. base = 3.8.0 + undercommit patches patched = base + preempt patches Tested on 32 core (no HT) mx3850 machine with 32 vcpu guest 8GB RAM --+---+---+---++---+ kernbench (exec time in sec lower is beter) --+---+---+---++---+ base stdev patched stdev %improve --+---+---+---++---+ 1x47.0383 4.6977 44.2584 1.2899 5.90986 2x96.0071 7.1873 91.2605 7.3567 4.94401 3x 164.015710.3613156.675011.4267 4.47561 4x 212.576823.7326204.480013.2908 3.80888 --+---+---+---++---+ no ple kernbench 1x result for reference: 46.056133 --+---+---+---++---+ ebizzy (record/sec higher is better) --+---+---+---++---+ base stdev patched stdev %improve --+---+---+---++---+ 1x 5609.200056.93436263.700064.7097 11.66833 2x 2071.9000 108.48292653.5000 181.8395 28.07085 3x 1557.4167 109.71411993.5000 166.3176 28.00043 4x 1254.750091.29971765.5000 237.5410 40.70532 --+---+---+---++---+ no ple ebizzy 1x result for reference : 7394.9 rec/sec Please let me know if you have any suggestions and comments. 
Raghavendra K T (2): kvm: Record the preemption status of vcpus using preempt notifiers kvm: Iterate over only vcpus that are preempted include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 7 +++ 2 files changed, 8 insertions(+) Reference patch for Method 2 ---8--- Use preempt bitmap and optimize vcpu iteration using preempt notifiers From: Raghavendra K T raghavendra...@linux.vnet.ibm.com Record the preempted vcpus in a bit map using preempt notifiers. Add the logic of iterating over only preempted vcpus thus making vcpu iteration fast. Thanks Jiannan, Avi for initially proposing patch. Gleb, Peter for precious suggestions. Thanks srikar for suggesting to remove rcu lock while checking task state that helped in reducing overcommit overhead Not-yet-signed-off-by: Raghavendra K T raghavendra...@linux.vnet.ibm.com --- include/linux/kvm_host.h |7 +++ virt/kvm/kvm_main.c | 15 --- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cad77fe..8c4a2409 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -252,6 +252,7 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif + int idx; struct kvm_vcpu_arch arch; }; @@ -385,6 +386,7 @@ struct kvm { long mmu_notifier_count; #endif long tlbs_dirty; + DECLARE_BITMAP(preempt_bitmap, KVM_MAX_VCPUS); }; #define kvm_err(fmt, ...) 
\ @@ -413,6 +415,11 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ idx++) +#define kvm_for_each_preempted_vcpu(idx, vcpup, kvm, n) \ + for (idx = find_first_bit(kvm-preempt_bitmap, KVM_MAX_VCPUS); \ +idx n (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ +idx = find_next_bit(kvm-preempt_bitmap, KVM_MAX_VCPUS, idx+1)) + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = slots-memslots[0]; \ memslot slots-memslots + KVM_MEM_SLOTS_NUM memslot-npages;\ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index adc68fe..1db16b3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1770,10 +1770,12 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) struct kvm_vcpu *vcpu; int last_boosted_vcpu = me-kvm-last_boosted_vcpu; int yielded = 0; +
Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests
On Sun, Mar 03, 2013 at 09:21:43PM +0100, Jan Kiszka wrote: From: Jan Kiszka jan.kis...@siemens.com A VCPU sending INIT or SIPI to some other VCPU races for setting the remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED was overwritten by kvm_emulate_halt and, thus, got lost. Fix this by raising requests on the sender side that will then be handled synchronously over the target VCPU context. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Turned out to be simpler than expected. I'm no longer able to reproduce the race I saw before. arch/x86/kvm/lapic.c |9 - arch/x86/kvm/x86.c | 16 +++- include/linux/kvm_host.h |2 ++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd..be1e37a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; - vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_make_request(KVM_REQ_INIT, vcpu); kvm_vcpu_kick(vcpu); } else { apic_debug(Ignoring de-assert INIT to vcpu %d\n, @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug(SIPI to vcpu %d vector 0x%02x\n, vcpu-vcpu_id, vector); - if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || + test_bit(KVM_REQ_INIT, vcpu-requests)) { result = 1; vcpu-arch.sipi_vector = vector; - vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_make_request(KVM_REQ_SIPI, vcpu); kvm_vcpu_kick(vcpu); } break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d0cf737..8c8843c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops-load_eoi_exitmap(vcpu, 
eoi_exit_bitmap); } +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) +{ + if (kvm_check_request(KVM_REQ_INIT, vcpu)) + vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + if (kvm_check_request(KVM_REQ_SIPI, vcpu) + vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; + return true; + } + return false; +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = 0; if (vcpu-requests) { + kvm_check_init_and_sipi(vcpu); if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + if (kvm_check_init_and_sipi(vcpu)) + return 1; return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE !vcpu-arch.apf.halted) || !list_empty_careful(vcpu-async_pf.done) - || vcpu-arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED || atomic_read(vcpu-arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) kvm_cpu_has_interrupt(vcpu)); This makes two subsequent calls to kvm_arch_vcpu_runnable() return different values if SIPI is pending. While it may not cause problem to current code (I haven't thought it through) with such semantics you gonna have a bad time. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests
On 2013-03-04 19:08, Gleb Natapov wrote: On Sun, Mar 03, 2013 at 09:21:43PM +0100, Jan Kiszka wrote: From: Jan Kiszka jan.kis...@siemens.com A VCPU sending INIT or SIPI to some other VCPU races for setting the remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED was overwritten by kvm_emulate_halt and, thus, got lost. Fix this by raising requests on the sender side that will then be handled synchronously over the target VCPU context. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Turned out to be simpler than expected. I'm no longer able to reproduce the race I saw before. arch/x86/kvm/lapic.c |9 - arch/x86/kvm/x86.c | 16 +++- include/linux/kvm_host.h |2 ++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd..be1e37a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; -vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; -kvm_make_request(KVM_REQ_EVENT, vcpu); +kvm_make_request(KVM_REQ_INIT, vcpu); kvm_vcpu_kick(vcpu); } else { apic_debug(Ignoring de-assert INIT to vcpu %d\n, @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug(SIPI to vcpu %d vector 0x%02x\n, vcpu-vcpu_id, vector); -if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { +if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || +test_bit(KVM_REQ_INIT, vcpu-requests)) { result = 1; vcpu-arch.sipi_vector = vector; -vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; -kvm_make_request(KVM_REQ_EVENT, vcpu); +kvm_make_request(KVM_REQ_SIPI, vcpu); kvm_vcpu_kick(vcpu); } break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d0cf737..8c8843c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) 
kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap); } +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) +{ +if (kvm_check_request(KVM_REQ_INIT, vcpu)) +vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; +if (kvm_check_request(KVM_REQ_SIPI, vcpu) +vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { +vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; +return true; +} +return false; +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -5649,6 +5661,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = 0; if (vcpu-requests) { +kvm_check_init_and_sipi(vcpu); if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -6977,10 +6990,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { +if (kvm_check_init_and_sipi(vcpu)) +return 1; return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE !vcpu-arch.apf.halted) || !list_empty_careful(vcpu-async_pf.done) -|| vcpu-arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED || atomic_read(vcpu-arch.nmi_queued) || (kvm_arch_interrupt_allowed(vcpu) kvm_cpu_has_interrupt(vcpu)); This makes two subsequent calls to kvm_arch_vcpu_runnable() return different values if SIPI is pending. While it may not cause problem to current code (I haven't thought it through) with such semantics you gonna have a bad time. If I manage to follow Paolo's suggestion to eliminate the SIPI_RECEIVED state and all the staged logic around it, that might change. Will be more invasive but likely cleaner in its result. Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode
On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote: On 2013-03-04 18:56, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote: On 2013-03-04 15:15, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote: On 2013-03-04 14:22, Gleb Natapov wrote: On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote: The logic for calculating the value with which we call kvm_set_cr0/4 was broken (will definitely be visible with nested unrestricted guest mode support). Also, we performed the check regarding CR0_ALWAYSON too early when in guest mode. What really needs to be done on both CR0 and CR4 is to mask out L1-owned bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus, are not suited as input. For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON and refuse the update if it fails. To be fully consistent, we implement this check now also for CR4. Finally, we have to set the shadow to the value L2 wanted to write originally. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Found while making unrestricted guest mode working. Not sure what impact the bugs had on current feature level, if any. For interested folks, I've pushed my nEPT environment here: git://git.kiszka.org/linux-kvm.git nept-hacking arch/x86/kvm/vmx.c | 49 ++--- 1 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cc566b..d1dac08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. 
*/ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { - if (to_vmx(vcpu)-nested.vmxon - ((val VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) - return 1; - if (is_guest_mode(vcpu)) { - /* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. - */ Can't say I understand this patch yet, but it looks like the comment is still valid. Why have you removed it? L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think the comment was always misleading. I do not see how it is misleading. For everything but TS we will not get here (if L1 is kvm). For TS we will get here if L1 allows L2 to change it, but L0 does not. For everything *but guest-owned* we will get here, thus for most CR0 accesses (bit-wise, not regarding frequency). I do not see how. If bit is trapped by L1 we will not get here. We will do vmexit to L1 instead. nested_vmx_exit_handled_cr() check this condition. I am not arguing about you code (didn't grok it yet), but the comment still make sense to me. We get here when L2 changed cr0 in a way that did not change any of L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0 shadowed bits. That I can sign. But the rest about TS is just misleading as we trap _every_ change in L0 - except for TS under certain conditions. The old code was tested against TS only, that's what the comment witness. TS is just an example of how we can get here with KVM on KVM. Obviously other hypervisors may have different configuration. L2 may allow full guest access to CR0 and then each CR0 write by L2 will be handled here. Under what other condition we trap _every_ change in L0 - except for TS here? If you prefer, I'll leave part one in. Please do so. 
Without the comment it is not obvious why exit condition is not checked here. Still do not see why you object to TS part. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode
On 2013-03-04 19:39, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote: On 2013-03-04 18:56, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote: On 2013-03-04 15:15, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote: On 2013-03-04 14:22, Gleb Natapov wrote: On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote: The logic for calculating the value with which we call kvm_set_cr0/4 was broken (will definitely be visible with nested unrestricted guest mode support). Also, we performed the check regarding CR0_ALWAYSON too early when in guest mode. What really needs to be done on both CR0 and CR4 is to mask out L1-owned bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus, are not suited as input. For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON and refuse the update if it fails. To be fully consistent, we implement this check now also for CR4. Finally, we have to set the shadow to the value L2 wanted to write originally. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Found while making unrestricted guest mode working. Not sure what impact the bugs had on current feature level, if any. For interested folks, I've pushed my nEPT environment here: git://git.kiszka.org/linux-kvm.git nept-hacking arch/x86/kvm/vmx.c | 49 ++--- 1 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cc566b..d1dac08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. 
*/ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { - if (to_vmx(vcpu)-nested.vmxon - ((val VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) - return 1; - if (is_guest_mode(vcpu)) { - /* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. - */ Can't say I understand this patch yet, but it looks like the comment is still valid. Why have you removed it? L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think the comment was always misleading. I do not see how it is misleading. For everything but TS we will not get here (if L1 is kvm). For TS we will get here if L1 allows L2 to change it, but L0 does not. For everything *but guest-owned* we will get here, thus for most CR0 accesses (bit-wise, not regarding frequency). I do not see how. If bit is trapped by L1 we will not get here. We will do vmexit to L1 instead. nested_vmx_exit_handled_cr() check this condition. I am not arguing about you code (didn't grok it yet), but the comment still make sense to me. We get here when L2 changed cr0 in a way that did not change any of L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0 shadowed bits. That I can sign. But the rest about TS is just misleading as we trap _every_ change in L0 - except for TS under certain conditions. The old code was tested against TS only, that's what the comment witness. TS is just an example of how we can get here with KVM on KVM. Obviously other hypervisors may have different configuration. L2 may allow full guest access to CR0 and then each CR0 write by L2 will be handled here. Under what other condition we trap _every_ change in L0 - except for TS here? 
On FPU activation: cr0_guest_owned_bits = X86_CR0_TS; And on FPU deactivation: cr0_guest_owned_bits = 0; If you prefer, I'll leave part one in. Please do so. Without the comment it is not obvious why exit condition is not checked here. Still do not see why you object to TS part. It describes a corner case in a way that suggests this is the only reason why we get here. Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode
On Mon, Mar 04, 2013 at 08:23:52PM +0100, Jan Kiszka wrote: On 2013-03-04 19:39, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote: On 2013-03-04 18:56, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote: On 2013-03-04 15:15, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote: On 2013-03-04 14:22, Gleb Natapov wrote: On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote: The logic for calculating the value with which we call kvm_set_cr0/4 was broken (will definitely be visible with nested unrestricted guest mode support). Also, we performed the check regarding CR0_ALWAYSON too early when in guest mode. What really needs to be done on both CR0 and CR4 is to mask out L1-owned bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus, are not suited as input. For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON and refuse the update if it fails. To be fully consistent, we implement this check now also for CR4. Finally, we have to set the shadow to the value L2 wanted to write originally. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Found while making unrestricted guest mode working. Not sure what impact the bugs had on current feature level, if any. For interested folks, I've pushed my nEPT environment here: git://git.kiszka.org/linux-kvm.git nept-hacking arch/x86/kvm/vmx.c | 49 ++--- 1 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cc566b..d1dac08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. 
*/ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { -if (to_vmx(vcpu)-nested.vmxon -((val VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) -return 1; - if (is_guest_mode(vcpu)) { -/* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. - */ Can't say I understand this patch yet, but it looks like the comment is still valid. Why have you removed it? L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think the comment was always misleading. I do not see how it is misleading. For everything but TS we will not get here (if L1 is kvm). For TS we will get here if L1 allows L2 to change it, but L0 does not. For everything *but guest-owned* we will get here, thus for most CR0 accesses (bit-wise, not regarding frequency). I do not see how. If bit is trapped by L1 we will not get here. We will do vmexit to L1 instead. nested_vmx_exit_handled_cr() check this condition. I am not arguing about you code (didn't grok it yet), but the comment still make sense to me. We get here when L2 changed cr0 in a way that did not change any of L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0 shadowed bits. That I can sign. But the rest about TS is just misleading as we trap _every_ change in L0 - except for TS under certain conditions. The old code was tested against TS only, that's what the comment witness. TS is just an example of how we can get here with KVM on KVM. Obviously other hypervisors may have different configuration. L2 may allow full guest access to CR0 and then each CR0 write by L2 will be handled here. Under what other condition we trap _every_ change in L0 - except for TS here? 
On FPU activation: cr0_guest_owned_bits = X86_CR0_TS; And on FPU deactivation: cr0_guest_owned_bits = 0; That's exactly TS case that comment explains. Note that CR0_GUEST_HOST_MASK = ~cr0_guest_owned_bits. If you prefer, I'll leave part one in. Please do so. Without the comment it is not obvious why exit condition is not checked here. Still do not see why you object to TS part. It describes a corner case in a way that suggests this is the only reason why we get here. For KVM on KVM it is. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode
On 2013-03-04 21:00, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 08:37:38PM +0100, Jan Kiszka wrote: On 2013-03-04 20:33, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 08:23:52PM +0100, Jan Kiszka wrote: On 2013-03-04 19:39, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote: On 2013-03-04 18:56, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote: On 2013-03-04 15:15, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote: On 2013-03-04 14:22, Gleb Natapov wrote: On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote: The logic for calculating the value with which we call kvm_set_cr0/4 was broken (will definitely be visible with nested unrestricted guest mode support). Also, we performed the check regarding CR0_ALWAYSON too early when in guest mode. What really needs to be done on both CR0 and CR4 is to mask out L1-owned bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus, are not suited as input. For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON and refuse the update if it fails. To be fully consistent, we implement this check now also for CR4. Finally, we have to set the shadow to the value L2 wanted to write originally. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Found while making unrestricted guest mode working. Not sure what impact the bugs had on current feature level, if any. For interested folks, I've pushed my nEPT environment here: git://git.kiszka.org/linux-kvm.git nept-hacking arch/x86/kvm/vmx.c | 49 ++--- 1 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cc566b..d1dac08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. 
*/ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { - if (to_vmx(vcpu)-nested.vmxon - ((val VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) - return 1; - if (is_guest_mode(vcpu)) { - /* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. - */ Can't say I understand this patch yet, but it looks like the comment is still valid. Why have you removed it? L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think the comment was always misleading. I do not see how it is misleading. For everything but TS we will not get here (if L1 is kvm). For TS we will get here if L1 allows L2 to change it, but L0 does not. For everything *but guest-owned* we will get here, thus for most CR0 accesses (bit-wise, not regarding frequency). I do not see how. If bit is trapped by L1 we will not get here. We will do vmexit to L1 instead. nested_vmx_exit_handled_cr() check this condition. I am not arguing about you code (didn't grok it yet), but the comment still make sense to me. We get here when L2 changed cr0 in a way that did not change any of L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0 shadowed bits. That I can sign. But the rest about TS is just misleading as we trap _every_ change in L0 - except for TS under certain conditions. The old code was tested against TS only, that's what the comment witness. TS is just an example of how we can get here with KVM on KVM. Obviously other hypervisors may have different configuration. L2 may allow full guest access to CR0 and then each CR0 write by L2 will be handled here. Under what other condition we trap _every_ change in L0 - except for TS here? 
On FPU activation: cr0_guest_owned_bits = X86_CR0_TS; And on FPU deactivation: cr0_guest_owned_bits = 0; That's exactly TS case that comment explains. Note that CR0_GUEST_HOST_MASK = ~cr0_guest_owned_bits. Again, it's the inverse of what the comment suggest: we enter handle_set_cr0 for every change on CR0 that doesn't match the shadow - except TS was given to the guest by both L1 and L0 (or TS isn't changed as well). That doesn't make sense to me. I do not even sure what you are saying since you do not specify what shadow is matched. From the code I see that on CR0 exit to L0 from L2 we check if L2 tries to change CR0 bits that L1 claims to belong to it and do #vmexit to L1 if it is: if (vmcs12-cr0_guest_host_mask (val ^ vmcs12-cr0_read_shadow)) return 1; We never reach
Re: [PATCH] KVM: nVMX: Fix setting of CR0 and CR4 in guest mode
On Mon, Mar 04, 2013 at 09:12:25PM +0100, Jan Kiszka wrote: On 2013-03-04 21:00, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 08:37:38PM +0100, Jan Kiszka wrote: On 2013-03-04 20:33, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 08:23:52PM +0100, Jan Kiszka wrote: On 2013-03-04 19:39, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 07:08:08PM +0100, Jan Kiszka wrote: On 2013-03-04 18:56, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:25:47PM +0100, Jan Kiszka wrote: On 2013-03-04 15:15, Gleb Natapov wrote: On Mon, Mar 04, 2013 at 03:09:51PM +0100, Jan Kiszka wrote: On 2013-03-04 14:22, Gleb Natapov wrote: On Thu, Feb 28, 2013 at 10:44:47AM +0100, Jan Kiszka wrote: The logic for calculating the value with which we call kvm_set_cr0/4 was broken (will definitely be visible with nested unrestricted guest mode support). Also, we performed the check regarding CR0_ALWAYSON too early when in guest mode. What really needs to be done on both CR0 and CR4 is to mask out L1-owned bits and merge them in from GUEST_CR0/4. In contrast, arch.cr0/4 and arch.cr0/4_guest_owned_bits contain the mangled L0+L1 state and, thus, are not suited as input. For both CRs, we can then apply the check against VMXON_CRx_ALWAYSON and refuse the update if it fails. To be fully consistent, we implement this check now also for CR4. Finally, we have to set the shadow to the value L2 wanted to write originally. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Found while making unrestricted guest mode working. Not sure what impact the bugs had on current feature level, if any. 
For interested folks, I've pushed my nEPT environment here: git://git.kiszka.org/linux-kvm.git nept-hacking arch/x86/kvm/vmx.c | 49 ++--- 1 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7cc566b..d1dac08 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4605,37 +4605,48 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { -if (to_vmx(vcpu)-nested.vmxon -((val VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) -return 1; - if (is_guest_mode(vcpu)) { -/* - * We get here when L2 changed cr0 in a way that did not change - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), - * but did change L0 shadowed bits. This can currently happen - * with the TS bit: L0 may want to leave TS on (for lazy fpu - * loading) while pretending to allow the guest to change it. - */ Can't say I understand this patch yet, but it looks like the comment is still valid. Why have you removed it? L0 allows L1 or L2 at most to own TS, the rest is host-owned. I think the comment was always misleading. I do not see how it is misleading. For everything but TS we will not get here (if L1 is kvm). For TS we will get here if L1 allows L2 to change it, but L0 does not. For everything *but guest-owned* we will get here, thus for most CR0 accesses (bit-wise, not regarding frequency). I do not see how. If bit is trapped by L1 we will not get here. We will do vmexit to L1 instead. nested_vmx_exit_handled_cr() check this condition. I am not arguing about you code (didn't grok it yet), but the comment still make sense to me. We get here when L2 changed cr0 in a way that did not change any of L1's shadowed bits (see nested_vmx_exit_handled_cr), but did change L0 shadowed bits. That I can sign. 
But the rest about TS is just misleading as we trap _every_ change in L0 - except for TS under certain conditions. The old code was tested against TS only, that's what the comment witness. TS is just an example of how we can get here with KVM on KVM. Obviously other hypervisors may have different configuration. L2 may allow full guest access to CR0 and then each CR0 write by L2 will be handled here. Under what other condition we trap _every_ change in L0 - except for TS here? On FPU activation: cr0_guest_owned_bits = X86_CR0_TS; And on FPU deactivation: cr0_guest_owned_bits = 0; That's exactly TS case that comment explains. Note that CR0_GUEST_HOST_MASK = ~cr0_guest_owned_bits. Again, it's the inverse of what the comment suggest: we enter handle_set_cr0 for every change on CR0 that doesn't match the shadow - except TS was given to the guest by both L1 and L0 (or TS isn't changed as well). That doesn't make sense to me. I do not even sure what you are saying since you do not specify what shadow is
Re: [PATCH] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests
On 2013-03-04 15:28, Paolo Bonzini wrote: Il 03/03/2013 21:21, Jan Kiszka ha scritto: From: Jan Kiszka jan.kis...@siemens.com A VCPU sending INIT or SIPI to some other VCPU races for setting the remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED was overwritten by kvm_emulate_halt and, thus, got lost. Fix this by raising requests on the sender side that will then be handled synchronously over the target VCPU context. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Turned out to be simpler than expected. I'm no longer able to reproduce the race I saw before. arch/x86/kvm/lapic.c |9 - arch/x86/kvm/x86.c | 16 +++- include/linux/kvm_host.h |2 ++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd..be1e37a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; -vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; -kvm_make_request(KVM_REQ_EVENT, vcpu); +kvm_make_request(KVM_REQ_INIT, vcpu); kvm_vcpu_kick(vcpu); } else { apic_debug(Ignoring de-assert INIT to vcpu %d\n, @@ -743,11 +742,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug(SIPI to vcpu %d vector 0x%02x\n, vcpu-vcpu_id, vector); -if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { +if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || +test_bit(KVM_REQ_INIT, vcpu-requests)) { result = 1; vcpu-arch.sipi_vector = vector; -vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; -kvm_make_request(KVM_REQ_EVENT, vcpu); +kvm_make_request(KVM_REQ_SIPI, vcpu); kvm_vcpu_kick(vcpu); } break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d0cf737..8c8843c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5641,6 +5641,18 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) 
kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap); } +static bool kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) +{ +if (kvm_check_request(KVM_REQ_INIT, vcpu)) +vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; +if (kvm_check_request(KVM_REQ_SIPI, vcpu) +vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { +vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; Do you need KVM_MP_STATE_SIPI_RECEIVED at all anymore? Unfortunately, we cannot kill it as it was user-visible: When a VCPU receives KVM_MP_STATE_SIPI_RECEIVED, it leaves __vcpu_run with -EINTR and, thus, KVM_RUN. We actually return to userspace, allowing it to see this mp_state and also migrate the guest in this state. I could avoid this userspace exit (not sure what it is good for) but we will have to keep the logic to accept and convert the state into KVM_MP_STATE_RUNNABLE. So there is not much to simplify here, I'm afraid. Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH v2] KVM: x86: Convert INIT and SIPI signals into synchronously handled requests
On 2013-03-04 22:41, Jan Kiszka wrote: From: Jan Kiszka jan.kis...@siemens.com A VCPU sending INIT or SIPI to some other VCPU races for setting the remote VCPU's mp_state. When we were unlucky, KVM_MP_STATE_INIT_RECEIVED was overwritten by kvm_emulate_halt and, thus, got lost. Fix this by raising requests on the sender side that will then be handled synchronously over the target VCPU context. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Changes in v2: - check transition to INIT_RECEIVED in vcpu_enter_guest - removed return value of kvm_check_init_and_sipi - caller has to check for relevant transition afterward - add write barrier after setting sipi_vector arch/x86/kvm/lapic.c | 11 ++- arch/x86/kvm/x86.c | 15 +++ include/linux/kvm_host.h |2 ++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02b51dd..7986c9f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -731,8 +731,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_INIT: if (!trig_mode || level) { result = 1; - vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_make_request(KVM_REQ_INIT, vcpu); kvm_vcpu_kick(vcpu); } else { apic_debug(Ignoring de-assert INIT to vcpu %d\n, @@ -743,11 +742,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_STARTUP: apic_debug(SIPI to vcpu %d vector 0x%02x\n, vcpu-vcpu_id, vector); - if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED || + test_bit(KVM_REQ_INIT, vcpu-requests)) { result = 1; vcpu-arch.sipi_vector = vector; - vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_make_request(KVM_REQ_EVENT, vcpu); + /* make sure sipi_vector is visible for the receiver */ + smp_wmb(); + kvm_make_request(KVM_REQ_SIPI, vcpu); kvm_vcpu_kick(vcpu); } break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
d0cf737..0be04b9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5641,6 +5641,15 @@ static void update_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops-load_eoi_exitmap(vcpu, eoi_exit_bitmap); } +static void kvm_check_init_and_sipi(struct kvm_vcpu *vcpu) +{ + if (kvm_check_request(KVM_REQ_INIT, vcpu)) + vcpu-arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; And here is a small race between clearing REQ_INIT and setting INIT_RECEIVED. It can make the LAPIC drop the SIPI incorrectly. Need to break up test and clear, doing the clear after mp_state update. Yeah... Jan + if (kvm_check_request(KVM_REQ_SIPI, vcpu) + vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) + vcpu-arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -5649,6 +5658,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = 0; if (vcpu-requests) { + kvm_check_init_and_sipi(vcpu); + if (vcpu-arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + r = 1; + goto out; + } if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -6977,6 +6991,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + kvm_check_init_and_sipi(vcpu); return (vcpu-arch.mp_state == KVM_MP_STATE_RUNNABLE !vcpu-arch.apf.halted) || !list_empty_careful(vcpu-async_pf.done) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 722cae7..1a191c9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -124,6 +124,8 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_MCLOCK_INPROGRESS 20 #define KVM_REQ_EPR_EXIT 21 #define KVM_REQ_EOIBITMAP 22 +#define KVM_REQ_INIT 23 +#define KVM_REQ_SIPI 24 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 signature.asc Description: OpenPGP digital signature
Re: virtio PCI on KVM without IO BARs
On Thu, Feb 28, 2013 at 05:24:33PM +0200, Michael S. Tsirkin wrote: OK we talked about this a while ago, here's a summary and some proposals: At the moment, virtio PCI uses IO BARs for all accesses. The reason for IO use is the cost of different VM exit types of transactions and their emulation on KVM on x86 (it would be trivial to use memory BARs on non x86 platforms if they don't have PIO). Example benchmark (cycles per transaction): (io access) outw 1737 (memory access) movw 4341 for comparison: (hypercall access): vmcall 1566 (pv memory access) movw_fast 1817 (*explanation what this is below) This creates a problem if we want to make virtio devices proper PCI express devices with native hotplug support. This is because each hotpluggable PCI express device always has a PCI express port (port per device), where each port is represented by a PCI to PCI bridge. In turn, a PCI to PCI bridge claims a 4Kbyte aligned range of IO addresses. This means that we can have at most 15 such devices, this is a nasty limitation. Another problem with PIO is support for physical virtio devices, and nested virt: KVM currently programs all PIO accesses to cause vm exit, so using this device in a VM will be slow. So we really want to stop using IO BARs completely if at all possible, but looking at the table above, switching to memory BAR and movw for notifications will not work well. Possible solutions: 1. hypercall instead of PIO basically add a hypercall that gets an MMIO address/data and does an MMIO write for us. We'll want some capability in the device to let guest know this is what it should do. Pros: even faster than PIO Cons: this won't help nested or assigned devices (won't hurt them either as it will be conditional on the capability above). Cons: need host kernel support, which then has to be maintained forever, even if intel speeds up MMIO exits. 2. 
pv memory access There are two reasons that memory access is slower: - one is that it's handled as an EPT misconfiguration error so handled by cpu slow path - one is that we need to decode the x86 instruction in software, to calculate address/data for the access. We could agree that guests would use a specific instruction for virtio accesses, and fast-path it specifically. This is the pv memory access option above. Pros: helps assigned devices and nested virt Pros: easy to drop if hardware support is there Cons: a bit slower than IO Cons: need host kernel support 3. hypervisor assigned IO address qemu can reserve IO addresses and assign to virtio devices. 2 bytes per device (for notification and ISR access) will be enough. So we can reserve 4K and this gets us 2000 devices. From KVM perspective, nothing changes. We'll want some capability in the device to let guest know this is what it should do, and pass the io address. One way to reserve the addresses is by using the bridge. Pros: no need for host kernel support Pros: regular PIO so fast Cons: does not help assigned devices, breaks nested virt Simply counting pros/cons, option 3 seems best. It's also the easiest to implement. Agree. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v5 6/6] KVM: s390: Wire up ioeventfd.
On Thu, Feb 28, 2013 at 12:33:21PM +0100, Cornelia Huck wrote: Enable ioeventfd support on s390 and hook up diagnose 500 virtio-ccw notifications. Signed-off-by: Cornelia Huck cornelia.h...@de.ibm.com --- arch/s390/kvm/Kconfig| 1 + arch/s390/kvm/Makefile | 2 +- arch/s390/kvm/diag.c | 26 ++ arch/s390/kvm/kvm-s390.c | 1 + 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index b58dd86..3c43e30 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_EVENTFD ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 3975722..8fe9d65 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) ccflags-y := -Ivirt/kvm -Iarch/s390/kvm diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a390687..1c01a99 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -13,6 +13,7 @@ #include linux/kvm.h #include linux/kvm_host.h +#include asm/virtio-ccw.h #include kvm-s390.h #include trace.h #include trace-s390.h @@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EREMOTE; } +static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) +{ + int ret, idx; + + /* No virtio-ccw notification? Get out quickly. 
*/ + if (!vcpu-kvm-arch.css_support || + (vcpu-run-s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) + return -EOPNOTSUPP; + + idx = srcu_read_lock(vcpu-kvm-srcu); + /* + * The layout is as follows: + * - gpr 2 contains the subchannel id (passed as addr) + * - gpr 3 contains the virtqueue index (passed as datamatch) + */ + ret = kvm_io_bus_write(vcpu-kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + vcpu-run-s.regs.gprs[2], + 8, vcpu-run-s.regs.gprs[3]); + srcu_read_unlock(vcpu-kvm-srcu, idx); + /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ + return ret 0 ? ret : 0; +} + What about the cookie? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] arch/x86/kvm: beautify source code for __u32 irq which is never 0
于 2013年02月27日 22:08, Gleb Natapov 写道: On Wed, Feb 27, 2013 at 11:33:25AM +0800, Chen Gang wrote: irp-irq is __u32 which is never 0. Signed-off-by: Chen Gang gang.c...@asianux.com Applied, thanks. thank you, too. :-) -- Chen Gang Asianux Corporation -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: in-kernel interrupt controller steering
On 03/04/2013 04:20:47 PM, Alexander Graf wrote: Howdy, We just sat down to discuss the proposed XICS and MPIC interfaces and how we can take bits of each and create an interface that works for everyone. In this, it feels like we came to some conclusions. Some of which we already reached earlier, but forgot in between :). I hope I didn't forget too many pieces. Scott, Paul and Stuart, please add whatever you find missing in here. It looks about right. 1) We need to set the generic interrupt type of the system before we create vcpus. This is a new ioctl that sets the overall system interrupt controller type to a specific model. This used so that when we create vcpus, we can create the appended local interrupt controller state without the actual interrupt controller device available yet. It is also used later to switch between interrupt controller implementations. This interrupt type is write once and frozen after the first vcpu got created. Who is going to write up this patch? 2) Interrupt controllers (XICS / MPIC) get created by the device create api Getting and setting state of an interrupt controller also happens through this. Getting and setting state from vcpus happens through ONE_REG. Injecting interrupt happens through the normal irqchip ioctl (we probably need to encode the target device id in there somehow). This fits in nicely with a model where the interrupt controller is a proper QOM device in QEMU, since we can create it long after vcpus have been created. 3) We open code interrupt controller distinction There is no need for function pointers. We just switch() based on the type that gets set in the initial ioctl to determine which code to call. The retrieval of the irq type happens through a static inline function in a header that can return a constant number for configurations that don't support multiple in-kernel irqchips. 
4) The device attribute API has separate groups that target different use cases Paul needs live migration, so he will implement device attributes that enable him to do live migration. Scott doesn't implement live migration, so his MPIC attribute groups are solely for debugging purposes today. 5) There is no need for atomic device control accessors today. Live migration happens with vcpus stopped, so we don't need to be atomic in the kernel - user space interface. 6) The device attribute API will keep read and write (get / set) accessors. There is no specific need for a generic command ioctl. Gleb, is this OK? A bidirectional command accessor could be added later if a need arises. Will attributes still be renamed to commands, even if the get/set approach is retained? 7) Interrupt line connections to vcpus are implicit We don't explicitly mark which in-kernel irqchip interrupt line goes to which vcpu. This is done implicitly. If we see a need for it, we create a new irqchip device type that allows us to explicitly configure vcpu connections. Are there any changes needed to the device control api patch (just patch 1/6, not the rest of the patchset), besides Christoffer's request to tone down one of the comments, and whatever the response is to the questions in #6? Should we add a size field in kvm_device, both for error checking and to assist tools such as strace? -Scott -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
buildbot failure in qemu-kvm on default_x86_64_out_of_tree
The Buildbot has detected a new failure on builder default_x86_64_out_of_tree while building qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_out_of_tree/builds/1433 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_1 Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
buildbot failure in qemu-kvm on default_i386_debian_5_0
The Buildbot has detected a new failure on builder default_i386_debian_5_0 while building qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_i386_debian_5_0/builds/1494 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_2 Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
buildbot failure in qemu-kvm on default_i386_out_of_tree
The Buildbot has detected a new failure on builder default_i386_out_of_tree while building qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_i386_out_of_tree/builds/1431 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_2 Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
Re: [PATCH v13 1/8] save/load cpu runstate
On Mon, Mar 04, 2013 at 10:30:48AM +0100, Paolo Bonzini wrote: Il 28/02/2013 13:13, Hu Tao ha scritto: This patch enables preservation of cpu runstate during save/load vm. So when a vm is restored from snapshot, the cpu runstate is restored, too. I don't think this feature is worth breaking backwards migration compatibility. It is usually handled at a higher-level (management, like libvirt). If guest panic happens during migration, runstate will still be running on destination host without this patch. But, it does be a problem to break backwards migration compatibility. Please make this a separate patch. Sure. Paolo See following example: # save two vms: one is running, the other is paused (qemu) info status VM status: running (qemu) savevm running (qemu) stop (qemu) info status VM status: paused (qemu) savevm paused # restore the one running (qemu) info status VM status: paused (qemu) loadvm running (qemu) info status VM status: running # restore the one paused (qemu) loadvm paused (qemu) info status VM status: paused (qemu) cont (qemu)info status VM status: running Signed-off-by: Hu Tao hu...@cn.fujitsu.com --- include/sysemu/sysemu.h | 2 ++ migration.c | 6 +- monitor.c | 5 ++--- savevm.c| 1 + vl.c| 34 ++ 5 files changed, 40 insertions(+), 8 deletions(-) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index b19ec95..f121213 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -19,6 +19,8 @@ extern uint8_t qemu_uuid[]; int qemu_uuid_parse(const char *str, uint8_t *uuid); #define UUID_FMT %02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx +void save_run_state(void); +void load_run_state(void); bool runstate_check(RunState state); void runstate_set(RunState new_state); int runstate_is_running(void); diff --git a/migration.c b/migration.c index 11725ae..c29830e 100644 --- a/migration.c +++ b/migration.c @@ -107,11 +107,7 @@ static void process_incoming_migration_co(void *opaque) /* Make 
sure all file formats flush their mutable metadata */ bdrv_invalidate_cache_all(); -if (autostart) { -vm_start(); -} else { -runstate_set(RUN_STATE_PAUSED); -} +load_run_state(); } void process_incoming_migration(QEMUFile *f) diff --git a/monitor.c b/monitor.c index 32a6e74..bf974b4 100644 --- a/monitor.c +++ b/monitor.c @@ -2059,13 +2059,12 @@ void qmp_closefd(const char *fdname, Error **errp) static void do_loadvm(Monitor *mon, const QDict *qdict) { -int saved_vm_running = runstate_is_running(); const char *name = qdict_get_str(qdict, name); vm_stop(RUN_STATE_RESTORE_VM); -if (load_vmstate(name) == 0 saved_vm_running) { -vm_start(); +if (load_vmstate(name) == 0) { +load_run_state(); } } diff --git a/savevm.c b/savevm.c index a8a53ef..aa631eb 100644 --- a/savevm.c +++ b/savevm.c @@ -2143,6 +2143,7 @@ void do_savevm(Monitor *mon, const QDict *qdict) } saved_vm_running = runstate_is_running(); +save_run_state(); vm_stop(RUN_STATE_SAVE_VM); memset(sn, 0, sizeof(*sn)); diff --git a/vl.c b/vl.c index febd2ea..7991f2e 100644 --- a/vl.c +++ b/vl.c @@ -523,6 +523,7 @@ static int default_driver_check(QemuOpts *opts, void *opaque) /* QEMU state */ static RunState current_run_state = RUN_STATE_PRELAUNCH; +static RunState saved_run_state = RUN_STATE_RUNNING; typedef struct { RunState from; @@ -546,6 +547,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING }, +{ RUN_STATE_POSTMIGRATE, RUN_STATE_PAUSED }, { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, @@ -556,6 +558,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE }, { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, +{ RUN_STATE_RESTORE_VM, RUN_STATE_PAUSED }, { RUN_STATE_RUNNING, RUN_STATE_DEBUG }, { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR }, @@ -585,11 +588,39 @@ static const RunStateTransition 
runstate_transitions_def[] = { static bool runstate_valid_transitions[RUN_STATE_MAX][RUN_STATE_MAX]; +void save_run_state(void) +{ +saved_run_state = current_run_state; +} + +void load_run_state(void) +{ +if (saved_run_state == RUN_STATE_RUNNING) { +vm_start(); +} else if (!runstate_check(saved_run_state)) { +
buildbot failure in qemu-kvm on disable_kvm_i386_out_of_tree
The Buildbot has detected a new failure on builder disable_kvm_i386_out_of_tree while building qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_i386_out_of_tree/builds/1431 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_2 Build Reason: The Nightly scheduler named 'nightly_disable_kvm' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
buildbot failure in qemu-kvm on disable_kvm_x86_64_out_of_tree
The Buildbot has detected a new failure on builder disable_kvm_x86_64_out_of_tree while building qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_x86_64_out_of_tree/builds/1431 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_1 Build Reason: The Nightly scheduler named 'nightly_disable_kvm' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
buildbot failure in qemu-kvm on disable_kvm_x86_64_debian_5_0
The Buildbot has detected a new failure on builder disable_kvm_x86_64_debian_5_0 while building qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_x86_64_debian_5_0/builds/1482 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_1 Build Reason: The Nightly scheduler named 'nightly_disable_kvm' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
buildbot failure in qemu-kvm on disable_kvm_i386_debian_5_0
The Buildbot has detected a new failure on builder disable_kvm_i386_debian_5_0 while building qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_i386_debian_5_0/builds/1483 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_2 Build Reason: The Nightly scheduler named 'nightly_disable_kvm' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot
Re: [PATCH v13 4/8] add a new runstate: RUN_STATE_GUEST_PANICKED
On Mon, Mar 04, 2013 at 10:40:15AM +0100, Paolo Bonzini wrote: Il 28/02/2013 13:13, Hu Tao ha scritto: The guest will be in this state when it is panicked. Signed-off-by: Wen Congyang we...@cn.fujitsu.com Signed-off-by: Hu Tao hu...@cn.fujitsu.com --- migration.c | 1 + qapi-schema.json | 6 +- qmp.c| 3 ++- vl.c | 11 ++- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/migration.c b/migration.c index c29830e..fa17b82 100644 --- a/migration.c +++ b/migration.c @@ -698,6 +698,7 @@ static void *buffered_file_thread(void *opaque) int64_t start_time, end_time; DPRINTF(done iterating\n); +save_run_state(); start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); if (old_vm_running) { diff --git a/qapi-schema.json b/qapi-schema.json index 28b070f..8f1d138 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -174,11 +174,15 @@ # @suspended: guest is suspended (ACPI S3) # # @watchdog: the watchdog action is configured to pause and has been triggered +# +# @guest-panicked: the panicked action is configured to pause and has been +# triggered. 
## { 'enum': 'RunState', 'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused', 'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm', -'running', 'save-vm', 'shutdown', 'suspended', 'watchdog' ] } +'running', 'save-vm', 'shutdown', 'suspended', 'watchdog', +'guest-panicked' ] } ## # @SnapshotInfo diff --git a/qmp.c b/qmp.c index 5f1bed1..f5027f6 100644 --- a/qmp.c +++ b/qmp.c @@ -150,7 +150,8 @@ void qmp_cont(Error **errp) Error *local_err = NULL; if (runstate_check(RUN_STATE_INTERNAL_ERROR) || - runstate_check(RUN_STATE_SHUTDOWN)) { +runstate_check(RUN_STATE_SHUTDOWN) || +runstate_check(RUN_STATE_GUEST_PANICKED)) { error_set(errp, QERR_RESET_REQUIRED); return; } else if (runstate_check(RUN_STATE_SUSPENDED)) { diff --git a/vl.c b/vl.c index 3d08e1a..51d4922 100644 --- a/vl.c +++ b/vl.c @@ -536,6 +536,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING }, { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED }, +{ RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, Is this a consequence of the first patch? Yes. { RUN_STATE_INTERNAL_ERROR, RUN_STATE_RUNNING }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, @@ -549,6 +550,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING }, { RUN_STATE_POSTMIGRATE, RUN_STATE_PAUSED }, { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE }, +{ RUN_STATE_POSTMIGRATE, RUN_STATE_GUEST_PANICKED }, Impossible. GUEST_PANICKED requires an instruction to be executed in the guest, so it should first go to RUNNING. { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE }, @@ -559,6 +561,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, { RUN_STATE_RESTORE_VM, RUN_STATE_PAUSED }, +{ RUN_STATE_RESTORE_VM, RUN_STATE_GUEST_PANICKED }, Is it also for the first patch? Yes. 
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG }, { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR }, @@ -569,6 +572,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_RUNNING, RUN_STATE_SAVE_VM }, { RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN }, { RUN_STATE_RUNNING, RUN_STATE_WATCHDOG }, +{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED }, This one is obviously ok. { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING }, @@ -583,6 +587,10 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING }, { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE }, +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING }, +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_PAUSED }, +{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE }, Like SHUTDOWN, it should go first to PAUSED and then to RUNNING. A GUEST_PANICKED - RUNNING transition is not possible. You're seeing it because you lack the addition of GUEST_PANICKED here: if (runstate_check(RUN_STATE_INTERNAL_ERROR) || runstate_check(RUN_STATE_SHUTDOWN)) { runstate_set(RUN_STATE_PAUSED); } I think you should first move the INTERNAL_ERROR || SHUTDOWN checks to a separate function, so that you can then add GUEST_PANICKED. Will if
[PATCH] ARM: KVM: add support for minimal host vs guest profiling
In order to be able to correctly profile what is happening on the host, we need to be able to identify when we're running on the guest, and log these events differently. Perf offers a simple way to register callbacks into KVM. Mimic what x86 does and enjoy being able to profile your KVM host. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- This patch is on top of the pre-arm64 rework series posted earlier. arch/arm/include/asm/kvm_host.h | 3 ++ arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/arm.c | 4 +++ arch/arm/kvm/perf.c | 68 + 4 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 arch/arm/kvm/perf.c diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0c4e643..78813b8 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -207,4 +207,7 @@ static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr, kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); } +int kvm_perf_init(void); +int kvm_perf_teardown(void); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 8dc5e76..53c5ed8 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o -obj-y += coproc.o coproc_a15.o mmio.o psci.o +obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c10a45f..e821c37 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -928,7 +928,10 @@ static int init_hyp_mode(void) if (err) goto out_free_mappings; + kvm_perf_init(); + kvm_info(Hyp mode initialized successfully\n); + return 0; out_free_vfp: free_percpu(kvm_host_vfp_state); @@ -972,6 +975,7 @@ out_err: /* NOP: Compiling as a module not supported */ void 
kvm_arch_exit(void) { + kvm_perf_teardown(); } static int arm_init(void) diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c new file mode 100644 index 000..1a3849d --- /dev/null +++ b/arch/arm/kvm/perf.c @@ -0,0 +1,68 @@ +/* + * Based on the x86 implementation. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier marc.zyng...@arm.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#include linux/perf_event.h +#include linux/kvm_host.h + +#include asm/kvm_emulate.h + +static int kvm_is_in_guest(void) +{ +return kvm_arm_get_running_vcpu() != NULL; +} + +static int kvm_is_user_mode(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return !vcpu_mode_priv(vcpu); + + return 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_arm_get_running_vcpu(); + + if (vcpu) + return *vcpu_pc(vcpu); + + return 0; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest= kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +int kvm_perf_init(void) +{ + return perf_register_guest_info_callbacks(kvm_guest_cbs); +} + +int kvm_perf_teardown(void) +{ + return perf_unregister_guest_info_callbacks(kvm_guest_cbs); +} -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html
[PATCH 28/29] ARM: KVM: change kvm_tlb_flush_vmid to kvm_tlb_flush_vmid_ipa
v8 is capable of invalidating Stage-2 by IPA, but v7 is not. Change kvm_tlb_flush_vmid() to take an IPA parameter, which is then ignored by the invalidation code (and nuke the whole TLB as it always did). This allows v8 to implement a more optimized strategy. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_asm.h | 2 +- arch/arm/kvm/interrupts.S | 9 ++--- arch/arm/kvm/mmu.c | 8 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index e4956f4..18d5032 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -75,7 +75,7 @@ extern char __kvm_hyp_code_end[]; extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern void __kvm_flush_vm_context(void); -extern void __kvm_tlb_flush_vmid(struct kvm *kvm); +extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); #endif diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 8ca87ab..941aabc 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -35,15 +35,18 @@ __kvm_hyp_code_start: / * Flush per-VMID TLBs * - * void __kvm_tlb_flush_vmid(struct kvm *kvm); + * void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); * * We rely on the hardware to broadcast the TLB invalidation to all CPUs * inside the inner-shareable domain (which is the case for all v7 * implementations). If we come across a non-IS SMP implementation, we'll * have to use an IPI based mechanism. Until then, we stick to the simple * hardware assisted version. + * + * As v7 does not support flushing per IPA, just nuke the whole TLB + * instead, ignoring the ipa value. 
*/ -ENTRY(__kvm_tlb_flush_vmid) +ENTRY(__kvm_tlb_flush_vmid_ipa) push{r2, r3} add r0, r0, #KVM_VTTBR @@ -60,7 +63,7 @@ ENTRY(__kvm_tlb_flush_vmid) pop {r2, r3} bx lr -ENDPROC(__kvm_tlb_flush_vmid) +ENDPROC(__kvm_tlb_flush_vmid_ipa) / * Flush TLBs and instruction caches of all CPUs inside the inner-shareable diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b694f58..2f12e40 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -34,9 +34,9 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); -static void kvm_tlb_flush_vmid(struct kvm *kvm) +static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { - kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, @@ -457,7 +457,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, old_pte = *pte; kvm_set_pte(pte, *new_pte); if (pte_present(old_pte)) - kvm_tlb_flush_vmid(kvm); + kvm_tlb_flush_vmid_ipa(kvm, addr); else get_page(virt_to_page(pte)); @@ -674,7 +674,7 @@ static void handle_hva_to_gpa(struct kvm *kvm, static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) { unmap_stage2_range(kvm, gpa, PAGE_SIZE); - kvm_tlb_flush_vmid(kvm); + kvm_tlb_flush_vmid_ipa(kvm, gpa); } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 25/29] ARM: KVM: fix fault_ipa computing
The ARM ARM says that HPFAR reports bits [39:12] of the faulting IPA, and we need to complement it with the bottom 12 bits of the faulting VA. This is always 12 bits, irrespective of the page size. Makes it clearer in the code. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/kvm/mmu.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 71d15bc..f601e6f 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -614,8 +614,13 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - /* Adjust page offset */ - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ~PAGE_MASK; + /* +* The IPA is reported as [MAX:12], so we need to +* complement it with the bottom 12 bits from the +* faulting VA. This is always 12 bits, irrespective +* of the page size. +*/ + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); ret = io_mem_abort(vcpu, run, fault_ipa); goto out_unlock; } -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 21/29] ARM: KVM: allow HYP mappings to be at an offset from kernel mappings
arm64 cannot represent the kernel VAs in HYP mode, because of the lack of TTBR1 at EL2. A way to cope with this situation is to have HYP VAs to be an offset from the kernel VAs. Introduce macros to convert a kernel VA to a HYP VA, make the HYP mapping functions use these conversion macros. Also change the documentation to reflect the existence of the offset. On ARM, where we can have an identity mapping between kernel and HYP, the macros are without any effect. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_mmu.h | 8 arch/arm/kvm/mmu.c | 43 ++ 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index ac78493..3c71a1d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -22,6 +22,14 @@ #include asm/cacheflush.h #include asm/pgalloc.h +/* + * We directly use the kernel VA for the HYP, as we can directly share + * the mapping (HTTBR covers TTBR1). + */ +#define HYP_PAGE_OFFSET_MASK (~0UL) +#define HYP_PAGE_OFFSETPAGE_OFFSET +#define KERN_TO_HYP(kva) (kva) + int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_hyp_pmds(void); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 6b4ea18..ead6b16 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -101,14 +101,15 @@ void free_hyp_pmds(void) mutex_lock(kvm_hyp_pgd_mutex); for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) { - pgd = hyp_pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + pgd = hyp_pgd + pgd_index(hyp_addr); + pud = pud_offset(pgd, hyp_addr); if (pud_none(*pud)) continue; BUG_ON(pud_bad(*pud)); - pmd = pmd_offset(pud, addr); + pmd = pmd_offset(pud, hyp_addr); free_ptes(pmd, addr); pmd_free(NULL, pmd); pud_clear(pud); @@ -124,7 +125,9 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, struct page *page; for (addr = 
start PAGE_MASK; addr end; addr += PAGE_SIZE) { - pte = pte_offset_kernel(pmd, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + + pte = pte_offset_kernel(pmd, hyp_addr); BUG_ON(!virt_addr_valid(addr)); page = virt_to_page(addr); kvm_set_pte(pte, mk_pte(page, PAGE_HYP)); @@ -139,7 +142,9 @@ static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start, unsigned long addr; for (addr = start PAGE_MASK; addr end; addr += PAGE_SIZE) { - pte = pte_offset_kernel(pmd, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + + pte = pte_offset_kernel(pmd, hyp_addr); BUG_ON(pfn_valid(*pfn_base)); kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE)); (*pfn_base)++; @@ -154,12 +159,13 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, unsigned long addr, next; for (addr = start; addr end; addr = next) { - pmd = pmd_offset(pud, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + pmd = pmd_offset(pud, hyp_addr); BUG_ON(pmd_sect(*pmd)); if (pmd_none(*pmd)) { - pte = pte_alloc_one_kernel(NULL, addr); + pte = pte_alloc_one_kernel(NULL, hyp_addr); if (!pte) { kvm_err(Cannot allocate Hyp pte\n); return -ENOMEM; @@ -200,11 +206,12 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) mutex_lock(kvm_hyp_pgd_mutex); for (addr = start; addr end; addr = next) { - pgd = hyp_pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); + unsigned long hyp_addr = KERN_TO_HYP(addr); + pgd = hyp_pgd + pgd_index(hyp_addr); + pud = pud_offset(pgd, hyp_addr); if (pud_none_or_clear_bad(pud)) { - pmd = pmd_alloc_one(NULL, addr); + pmd = pmd_alloc_one(NULL, hyp_addr); if (!pmd) { kvm_err(Cannot allocate Hyp pmd\n); err = -ENOMEM; @@ -224,12 +231,13 @@ out: } /** - * create_hyp_mappings - map a kernel virtual address range in Hyp mode + * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range * @to:The virtual kernel end address
[PATCH 18/29] ARM: KVM: remove superfluous include from kvm_vgic.h
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_vgic.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h index ab97207..343744e 100644 --- a/arch/arm/include/asm/kvm_vgic.h +++ b/arch/arm/include/asm/kvm_vgic.h @@ -21,7 +21,6 @@ #include linux/kernel.h #include linux/kvm.h -#include linux/kvm_host.h #include linux/irqreturn.h #include linux/spinlock.h #include linux/types.h -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 17/29] ARM: KVM: abstract most MMU operations
Move low level MMU-related operations to kvm_mmu.h. This makes the MMU code reusable by the arm64 port. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_mmu.h | 58 ++ arch/arm/kvm/mmu.c | 58 +- 2 files changed, 70 insertions(+), 46 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 421a20b..ac78493 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -19,6 +19,9 @@ #ifndef __ARM_KVM_MMU_H__ #define __ARM_KVM_MMU_H__ +#include asm/cacheflush.h +#include asm/pgalloc.h + int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_hyp_pmds(void); @@ -36,6 +39,16 @@ phys_addr_t kvm_mmu_get_httbr(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); +static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) +{ + pte_val(*pte) = new_pte; + /* +* flush_pmd_entry just takes a void pointer and cleans the necessary +* cache entries, so we can reuse the function for ptes. +*/ + flush_pmd_entry(pte); +} + static inline bool kvm_is_write_fault(unsigned long hsr) { unsigned long hsr_ec = hsr HSR_EC_SHIFT; @@ -47,4 +60,49 @@ static inline bool kvm_is_write_fault(unsigned long hsr) return true; } +static inline void kvm_clean_pgd(pgd_t *pgd) +{ + clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); +} + +static inline void kvm_clean_pmd_entry(pmd_t *pmd) +{ + clean_pmd_entry(pmd); +} + +static inline void kvm_clean_pte(pte_t *pte) +{ + clean_pte_table(pte); +} + +static inline void kvm_set_s2pte_writable(pte_t *pte) +{ + pte_val(*pte) |= L_PTE_S2_RDWR; +} + +struct kvm; + +static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn) +{ + /* +* If we are going to insert an instruction page and the icache is +* either VIPT or PIPT, there is a potential problem where the host +* (or another VM) may have used the same page as this guest, and we +* read incorrect data from the icache. 
If we're using a PIPT cache, +* we can invalidate just that page, but if we are using a VIPT cache +* we need to invalidate the entire icache - damn shame - as written +* in the ARM ARM (DDI 0406C.b - Page B3-1393). +* +* VIVT caches are tagged using both the ASID and the VMID and doesn't +* need any kind of flushing (DDI 0406C.b - Page B3-1392). +*/ + if (icache_is_pipt()) { + unsigned long hva = gfn_to_hva(kvm, gfn); + __cpuc_coherent_user_range(hva, hva + PAGE_SIZE); + } else if (!icache_is_vivt_asid_tagged()) { + /* any kind of VIPT cache */ + __flush_icache_all(); + } +} + #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 8e9047a..6b4ea18 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -28,8 +28,6 @@ #include asm/kvm_mmio.h #include asm/kvm_asm.h #include asm/kvm_emulate.h -#include asm/mach/map.h -#include trace/events/kvm.h #include trace.h @@ -42,16 +40,6 @@ static void kvm_tlb_flush_vmid(struct kvm *kvm) kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); } -static void kvm_set_pte(pte_t *pte, pte_t new_pte) -{ - pte_val(*pte) = new_pte; - /* -* flush_pmd_entry just takes a void pointer and cleans the necessary -* cache entries, so we can reuse the function for ptes. 
-*/ - flush_pmd_entry(pte); -} - static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min, int max) { @@ -290,7 +278,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) VM_BUG_ON((unsigned long)pgd (S2_PGD_SIZE - 1)); memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); - clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); + kvm_clean_pgd(pgd); kvm-arch.pgd = pgd; return 0; @@ -422,22 +410,22 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, return 0; /* ignore calls from kvm_set_spte_hva */ pmd = mmu_memory_cache_alloc(cache); pud_populate(NULL, pud, pmd); - pmd += pmd_index(addr); get_page(virt_to_page(pud)); - } else - pmd = pmd_offset(pud, addr); + } + + pmd = pmd_offset(pud, addr); /* Create 2nd stage page table mapping - Level 2 */ if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ pte = mmu_memory_cache_alloc(cache); - clean_pte_table(pte); + kvm_clean_pte(pte); pmd_populate_kernel(NULL, pmd, pte); -
[PATCH 09/29] ARM: KVM: abstract SAS decoding away
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 15 +++ arch/arm/kvm/mmio.c| 17 +++-- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index c27d9c9..2cbb694 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -120,4 +120,19 @@ static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) return (kvm_vcpu_get_hsr(vcpu) 7) 1; } +static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) +{ + switch ((kvm_vcpu_get_hsr(vcpu) 22) 0x3) { + case 0: + return 1; + case 1: + return 2; + case 2: + return 4; + default: + kvm_err(Hardware is weird: SAS 0b11 is reserved\n); + return -EFAULT; + } +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 6495c1c..cd33a7e 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -77,20 +77,9 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return 1; } - switch ((kvm_vcpu_get_hsr(vcpu) 22) 0x3) { - case 0: - len = 1; - break; - case 1: - len = 2; - break; - case 2: - len = 4; - break; - default: - kvm_err(Hardware is weird: SAS 0b11 is reserved\n); - return -EFAULT; - } + len = kvm_vcpu_dabt_get_as(vcpu); + if (len 0) + return len; /* Unlikely... */ is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/29] ARM: KVM: abstract HSR_ISV away
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 5 + arch/arm/kvm/mmio.c| 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 3c01988..9531535 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -90,4 +90,9 @@ static inline unsigned long kvm_vcpu_get_hyp_pc(struct kvm_vcpu *vcpu) return vcpu->arch.fault.hyp_pc; } +static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & HSR_ISV); +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index ce63f39..41f96e9 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -130,7 +130,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, * space do its magic. */ - if (kvm_vcpu_get_hsr(vcpu) & HSR_ISV) { + if (kvm_vcpu_dabt_isvalid(vcpu)) { ret = decode_hsr(vcpu, fault_ipa, mmio); if (ret) return ret; -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 15/29] ARM: KVM: move exit handler selection to a separate file
The exit handler selection code cannot be shared with arm64 (two different modes, more exception classes...). Move it to a separate file (handle_exit.c). Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_host.h | 3 + arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/arm.c | 113 arch/arm/kvm/handle_exit.c | 140 4 files changed, 144 insertions(+), 114 deletions(-) create mode 100644 arch/arm/kvm/handle_exit.c diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index eb836e6..24f457a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -183,4 +183,7 @@ struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, + int exception_index); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index fc96ce6..8dc5e76 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -17,7 +17,7 @@ AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o -obj-y += arm.o guest.o mmu.o emulate.o reset.o +obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o mmio.o psci.o obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 93aaba6..de783ee 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -30,7 +30,6 @@ #define CREATE_TRACE_POINTS #include trace.h -#include asm/unified.h #include asm/uaccess.h #include asm/ptrace.h #include asm/mman.h @@ -480,118 +479,6 @@ static void update_vttbr(struct kvm *kvm) spin_unlock(kvm_vmid_lock); } -static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* SVC called from Hyp mode should never get 
here */ - kvm_debug(SVC called from Hyp mode shouldn't go here\n); - BUG(); - return -EINVAL; /* Squash warning */ -} - -static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), - vcpu-arch.hsr HSR_HVC_IMM_MASK); - - if (kvm_psci_call(vcpu)) - return 1; - - kvm_inject_undefined(vcpu); - return 1; -} - -static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - if (kvm_psci_call(vcpu)) - return 1; - - kvm_inject_undefined(vcpu); - return 1; -} - -static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* The hypervisor should never cause aborts */ - kvm_err(Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n, - kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu)); - return -EFAULT; -} - -static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* This is either an error in the ws. code or an external abort */ - kvm_err(Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n, - kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu)); - return -EFAULT; -} - -typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); -static exit_handle_fn arm_exit_handlers[] = { - [HSR_EC_WFI]= kvm_handle_wfi, - [HSR_EC_CP15_32]= kvm_handle_cp15_32, - [HSR_EC_CP15_64]= kvm_handle_cp15_64, - [HSR_EC_CP14_MR]= kvm_handle_cp14_access, - [HSR_EC_CP14_LS]= kvm_handle_cp14_load_store, - [HSR_EC_CP14_64]= kvm_handle_cp14_access, - [HSR_EC_CP_0_13]= kvm_handle_cp_0_13_access, - [HSR_EC_CP10_ID]= kvm_handle_cp10_id, - [HSR_EC_SVC_HYP]= handle_svc_hyp, - [HSR_EC_HVC]= handle_hvc, - [HSR_EC_SMC]= handle_smc, - [HSR_EC_IABT] = kvm_handle_guest_abort, - [HSR_EC_IABT_HYP] = handle_pabt_hyp, - [HSR_EC_DABT] = kvm_handle_guest_abort, - [HSR_EC_DABT_HYP] = handle_dabt_hyp, -}; - -/* - * Return 0 to return to guest, 0 on error, 0 (and set exit_reason) on - * proper exit to QEMU. 
- */ -static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index) -{ - unsigned long hsr_ec; - - switch (exception_index) { - case ARM_EXCEPTION_IRQ: - return 1; - case ARM_EXCEPTION_UNDEFINED: - kvm_err(Undefined exception in Hyp mode at: %#08lx\n, - kvm_vcpu_get_hyp_pc(vcpu)); - BUG(); - panic(KVM: Hypervisor undefined exception!\n); - case ARM_EXCEPTION_DATA_ABORT: - case
[PATCH 10/29] ARM: KVM: abstract IL decoding away
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 6 ++ arch/arm/kvm/arm.c | 3 +-- arch/arm/kvm/coproc.c | 2 +- arch/arm/kvm/mmio.c| 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 2cbb694..bb1a25c 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -135,4 +135,10 @@ static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) } } +/* This one is not specific to Data Abort */ +static inline bool kvm_vcpu_trap_il_is32bit(struct kvm_vcpu *vcpu) +{ + return !!((kvm_vcpu_get_hsr(vcpu) 25) 1); +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 6ba934b..e49a687 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -624,8 +624,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, * that fail their condition code check */ if (!kvm_condition_valid(vcpu)) { - bool is_wide = kvm_vcpu_get_hsr(vcpu) HSR_IL; - kvm_skip_instr(vcpu, is_wide); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index b305916..94eee8b 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -293,7 +293,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu, if (likely(r-access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ - kvm_skip_instr(vcpu, (kvm_vcpu_get_hsr(vcpu) 25) 1); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 1; } /* If access function fails, it should complain. */ diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index cd33a7e..9bd471a 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -101,7 +101,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * The MMIO instruction is emulated and should not be re-executed * in the guest. 
*/ - kvm_skip_instr(vcpu, (kvm_vcpu_get_hsr(vcpu) 25) 1); + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); return 0; } -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 24/29] ARM: KVM: move kvm_target_cpu to guest.c
guest.c already contains some target-specific checks. Let's move kvm_target_cpu() over there so arm.c is mostly target agnostic. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/kvm/arm.c | 17 - arch/arm/kvm/guest.c | 17 + 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index f053049..c10a45f 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -33,7 +33,6 @@ #include asm/uaccess.h #include asm/ptrace.h #include asm/mman.h -#include asm/cputype.h #include asm/tlbflush.h #include asm/cacheflush.h #include asm/virt.h @@ -301,22 +300,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -int __attribute_const__ kvm_target_cpu(void) -{ - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); - - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; - - switch (part_number) { - case ARM_CPU_PART_CORTEX_A15: - return KVM_ARM_TARGET_CORTEX_A15; - default: - return -EINVAL; - } -} - int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { int ret; diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 2339d96..152d036 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -22,6 +22,7 @@ #include linux/module.h #include linux/vmalloc.h #include linux/fs.h +#include asm/cputype.h #include asm/uaccess.h #include asm/kvm.h #include asm/kvm_asm.h @@ -180,6 +181,22 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } +int __attribute_const__ kvm_target_cpu(void) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + + if (implementor != ARM_CPU_IMP_ARM) + return -EINVAL; + + switch (part_number) { + case ARM_CPU_PART_CORTEX_A15: + return KVM_ARM_TARGET_CORTEX_A15; + default: + return -EINVAL; + } +} + int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init) { -- 1.7.12.4 -- To unsubscribe from this list: send the line 
unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 29/29] ARM: KVM: Fix length of mmio access
Instead of hardcoding the maximum MMIO access to be 4 bytes, compare it to sizeof(unsigned long), which will do the right thing on both 32 and 64bit systems. Same thing for sign extension. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/kvm/mmio.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 9bd471a..bd1bbcd 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -39,10 +39,10 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!run->mmio.is_write) { dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt); - memset(dest, 0, sizeof(int)); + *dest = 0; len = run->mmio.len; - if (len > 4) + if (len > sizeof(unsigned long)) return -EINVAL; memcpy(dest, run->mmio.data, len); @@ -50,7 +50,8 @@ trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, *((u64 *)run->mmio.data)); - if (vcpu->arch.mmio_decode.sign_extend && len < 4) { + if (vcpu->arch.mmio_decode.sign_extend && + len < sizeof(unsigned long)) { mask = 1U << ((len * 8) - 1); *dest = (*dest ^ mask) - mask; } -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 22/29] ARM: KVM: fix address validation for HYP mappings
__create_hyp_mappings() performs some kind of address validation before creating the mapping, by verifying that the start address is above PAGE_OFFSET. This check is not completely correct for kernel memory (the upper boundary has to be checked as well so we do not end up with highmem pages), and wrong for IO mappings (the mapping must exist in the vmalloc region). Fix this by using the proper predicates (virt_addr_valid and is_vmalloc_addr), which also work correctly on ARM64 (where the vmalloc region is below PAGE_OFFSET). Also change the BUG_ON() into a less aggressive error return. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/kvm/mmu.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index ead6b16..ec14269 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -200,8 +200,13 @@ static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) unsigned long addr, next; int err = 0; - BUG_ON(start > end); - if (start < PAGE_OFFSET) + if (start >= end) + return -EINVAL; + /* Check for a valid kernel memory mapping */ + if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1))) + return -EINVAL; + /* Check for a valid kernel IO mapping */ + if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))) return -EINVAL; mutex_lock(kvm_hyp_pgd_mutex); -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 19/29] ARM: KVM: move hyp init to kvm_host.h
Make the split of the pgd_ptr an implementation specific thing by moving the init call to an inline function. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_host.h | 19 +++ arch/arm/kvm/arm.c | 12 +--- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 24f457a..f00a557 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -186,4 +186,23 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); +static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr, + unsigned long hyp_stack_ptr, + unsigned long vector_ptr) +{ + unsigned long pgd_low, pgd_high; + + pgd_low = (pgd_ptr ((1ULL 32) - 1)); + pgd_high = (pgd_ptr 32ULL); + + /* +* Call initialization code, and switch to the full blown +* HYP code. The init code doesn't need to preserve these registers as +* r1-r3 and r12 are already callee save according to the AAPCS. +* Note that we slightly misuse the prototype by casing the pgd_low to +* a void *. 
+*/ + kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index de783ee..3c7c50a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -813,7 +813,6 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *vector) { unsigned long long pgd_ptr; - unsigned long pgd_low, pgd_high; unsigned long hyp_stack_ptr; unsigned long stack_page; unsigned long vector_ptr; @@ -822,20 +821,11 @@ static void cpu_init_hyp_mode(void *vector) __hyp_set_vectors((unsigned long)vector); pgd_ptr = (unsigned long long)kvm_mmu_get_httbr(); - pgd_low = (pgd_ptr ((1ULL 32) - 1)); - pgd_high = (pgd_ptr 32ULL); stack_page = __get_cpu_var(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)__kvm_hyp_vector; - /* -* Call initialization code, and switch to the full blown -* HYP code. The init code doesn't need to preserve these registers as -* r1-r3 and r12 are already callee save according to the AAPCS. -* Note that we slightly misuse the prototype by casing the pgd_low to -* a void *. -*/ - kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr); + __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); } /** -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 13/29] ARM: KVM: abstract HSR_EC_IABT away
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 5 + arch/arm/kvm/mmu.c | 8 +++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 75e54ba..2f5cc48 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -146,6 +146,11 @@ static inline u8 kvm_vcpu_trap_get_class(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) HSR_EC_SHIFT; } +static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_get_class(vcpu) == HSR_EC_IABT; +} + static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) HSR_FSC_TYPE; diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index e00f28d..8e9047a 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -585,7 +585,6 @@ out_unlock: */ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) { - unsigned long hsr_ec; unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; @@ -593,8 +592,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn_t gfn; int ret, idx; - hsr_ec = kvm_vcpu_trap_get_class(vcpu); - is_iabt = (hsr_ec == HSR_EC_IABT); + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), @@ -603,8 +601,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Check the stage-2 fault is trans. 
fault or write fault */ fault_status = kvm_vcpu_trap_get_fault(vcpu); if (fault_status != FSC_FAULT fault_status != FSC_PERM) { - kvm_err(Unsupported fault status: EC=%#lx DFCS=%#lx\n, - hsr_ec, fault_status); + kvm_err(Unsupported fault status: EC=%#x DFCS=%#lx\n, + kvm_vcpu_trap_get_class(vcpu), fault_status); return -EFAULT; } -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 00/29] ARM: KVM: pre-arm64 KVM/arm rework
This patch series is reworking KVM/arm in order to prepare the code to be shared with the upcoming KVM/arm64. Nothing major here, just a lot of accessors, small cleanups and fixes to make the code useable on arm64. This code has been tested on VE-TC2 and arm64 models. As always, comments are most welcome. Marc Zyngier (29): ARM: KVM: convert GP registers from u32 to unsigned long ARM: KVM: abstract fault register accesses ARM: KVM: abstract HSR_ISV away ARM: KVM: abstract HSR_WNR away ARM: KVM: abstract HSR_SSE away ARM: KVM: abstract HSR_SRT_{MASK,SHIFT} away ARM: KVM: abstract external abort detection away ARM: KVM: abstract S1TW abort detection away ARM: KVM: abstract SAS decoding away ARM: KVM: abstract IL decoding away ARM: KVM: abstract exception class decoding away ARM: KVM: abstract fault decoding away ARM: KVM: abstract HSR_EC_IABT away ARM: KVM: move kvm_condition_valid to emulate.c ARM: KVM: move exit handler selection to a separate file ARM: KVM: move kvm_handle_wfi to handle_exit.c ARM: KVM: abstract most MMU operations ARM: KVM: remove superfluous include from kvm_vgic.h ARM: KVM: move hyp init to kvm_host.h ARM: KVM: use kvm_kernel_vfp_t as an abstract type for VFP containers ARM: KVM: allow HYP mappings to be at an offset from kernel mappings ARM: KVM: fix address validation for HYP mappings ARM: KVM: sanitize freeing of HYP page tables ARM: KVM: move kvm_target_cpu to guest.c ARM: KVM: fix fault_ipa computing ARM: KVM: vgic: decouple alignment restriction from page size ARM: KVM: move include of asm/idmap.h to kvm_mmu.h ARM: KVM: change kvm_tlb_flush_vmid to kvm_tlb_flush_vmid_ipa ARM: KVM: Fix length of mmio access arch/arm/include/asm/kvm_asm.h | 2 +- arch/arm/include/asm/kvm_emulate.h | 101 +-- arch/arm/include/asm/kvm_host.h| 42 ++-- arch/arm/include/asm/kvm_mmu.h | 67 + arch/arm/include/asm/kvm_vgic.h| 1 - arch/arm/include/uapi/asm/kvm.h| 12 +-- arch/arm/kernel/asm-offsets.c | 8 +- arch/arm/kvm/Makefile | 2 +- arch/arm/kvm/arm.c | 194 + 
arch/arm/kvm/coproc.c | 28 +++--- arch/arm/kvm/coproc.h | 4 +- arch/arm/kvm/emulate.c | 75 +- arch/arm/kvm/guest.c | 17 arch/arm/kvm/handle_exit.c | 159 ++ arch/arm/kvm/interrupts.S | 9 +- arch/arm/kvm/mmio.c| 46 - arch/arm/kvm/mmu.c | 184 +-- arch/arm/kvm/vgic.c| 2 +- 18 files changed, 568 insertions(+), 385 deletions(-) create mode 100644 arch/arm/kvm/handle_exit.c -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06/29] ARM: KVM: abstract HSR_SRT_{MASK,SHIFT} away
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 5 + arch/arm/kvm/mmio.c| 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 2a077bc..603f5ea 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -105,4 +105,9 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) HSR_SSE); } +static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) +{ + return (kvm_vcpu_get_hsr(vcpu) HSR_SRT_MASK) HSR_SRT_SHIFT; +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 058029c..586063d 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -94,7 +94,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); - rt = (kvm_vcpu_get_hsr(vcpu) HSR_SRT_MASK) HSR_SRT_SHIFT; + rt = kvm_vcpu_dabt_get_rd(vcpu); if (kvm_vcpu_reg_is_pc(vcpu, rt)) { /* IO memory trying to read/write pc */ -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 27/29] ARM: KVM: move include of asm/idmap.h to kvm_mmu.h
Since the arm64 code doesn't have a global asm/idmap.h file, move the inclusion to asm/kvm_mmu.h. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_mmu.h | 1 + arch/arm/kvm/mmu.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 3c71a1d..970f3b5 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -21,6 +21,7 @@ #include asm/cacheflush.h #include asm/pgalloc.h +#include asm/idmap.h /* * We directly use the kernel VA for the HYP, as we can directly share diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index f601e6f..b694f58 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -20,7 +20,6 @@ #include linux/kvm_host.h #include linux/io.h #include trace/events/kvm.h -#include asm/idmap.h #include asm/pgalloc.h #include asm/cacheflush.h #include asm/kvm_arm.h -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 23/29] ARM: KVM: sanitize freeing of HYP page tables
Instead of trying to free everything from PAGE_OFFSET to the top of memory, use the virt_addr_valid macro to check the upper limit. Also do the same for the vmalloc region where the IO mappings are allocated. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/kvm/mmu.c | 44 ++-- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index ec14269..71d15bc 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -86,34 +86,42 @@ static void free_ptes(pmd_t *pmd, unsigned long addr) } } +static void free_hyp_pgd_entry(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long hyp_addr = KERN_TO_HYP(addr); + + pgd = hyp_pgd + pgd_index(hyp_addr); + pud = pud_offset(pgd, hyp_addr); + + if (pud_none(*pud)) + return; + BUG_ON(pud_bad(*pud)); + + pmd = pmd_offset(pud, hyp_addr); + free_ptes(pmd, addr); + pmd_free(NULL, pmd); + pud_clear(pud); +} + /** * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables * * Assumes this is a page table used strictly in Hyp-mode and therefore contains - * only mappings in the kernel memory area, which is above PAGE_OFFSET. + * either mappings in the kernel memory area (above PAGE_OFFSET), or + * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). 
*/ void free_hyp_pmds(void) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; unsigned long addr; mutex_lock(kvm_hyp_pgd_mutex); - for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); - - if (pud_none(*pud)) - continue; - BUG_ON(pud_bad(*pud)); - - pmd = pmd_offset(pud, hyp_addr); - free_ptes(pmd, addr); - pmd_free(NULL, pmd); - pud_clear(pud); - } + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) + free_hyp_pgd_entry(addr); + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) + free_hyp_pgd_entry(addr); mutex_unlock(kvm_hyp_pgd_mutex); } -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 14/29] ARM: KVM: move kvm_condition_valid to emulate.c
This is really hardware emulation, and as such it better be with its little friends. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 1 + arch/arm/kvm/arm.c | 45 -- arch/arm/kvm/emulate.c | 45 ++ 3 files changed, 46 insertions(+), 45 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 2f5cc48..88c43df 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -28,6 +28,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run); +bool kvm_condition_valid(struct kvm_vcpu *vcpu); void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr); void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 4e83d57..93aaba6 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -44,7 +44,6 @@ #include asm/kvm_emulate.h #include asm/kvm_coproc.h #include asm/kvm_psci.h -#include asm/opcodes.h #ifdef REQUIRES_VIRT __asm__(.arch_extension virt); @@ -546,50 +545,6 @@ static exit_handle_fn arm_exit_handlers[] = { }; /* - * A conditional instruction is allowed to trap, even though it - * wouldn't be executed. So let's re-implement the hardware, in - * software! - */ -static bool kvm_condition_valid(struct kvm_vcpu *vcpu) -{ - unsigned long cpsr, cond, insn; - - /* -* Exception Code 0 can only happen if we set HCR.TGE to 1, to -* catch undefined instructions, and then we won't get past -* the arm_exit_handlers test anyway. -*/ - BUG_ON(!kvm_vcpu_trap_get_class(vcpu)); - - /* Top two bits non-zero? Unconditional. */ - if (kvm_vcpu_get_hsr(vcpu) 30) - return true; - - cpsr = *vcpu_cpsr(vcpu); - - /* Is condition field valid? 
*/ - if ((kvm_vcpu_get_hsr(vcpu) HSR_CV) HSR_CV_SHIFT) - cond = (kvm_vcpu_get_hsr(vcpu) HSR_COND) HSR_COND_SHIFT; - else { - /* This can happen in Thumb mode: examine IT state. */ - unsigned long it; - - it = ((cpsr 8) 0xFC) | ((cpsr 25) 0x3); - - /* it == 0 = unconditional. */ - if (it == 0) - return true; - - /* The cond for this insn works out as the top 4 bits. */ - cond = (it 4); - } - - /* Shift makes it look like an ARM-mode instruction */ - insn = cond 28; - return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL; -} - -/* * Return 0 to return to guest, 0 on error, 0 (and set exit_reason) on * proper exit to QEMU. */ diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index d3094eb..04dbac6 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -20,6 +20,7 @@ #include linux/kvm_host.h #include asm/kvm_arm.h #include asm/kvm_emulate.h +#include asm/opcodes.h #include trace/events/kvm.h #include trace.h @@ -176,6 +177,50 @@ int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +/* + * A conditional instruction is allowed to trap, even though it + * wouldn't be executed. So let's re-implement the hardware, in + * software! + */ +bool kvm_condition_valid(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr, cond, insn; + + /* +* Exception Code 0 can only happen if we set HCR.TGE to 1, to +* catch undefined instructions, and then we won't get past +* the arm_exit_handlers test anyway. +*/ + BUG_ON(!kvm_vcpu_trap_get_class(vcpu)); + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) 30) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + /* Is condition field valid? */ + if ((kvm_vcpu_get_hsr(vcpu) HSR_CV) HSR_CV_SHIFT) + cond = (kvm_vcpu_get_hsr(vcpu) HSR_COND) HSR_COND_SHIFT; + else { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr 8) 0xFC) | ((cpsr 25) 0x3); + + /* it == 0 = unconditional. 
*/ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it 4); + } + + /* Shift makes it look like an ARM-mode instruction */ + insn = cond 28; + return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL; +} + /** * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block * @vcpu: The VCPU pointer -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 08/29] ARM: KVM: abstract S1TW abort detection away
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 5 + arch/arm/kvm/mmio.c| 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 90e92b4..c27d9c9 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -115,4 +115,9 @@ static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) return (kvm_vcpu_get_hsr(vcpu) 8) 1; } +static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) +{ + return (kvm_vcpu_get_hsr(vcpu) 7) 1; +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index e4682a3..6495c1c 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -71,7 +71,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return 1; } - if ((kvm_vcpu_get_hsr(vcpu) 7) 1) { + if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); return 1; -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/29] ARM: KVM: convert GP registers from u32 to unsigned long
On 32bit ARM, unsigned long is guaranteed to be a 32bit quantity. On 64bit ARM, it is a 64bit quantity. In order to be able to share code between the two architectures, convert the registers to be unsigned long, so the core code can be oblivious of the change. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 12 ++-- arch/arm/include/uapi/asm/kvm.h| 12 ++-- arch/arm/kvm/coproc.c | 4 ++-- arch/arm/kvm/coproc.h | 4 ++-- arch/arm/kvm/emulate.c | 22 +++--- arch/arm/kvm/mmio.c| 2 +- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index fd61199..510488a 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -23,8 +23,8 @@ #include asm/kvm_asm.h #include asm/kvm_mmio.h -u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); -u32 *vcpu_spsr(struct kvm_vcpu *vcpu); +unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); +unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr); @@ -37,14 +37,14 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) return 1; } -static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu) +static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu) { - return (u32 *)vcpu-arch.regs.usr_regs.ARM_pc; + return vcpu-arch.regs.usr_regs.ARM_pc; } -static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu) +static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu) { - return (u32 *)vcpu-arch.regs.usr_regs.ARM_cpsr; + return vcpu-arch.regs.usr_regs.ARM_cpsr; } static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 023bfeb..c1ee007 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -53,12 +53,12 @@ #define KVM_ARM_FIQ_spsr fiq_regs[7] struct kvm_regs { - 
struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */ - __u32 svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */ - __u32 abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */ - __u32 und_regs[3]; /* SP_und, LR_und, SPSR_und */ - __u32 irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */ - __u32 fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */ + struct pt_regs usr_regs;/* R0_usr - R14_usr, PC, CPSR */ + unsigned long svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */ + unsigned long abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */ + unsigned long und_regs[3]; /* SP_und, LR_und, SPSR_und */ + unsigned long irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */ + unsigned long fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */ }; /* Supported Processor Types */ diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 4ea9a98..38e76bc 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -76,7 +76,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { - u32 val; + unsigned long val; int cpu; cpu = get_cpu(); @@ -298,7 +298,7 @@ static int emulate_cp15(struct kvm_vcpu *vcpu, } /* If access function fails, it should complain. 
*/ } else { - kvm_err(Unsupported guest CP15 access at: %08x\n, + kvm_err(Unsupported guest CP15 access at: %08lx\n, *vcpu_pc(vcpu)); print_cp_instr(params); } diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 992adfa..b7301d3 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -84,7 +84,7 @@ static inline bool read_zero(struct kvm_vcpu *vcpu, static inline bool write_to_read_only(struct kvm_vcpu *vcpu, const struct coproc_params *params) { - kvm_debug(CP15 write to read-only register at: %08x\n, + kvm_debug(CP15 write to read-only register at: %08lx\n, *vcpu_pc(vcpu)); print_cp_instr(params); return false; @@ -93,7 +93,7 @@ static inline bool write_to_read_only(struct kvm_vcpu *vcpu, static inline bool read_from_write_only(struct kvm_vcpu *vcpu, const struct coproc_params *params) { - kvm_debug(CP15 read to write-only register at: %08x\n, + kvm_debug(CP15 read to write-only register at: %08lx\n, *vcpu_pc(vcpu)); print_cp_instr(params); return false; diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index d61450a..d3094eb 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -109,10 +109,10 @@ static
[PATCH 05/29] ARM: KVM: abstract HSR_SSE away
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/include/asm/kvm_emulate.h | 5 + arch/arm/kvm/mmio.c| 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 236fadd..2a077bc 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -100,4 +100,9 @@ static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) HSR_WNR); } +static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) HSR_SSE); +} + #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 7d58cb5..058029c 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -93,7 +93,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } is_write = kvm_vcpu_dabt_iswrite(vcpu); - sign_extend = kvm_vcpu_get_hsr(vcpu) HSR_SSE; + sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = (kvm_vcpu_get_hsr(vcpu) HSR_SRT_MASK) HSR_SRT_SHIFT; if (kvm_vcpu_reg_is_pc(vcpu, rt)) { -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 2/8] start vm after resetting it
On Thu, Feb 28, 2013 at 02:23:42PM +0100, Jan Kiszka wrote: On 2013-02-28 13:13, Hu Tao wrote: From: Wen Congyang we...@cn.fujitsu.com The guest should run after resetting it, but it does not run if its old state is RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED. We don't set runstate to RUN_STATE_PAUSED when resetting the guest, so the runstate will be changed from RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED to RUN_STATE_RUNNING(not RUN_STATE_PAUSED). I just wonder what will happen if I interrupted the guest via gdb and then issue monitor system_reset, also via gdb - common pattern if you set a breakpoint on some BUG() or fault handler and then want to restart the guest. Will the guest continue then while gdb thinks it is still stopped? Likely, we do not differentiate between gdb-initiated stops and the rest. Could you clarify? The guest won't continue unless a gdb continue is issued. Anyway, I'll separate this patch, as Paolo requested. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v13 2/8] start vm after resetting it
On Mon, Mar 04, 2013 at 10:32:17AM +0100, Paolo Bonzini wrote: Il 28/02/2013 13:13, Hu Tao ha scritto: From: Wen Congyang we...@cn.fujitsu.com The guest should run after resetting it, but it does not run if its old state is RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED. We don't set runstate to RUN_STATE_PAUSED when resetting the guest, so the runstate will be changed from RUN_STATE_INTERNAL_ERROR or RUN_STATE_PAUSED to RUN_STATE_RUNNING(not RUN_STATE_PAUSED). This is also debatable. In particular, restarting an INTERNAL_ERROR guest makes it harder to inspect the state at the time of the failure. INTERNAL_ERROR should never happen, let's separate this patch too. Sure. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/29] arm64: KVM: define HYP and Stage-2 translation page flags
Add HYP and S2 page flags, for both normal and device memory. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/pgtable-hwdef.h | 13 + arch/arm64/include/asm/pgtable.h | 13 + arch/arm64/mm/mmu.c| 6 +- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 75fd13d..acb4ee5 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -35,6 +35,7 @@ /* * Section */ +#define PMD_SECT_USER (_AT(pteval_t, 1) 6) /* AP[1] */ #define PMD_SECT_S (_AT(pmdval_t, 3) 8) #define PMD_SECT_AF(_AT(pmdval_t, 1) 10) #define PMD_SECT_NG(_AT(pmdval_t, 1) 11) @@ -68,6 +69,18 @@ #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) 2) /* + * 2nd stage PTE definitions + */ +#define PTE_S2_RDONLY (_AT(pteval_t, 1) 6) /* HAP[1] */ +#define PTE_S2_RDWR (_AT(pteval_t, 2) 6) /* HAP[2:1] */ + +/* + * EL2/HYP PTE/PMD definitions + */ +#define PMD_HYPPMD_SECT_USER +#define PTE_HYPPTE_USER + +/* * 40-bit physical address supported. 
*/ #define PHYS_MASK_SHIFT(40) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e333a24..11c608a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -60,6 +60,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF extern pgprot_t pgprot_default; +extern pgprot_tpgprot_device; #define __pgprot_modify(prot,mask,bits) \ __pgprot((pgprot_val(prot) ~(mask)) | (bits)) @@ -76,6 +77,12 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL_MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) +#define PAGE_HYP _MOD_PROT(pgprot_default, PTE_HYP) +#define PAGE_HYP_DEVICE_MOD_PROT(pgprot_device, PTE_HYP) + +#define PAGE_S2_MOD_PROT(pgprot_default, PTE_USER | PTE_S2_RDONLY) +#define PAGE_S2_DEVICE _MOD_PROT(pgprot_device, PTE_USER | PTE_S2_RDWR) + #define __PAGE_NONE__pgprot(((_PAGE_DEFAULT) ~PTE_TYPE_MASK) | PTE_PROT_NONE) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) @@ -197,6 +204,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define pmd_bad(pmd) (!(pmd_val(pmd) 2)) +#define pmd_table(pmd) ((pmd_val(pmd) PMD_TYPE_MASK) == \ +PMD_TYPE_TABLE) +#define pmd_sect(pmd) ((pmd_val(pmd) PMD_TYPE_MASK) == \ +PMD_TYPE_SECT) + + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 224b44a..df03aea 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -44,6 +44,7 @@ struct page *empty_zero_page; EXPORT_SYMBOL(empty_zero_page); pgprot_t pgprot_default; +pgprot_t pgprot_device; EXPORT_SYMBOL(pgprot_default); static pmdval_t prot_sect_kernel; @@ -127,10 +128,11 @@ early_param(cachepolicy, early_cachepolicy); */ static void __init 
init_mem_pgprot(void) { - pteval_t default_pgprot; + pteval_t default_pgprot, device_pgprot; int i; default_pgprot = PTE_ATTRINDX(MT_NORMAL); + device_pgprot = PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN; prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | PMD_ATTRINDX(MT_NORMAL); #ifdef CONFIG_SMP @@ -138,6 +140,7 @@ static void __init init_mem_pgprot(void) * Mark memory with the shared attribute for SMP systems */ default_pgprot |= PTE_SHARED; + device_pgprot |= PTE_SHARED; prot_sect_kernel |= PMD_SECT_S; #endif @@ -147,6 +150,7 @@ static void __init init_mem_pgprot(void) } pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot); + pgprot_device = __pgprot(PTE_TYPE_PAGE | PTE_AF | device_pgprot); } pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 00/29] Port of KVM to arm64
This series contains the implementation of KVM for arm64. It depends on the pre-arm64 rework series I posted earlier, as well as on the tiny perf patch sent just after. The code is unsurprisingly extremely similar to the KVM/arm code, and a lot of it is actually shared with the 32bit version. Some of the include files are duplicated though (I'm definitely willing to fix that). In terms of features: - Support for 4k and 64k pages - Support for 32bit and 64bit guests - PSCI support for SMP booting As we do not have a 64bit QEMU port, it has been tested using kvmtool (support has already been merged). Marc Zyngier (29): arm64: KVM: define HYP and Stage-2 translation page flags arm64: KVM: HYP mode idmap support arm64: KVM: EL2 register definitions arm64: KVM: system register definitions for 64bit guests arm64: KVM: Basic ESR_EL2 helpers and vcpu register access arm64: KVM: fault injection into a guest arm64: KVM: architecture specific MMU backend arm64: KVM: user space interface arm64: KVM: system register handling arm64: KVM: Cortex-A57 specific system registers handling arm64: KVM: virtual CPU reset arm64: KVM: kvm_arch and kvm_vcpu_arch definitions arm64: KVM: MMIO access backend arm64: KVM: guest one-reg interface arm64: KVM: hypervisor initialization code arm64: KVM: HYP mode world switch implementation arm64: KVM: Exit handling arm64: KVM: Plug the VGIC arm64: KVM: Plug the arch timer arm64: KVM: PSCI implementation arm64: KVM: Build system integration arm64: KVM: define 32bit specific registers arm64: KVM: 32bit GP register access arm64: KVM: 32bit conditional execution emulation arm64: KVM: 32bit handling of coprocessor traps arm64: KVM: 32bit coprocessor access for Cortex-A57 arm64: KVM: 32bit specific register world switch arm64: KVM: 32bit guest fault injection arm64: KVM: enable initialization of a 32bit vcpu arch/arm/kvm/arch_timer.c |1 + arch/arm64/Kconfig |2 + arch/arm64/Makefile |2 +- arch/arm64/include/asm/kvm_arch_timer.h | 58 ++ 
arch/arm64/include/asm/kvm_arm.h| 243 +++ arch/arm64/include/asm/kvm_asm.h| 104 +++ arch/arm64/include/asm/kvm_coproc.h | 56 ++ arch/arm64/include/asm/kvm_emulate.h| 181 + arch/arm64/include/asm/kvm_host.h | 192 ++ arch/arm64/include/asm/kvm_mmio.h | 59 ++ arch/arm64/include/asm/kvm_mmu.h| 126 arch/arm64/include/asm/kvm_psci.h | 23 + arch/arm64/include/asm/kvm_vgic.h | 156 + arch/arm64/include/asm/pgtable-hwdef.h | 13 + arch/arm64/include/asm/pgtable.h| 13 + arch/arm64/include/uapi/asm/kvm.h | 190 ++ arch/arm64/kernel/asm-offsets.c | 33 + arch/arm64/kernel/vmlinux.lds.S | 10 + arch/arm64/kvm/Kconfig | 59 ++ arch/arm64/kvm/Makefile | 18 + arch/arm64/kvm/emulate.c| 154 + arch/arm64/kvm/guest.c | 246 +++ arch/arm64/kvm/handle_exit.c| 124 arch/arm64/kvm/hyp-init.S | 89 +++ arch/arm64/kvm/hyp.S| 826 +++ arch/arm64/kvm/idmap.c | 141 arch/arm64/kvm/idmap.h |8 + arch/arm64/kvm/inject_fault.c | 194 ++ arch/arm64/kvm/regmap.c | 168 + arch/arm64/kvm/reset.c | 83 +++ arch/arm64/kvm/sys_regs.c | 1113 +++ arch/arm64/kvm/sys_regs.h | 141 arch/arm64/kvm/sys_regs_a57.c | 118 arch/arm64/mm/mmu.c |6 +- include/uapi/linux/kvm.h|1 + 35 files changed, 4949 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_arch_timer.h create mode 100644 arch/arm64/include/asm/kvm_arm.h create mode 100644 arch/arm64/include/asm/kvm_asm.h create mode 100644 arch/arm64/include/asm/kvm_coproc.h create mode 100644 arch/arm64/include/asm/kvm_emulate.h create mode 100644 arch/arm64/include/asm/kvm_host.h create mode 100644 arch/arm64/include/asm/kvm_mmio.h create mode 100644 arch/arm64/include/asm/kvm_mmu.h create mode 100644 arch/arm64/include/asm/kvm_psci.h create mode 100644 arch/arm64/include/asm/kvm_vgic.h create mode 100644 arch/arm64/include/uapi/asm/kvm.h create mode 100644 arch/arm64/kvm/Kconfig create mode 100644 arch/arm64/kvm/Makefile create mode 100644 arch/arm64/kvm/emulate.c create mode 100644 arch/arm64/kvm/guest.c create mode 100644 arch/arm64/kvm/handle_exit.c create 
mode 100644 arch/arm64/kvm/hyp-init.S create mode 100644 arch/arm64/kvm/hyp.S create mode 100644 arch/arm64/kvm/idmap.c create mode 100644 arch/arm64/kvm/idmap.h create mode 100644 arch/arm64/kvm/inject_fault.c create mode 100644 arch/arm64/kvm/regmap.c create mode 100644 arch/arm64/kvm/reset.c create mode 100644
[PATCH 02/29] arm64: KVM: HYP mode idmap support
Add the necessary infrastructure for identity-mapped HYP page tables. Idmap-ed code must be in the .hyp.idmap.text linker section. The rest of the HYP ends up in .hyp.text. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/kernel/vmlinux.lds.S | 10 +++ arch/arm64/kvm/idmap.c | 141 arch/arm64/kvm/idmap.h | 8 +++ 3 files changed, 159 insertions(+) create mode 100644 arch/arm64/kvm/idmap.c create mode 100644 arch/arm64/kvm/idmap.h diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be..51b87c3 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -17,6 +17,15 @@ ENTRY(stext) jiffies = jiffies_64; +#define HYPERVISOR_TEXT\ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text)\ + VMLINUX_SYMBOL(__hyp_text_end) = .; + SECTIONS { /* @@ -49,6 +58,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); diff --git a/arch/arm64/kvm/idmap.c b/arch/arm64/kvm/idmap.c new file mode 100644 index 000..68a55d4 --- /dev/null +++ b/arch/arm64/kvm/idmap.c @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#include linux/module.h +#include linux/kernel.h +#include linux/slab.h + +#include asm/cputype.h +#include asm/pgalloc.h +#include asm/pgtable.h +#include asm/sections.h +#include asm/virt.h + +#include idmap.h + +pgd_t *hyp_pgd; + +/* + * We always use a 2-level mapping for hyp-idmap: + * - Section mapped for 4kB pages + * - Page mapped for 64kB pages + */ +#ifdef CONFIG_ARM64_64K_PAGES +static void idmap_add_pte(pmd_t *pmd, unsigned long addr, unsigned long end) +{ + struct page *page; + pte_t *pte; + unsigned long next; + + if (pmd_none(*pmd)) { + pte = pte_alloc_one_kernel(NULL, addr); + if (!pte) { + pr_warning(Failed to allocate identity pte.\n); + return; + } + pmd_populate_kernel(NULL, pmd, pte); + } + + pte = pte_offset_kernel(pmd, addr); + + do { + page = phys_to_page(addr); + next = (addr PAGE_MASK) + PAGE_SIZE; + set_pte(pte, mk_pte(page, PAGE_HYP)); + } while (pte++, addr = next, addr end); +} +#else +#define HYP_SECT_PROT (PMD_TYPE_SECT | PMD_SECT_AF | \ +PMD_ATTRINDX(MT_NORMAL) | PMD_HYP) + +/* + * For 4kB pages, we use a section to perform the identity mapping, + * hence the direct call to __pmd_populate(). 
+ */ +static void idmap_add_pte(pmd_t *pmd, unsigned long addr, unsigned long end) +{ + __pmd_populate(pmd, addr PMD_MASK, HYP_SECT_PROT); +} +#endif + +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none_or_clear_bad(pud)) { + pmd = pmd_alloc_one(NULL, addr); + if (!pmd) { + pr_warning(Failed to allocate identity pmd.\n); + return; + } + pud_populate(NULL, pud, pmd); + } + + pmd = pmd_offset(pud, addr); + + do { + next = pmd_addr_end(addr, end); + idmap_add_pte(pmd, addr, next); + } while (pmd++, addr = next, addr != end); +} + +static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + idmap_add_pmd(pud, addr, next); + } while (pud++, addr = next, addr != end); +} + +extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; + +static int __init hyp_idmap_setup(void) +{ + unsigned long addr, end; +
[PATCH 03/29] arm64: KVM: EL2 register definitions
Define all the useful bitfields for EL2 registers. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_arm.h | 243 +++ 1 file changed, 243 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_arm.h diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h new file mode 100644 index 000..6561507 --- /dev/null +++ b/arch/arm64/include/asm/kvm_arm.h @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#ifndef __ARM64_KVM_ARM_H__ +#define __ARM64_KVM_ARM_H__ + +#include asm/types.h + +/* Hyp Configuration Register (HCR) bits */ +#define HCR_ID (1 33) +#define HCR_CD (1 32) +#define HCR_RW_SHIFT 31 +#define HCR_RW (1 HCR_RW_SHIFT) +#define HCR_TRVM (1 30) +#define HCR_HCD(1 29) +#define HCR_TDZ(1 28) +#define HCR_TGE(1 27) +#define HCR_TVM(1 26) +#define HCR_TTLB (1 25) +#define HCR_TPU(1 24) +#define HCR_TPC(1 23) +#define HCR_TSW(1 22) +#define HCR_TAC(1 21) +#define HCR_TIDCP (1 20) +#define HCR_TSC(1 19) +#define HCR_TID3 (1 18) +#define HCR_TID2 (1 17) +#define HCR_TID1 (1 16) +#define HCR_TID0 (1 15) +#define HCR_TWE(1 14) +#define HCR_TWI(1 13) +#define HCR_DC (1 12) +#define HCR_BSU(3 10) +#define HCR_BSU_IS (1 10) +#define HCR_FB (1 9) +#define HCR_VA (1 8) +#define HCR_VI (1 7) +#define HCR_VF (1 6) +#define HCR_AMO(1 5) +#define HCR_IMO(1 4) +#define HCR_FMO(1 3) +#define HCR_PTW(1 2) +#define HCR_SWIO (1 1) +#define HCR_VM (1) + +/* + * The bits we set in HCR: + * RW: 64bit by default, can be overriden for 32bit VMs + * TAC:Trap ACTLR + * TSC:Trap SMC + * TSW:Trap cache operations by set/way + * TWI:Trap WFI + * TIDCP: Trap L2CTLR/L2ECTLR + * BSU_IS: Upgrade barriers to the inner shareable domain + * FB: Force broadcast of all maintainance operations + * AMO:Override CPSR.A and enable signaling with VA + * IMO:Override CPSR.I and enable signaling with VI + * FMO:Override CPSR.F and enable signaling with VF + * SWIO: Turn set/way invalidates into set/way clean+invalidate + */ +#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \ +HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \ +HCR_SWIO | HCR_TIDCP | HCR_RW) +#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) + +/* Hyp System Control Register (SCTLR_EL2) bits */ +#define SCTLR_EL2_EE (1 25) +#define SCTLR_EL2_WXN (1 19) +#define SCTLR_EL2_I(1 12) +#define SCTLR_EL2_SA (1 3) +#define SCTLR_EL2_C(1 2) +#define SCTLR_EL2_A(1 1) +#define SCTLR_EL2_M1 +#define 
SCTLR_EL2_FLAGS(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \ +SCTLR_EL2_SA | SCTLR_EL2_I) + +/* TCR_EL2 Registers bits */ +#define TCR_EL2_TBI(1 20) +#define TCR_EL2_PS (7 16) +#define TCR_EL2_PS_40B (2 16) +#define TCR_EL2_TG0(1 14) +#define TCR_EL2_SH0(3 12) +#define TCR_EL2_ORGN0 (3 10) +#define TCR_EL2_IRGN0 (3 8) +#define TCR_EL2_T0SZ 0x3f +#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ +TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) + +#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) + +/* VTCR_EL2 Registers bits */ +#define VTCR_EL2_PS_MASK (7 16) +#define VTCR_EL2_PS_40B(2 16) +#define VTCR_EL2_TG0_MASK (1 14) +#define VTCR_EL2_TG0_4K(0 14) +#define VTCR_EL2_TG0_64K (1 14) +#define VTCR_EL2_SH0_MASK (3 12) +#define VTCR_EL2_SH0_INNER (3 12) +#define VTCR_EL2_ORGN0_MASK(3 10) +#define VTCR_EL2_ORGN0_WBWA(3 10) +#define VTCR_EL2_IRGN0_MASK(3 8) +#define VTCR_EL2_IRGN0_WBWA(3 8) +#define VTCR_EL2_SL0_MASK (3 6) +#define VTCR_EL2_SL0_LVL1 (1 6) +#define VTCR_EL2_T0SZ_MASK 0x3f +#define VTCR_EL2_T0SZ_40B 24 + +#ifdef
[PATCH 05/29] arm64: KVM: Basic ESR_EL2 helpers and vcpu register access
Implements helpers for dealing with the EL2 syndrome register as well as accessing the vcpu registers. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_emulate.h | 159 +++ 1 file changed, 159 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_emulate.h diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h new file mode 100644 index 000..16a343b --- /dev/null +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * Derived from arch/arm/include/kvm_emulate.h + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall c.d...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#ifndef __ARM64_KVM_EMULATE_H__ +#define __ARM64_KVM_EMULATE_H__ + +#include linux/kvm_host.h +#include asm/kvm_asm.h +#include asm/kvm_arm.h +#include asm/kvm_mmio.h +#include asm/ptrace.h + +void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); + +static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu) +{ + return (unsigned long *)vcpu-arch.regs.regs.pc; +} + +static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu) +{ + return (unsigned long *)vcpu-arch.regs.regs.pstate; +} + +static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) +{ + return false; /* 32bit? Bahhh... */ +} + +static inline bool kvm_condition_valid(struct kvm_vcpu *vcpu) +{ + return true;/* No conditionals on arm64 */ +} + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + *vcpu_pc(vcpu) += 4; +} + +static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) +{ +} + +static inline unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) +{ + return (unsigned long *)vcpu-arch.regs.regs.regs[reg_num]; + +} + +/* Get vcpu SPSR for current mode */ +static inline unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) +{ + return vcpu-arch.regs.spsr[KVM_SPSR_EL1]; +} + +static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg) +{ + return false; +} + +static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) +{ + u32 mode = *vcpu_cpsr(vcpu) PSR_MODE_MASK; + + return mode != PSR_MODE_EL0t; +} + +static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) +{ + return vcpu-arch.fault.esr_el2; +} + +static inline unsigned long kvm_vcpu_get_hfar(struct kvm_vcpu *vcpu) +{ + return vcpu-arch.fault.far_el2; +} + +static inline phys_addr_t kvm_vcpu_get_fault_ipa(struct kvm_vcpu *vcpu) +{ + return ((phys_addr_t)vcpu-arch.fault.hpfar_el2 HPFAR_MASK) 8; +} + +static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu) +{ + 
return !!(kvm_vcpu_get_hsr(vcpu) ESR_EL2_ISV); +} + +static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) ESR_EL2_WNR); +} + +static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) ESR_EL2_SSE); +} + +static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) +{ + return (kvm_vcpu_get_hsr(vcpu) ESR_EL2_SRT_MASK) ESR_EL2_SRT_SHIFT; +} + +static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) ESR_EL2_EA); +} + +static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) ESR_EL2_S1PTW); +} + +static inline int kvm_vcpu_dabt_get_as(struct kvm_vcpu *vcpu) +{ + return 1 ((kvm_vcpu_get_hsr(vcpu) ESR_EL2_SAS) ESR_EL2_SAS_SHIFT); +} + +/* This one is not specific to Data Abort */ +static inline bool kvm_vcpu_trap_il_is32bit(struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) ESR_EL2_IL); +} + +static inline u8 kvm_vcpu_trap_get_class(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) ESR_EL2_EC_SHIFT; +} + +static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT; +} + +static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) ESR_EL2_FSC_TYPE; +} + +#endif /* __ARM64_KVM_EMULATE_H__ */ -- 1.7.12.4 -- To unsubscribe
[PATCH 06/29] arm64: KVM: fault injection into a guest
Implement the injection of a fault (undefined, data abort or prefetch abort) into a 64bit guest. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/kvm/inject_fault.c | 117 ++ 1 file changed, 117 insertions(+) create mode 100644 arch/arm64/kvm/inject_fault.c diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c new file mode 100644 index 000..80b245f --- /dev/null +++ b/arch/arm64/kvm/inject_fault.c @@ -0,0 +1,117 @@ +/* + * Fault injection for 64bit guests. + * + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall c.d...@virtualopensystems.com + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#include linux/kvm_host.h +#include asm/kvm_emulate.h + +static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + int is_aarch32; + u32 esr = 0; + + is_aarch32 = vcpu_mode_is_32bit(vcpu); + + *vcpu_spsr(vcpu) = cpsr; + vcpu-arch.regs.elr_el1 = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT; + *vcpu_pc(vcpu) = vcpu-arch.sys_regs[VBAR_EL1] + 0x200; + + vcpu-arch.sys_regs[FAR_EL1] = addr; + + /* +* Build an {i,d}abort, depending on the level and the +* instruction set. Report an external synchronous abort. 
+*/ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= (1 25); + + if (is_aarch32 || (cpsr PSR_MODE_MASK) == PSR_MODE_EL0t) + esr |= (0x20 26); + else + esr |= (0x21 26); + + if (!is_iabt) + esr |= (1 28); + + vcpu-arch.sys_regs[ESR_EL1] = esr | 0x10; +} + +static void inject_undef64(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 esr = 0; + + *vcpu_spsr(vcpu) = cpsr; + vcpu-arch.regs.elr_el1 = *vcpu_pc(vcpu); + + *vcpu_cpsr(vcpu) = PSR_MODE_EL1h | PSR_F_BIT | PSR_I_BIT; + *vcpu_pc(vcpu) = vcpu-arch.sys_regs[VBAR_EL1] + 0x200; + + /* +* Build an unknown exception, depending on the instruction +* set. +*/ + if (kvm_vcpu_trap_il_is32bit(vcpu)) + esr |= (1 25); + + vcpu-arch.sys_regs[ESR_EL1] = esr; +} + +/** + * kvm_inject_dabt - inject a data abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt64(vcpu, false, addr); +} + +/** + * kvm_inject_pabt - inject a prefetch abort into the guest + * @vcpu: The VCPU to receive the undefined exception + * @addr: The address to report in the DFAR + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt64(vcpu, true, addr); +} + +/** + * kvm_inject_undefined - inject a undefined instruction into the guest + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. 
+ */ +void kvm_inject_undefined(struct kvm_vcpu *vcpu) +{ + inject_undef64(vcpu); +} -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/29] arm64: KVM: system register handling
Provide 64bit system register handling, modeled after the cp15 handling for ARM. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_coproc.h | 51 ++ arch/arm64/include/uapi/asm/kvm.h | 56 +++ arch/arm64/kvm/sys_regs.c | 962 arch/arm64/kvm/sys_regs.h | 141 ++ include/uapi/linux/kvm.h| 1 + 5 files changed, 1211 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_coproc.h create mode 100644 arch/arm64/kvm/sys_regs.c create mode 100644 arch/arm64/kvm/sys_regs.h diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h new file mode 100644 index 000..e791894 --- /dev/null +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * Derived from arch/arm/include/asm/kvm_coproc.h + * Copyright (C) 2012 Rusty Russell IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#ifndef __ARM64_KVM_COPROC_H__ +#define __ARM64_KVM_COPROC_H__ + +#include linux/kvm_host.h + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +struct kvm_sys_reg_table { + const struct sys_reg_desc *table; + size_t num; +}; + +struct kvm_sys_reg_target_table { + unsigned target; + struct kvm_sys_reg_table table64; +}; + +void kvm_register_target_sys_reg_table(struct kvm_sys_reg_target_table *table); + +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); + +#define kvm_coproc_table_init kvm_sys_reg_table_init +void kvm_sys_reg_table_init(void); + +struct kvm_one_reg; +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f5525f1..fffeb11 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -87,6 +87,62 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* If you need to interpret the index values, here is the key: */ +#define KVM_REG_ARM_COPROC_MASK0x0FFF +#define KVM_REG_ARM_COPROC_SHIFT 16 +#define KVM_REG_ARM_32_OPC2_MASK 0x0007 +#define KVM_REG_ARM_32_OPC2_SHIFT 0 +#define KVM_REG_ARM_OPC1_MASK 0x0078 +#define KVM_REG_ARM_OPC1_SHIFT 3 +#define KVM_REG_ARM_CRM_MASK 0x0780 +#define KVM_REG_ARM_CRM_SHIFT 7 +#define KVM_REG_ARM_32_CRN_MASK0x7800 +#define KVM_REG_ARM_32_CRN_SHIFT 11 + +/* Normal registers are mapped as coprocessor 16. */ +#define KVM_REG_ARM_CORE (0x0010 KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / sizeof(unsigned long)) + +/* Some registers need more space to represent values. 
*/ +#define KVM_REG_ARM_DEMUX (0x0011 KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_DEMUX_ID_MASK 0xFF00 +#define KVM_REG_ARM_DEMUX_ID_SHIFT 8 +#define KVM_REG_ARM_DEMUX_ID_CCSIDR(0x00 KVM_REG_ARM_DEMUX_ID_SHIFT) +#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00FF +#define KVM_REG_ARM_DEMUX_VAL_SHIFT0 + +/* VFP registers: we could overload CP10 like ARM does, but that's ugly. */ +#define KVM_REG_ARM_VFP(0x0012 KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_VFP_MASK 0x +#define KVM_REG_ARM_VFP_BASE_REG 0x0 +#define KVM_REG_ARM_VFP_FPSID 0x1000 +#define KVM_REG_ARM_VFP_FPSCR 0x1001 +#define KVM_REG_ARM_VFP_MVFR1 0x1006 +#define KVM_REG_ARM_VFP_MVFR0 0x1007 +#define KVM_REG_ARM_VFP_FPEXC 0x1008 +#define KVM_REG_ARM_VFP_FPINST 0x1009 +#define KVM_REG_ARM_VFP_FPINST20x100A + +/* AArch64 system registers */ +#define KVM_REG_ARM64_SYSREG (0x0013 KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM64_SYSREG_OP0_MASK 0xc000 +#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM64_SYSREG_OP1_MASK 0x3800 +#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11 +#define
[PATCH 08/29] arm64: KVM: user space interface
Provide the kvm.h file that defines the user space visible interface. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/uapi/asm/kvm.h | 112 ++ 1 file changed, 112 insertions(+) create mode 100644 arch/arm64/include/uapi/asm/kvm.h diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h new file mode 100644 index 000..f5525f1 --- /dev/null +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * Derived from arch/arm/include/uapi/asm/kvm.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall c.d...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#ifndef __ARM_KVM_H__ +#define __ARM_KVM_H__ + +#define KVM_SPSR_EL1 0 +#define KVM_NR_SPSR1 + +#ifndef __ASSEMBLY__ +#include asm/types.h +#include asm/ptrace.h + +#define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_IRQ_LINE + +#define KVM_REG_SIZE(id) \ + (1U (((id) KVM_REG_SIZE_MASK) KVM_REG_SIZE_SHIFT)) + +struct kvm_regs { + struct user_pt_regs regs; /* sp = sp_el0 */ + + unsigned long sp_el1; + unsigned long elr_el1; + + unsigned long spsr[KVM_NR_SPSR]; +}; + +/* Supported Processor Types */ +#define KVM_ARM_TARGET_CORTEX_A57 0 +#define KVM_ARM_NUM_TARGETS1 + +/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ +#define KVM_ARM_DEVICE_TYPE_SHIFT 0 +#define KVM_ARM_DEVICE_TYPE_MASK (0x KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_ID_SHIFT16 +#define KVM_ARM_DEVICE_ID_MASK (0x KVM_ARM_DEVICE_ID_SHIFT) + +/* Supported device IDs */ +#define KVM_ARM_DEVICE_VGIC_V2 0 + +/* Supported VGIC address types */ +#define KVM_VGIC_V2_ADDR_TYPE_DIST 0 +#define KVM_VGIC_V2_ADDR_TYPE_CPU 1 + +#define KVM_VGIC_V2_DIST_SIZE 0x1000 +#define KVM_VGIC_V2_CPU_SIZE 0x2000 + +struct kvm_vcpu_init { + __u32 target; + __u32 features[7]; +}; + +struct kvm_sregs { +}; + +struct kvm_fpu { +}; + +struct kvm_guest_debug_arch { +}; + +struct kvm_debug_exit_arch { +}; + +struct kvm_sync_regs { +}; + +struct kvm_arch_memory_slot { +}; + +/* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_TYPE_SHIFT 24 +#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_VCPU_SHIFT 16 +#define KVM_ARM_IRQ_VCPU_MASK 0xff +#define KVM_ARM_IRQ_NUM_SHIFT 0 +#define KVM_ARM_IRQ_NUM_MASK 0x + +/* irq_type field */ +#define KVM_ARM_IRQ_TYPE_CPU 0 +#define KVM_ARM_IRQ_TYPE_SPI 1 +#define KVM_ARM_IRQ_TYPE_PPI 2 + +/* out-of-kernel GIC cpu interrupt injection irq_number field */ +#define KVM_ARM_IRQ_CPU_IRQ0 +#define KVM_ARM_IRQ_CPU_FIQ1 + +/* Highest supported SPI, from VGIC_NR_IRQS */ +#define KVM_ARM_IRQ_GIC_MAX127 + +#endif + +#endif /* __ARM_KVM_H__ */ -- 1.7.12.4 -- To unsubscribe from this 
list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 22/29] arm64: KVM: define 32bit specific registers
Define the 32bit specific registers (SPSRs, cp15...). Most CPU registers are directly mapped to a 64bit register (r0-x0...). Only the SPSRs have separate registers. cp15 registers are also mapped into their 64bit counterpart in most cases. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_asm.h | 38 +- arch/arm64/include/asm/kvm_host.h | 5 - arch/arm64/include/uapi/asm/kvm.h | 7 ++- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 851fee5..3f4e6e1 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -42,7 +42,43 @@ #defineTPIDR_EL1 18 /* Thread ID, Privileged */ #defineAMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ #defineCNTKCTL_EL1 20 /* Timer Control Register (EL1) */ -#defineNR_SYS_REGS 21 +/* 32bit specific registers. Keep them at the end of the range */ +#defineDACR32_EL2 21 /* Domain Access Control Register */ +#defineIFSR32_EL2 22 /* Instruction Fault Status Register */ +#defineFPEXC32_EL2 23 /* Floating-Point Exception Control Register */ +#defineDBGVCR32_EL224 /* Debug Vector Catch Register */ +#defineTEECR32_EL1 25 /* ThumbEE Configuration Register */ +#defineTEEHBR32_EL126 /* ThumbEE Handler Base Register */ +#defineNR_SYS_REGS 27 + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxilliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* 
Translation Table Base Control R. */ +#define c3_DACR(DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR(ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR(IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxilary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxilary Instr Fault Status R */ +#define c6_DFAR(FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR(c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID(CONTEXTIDR_EL1 * 2)/* Context ID Register */ +#define c13_TID_URW(TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Priveleged */ +#define c10_AMAIR (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ +#define NR_CP15_REGS (NR_SYS_REGS * 2) #define ARM_EXCEPTION_IRQ0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 68558ac..24dc8d7 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -85,7 +85,10 @@ typedef struct user_fpsimd_state kvm_kernel_vfp_t; struct kvm_vcpu_arch { struct kvm_regs regs; - u64 sys_regs[NR_SYS_REGS]; + union { + u64 sys_regs[NR_SYS_REGS]; + u32 cp15[NR_CP15_REGS]; + }; /* HYP configuration */ u64 hcr_el2; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 24c8318..f9c269e 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -23,7 +23,12 @@ #define __ARM_KVM_H__ #define KVM_SPSR_EL1 0 -#define KVM_NR_SPSR1 +#define KVM_SPSR_SVC KVM_SPSR_EL1 +#define KVM_SPSR_ABT 1 
+#define KVM_SPSR_UND 2 +#define KVM_SPSR_IRQ 3 +#define KVM_SPSR_FIQ 4 +#define KVM_NR_SPSR 5 #ifndef __ASSEMBLY__ #include <asm/types.h> -- 1.7.12.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 21/29] arm64: KVM: Build system integration
Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/Kconfig | 2 ++ arch/arm64/Makefile | 2 +- arch/arm64/kvm/Kconfig | 59 + arch/arm64/kvm/Makefile | 18 +++ 4 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/Kconfig create mode 100644 arch/arm64/kvm/Makefile diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 536611a..cff688b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -241,6 +241,8 @@ source drivers/Kconfig source fs/Kconfig +source arch/arm64/kvm/Kconfig + source arch/arm64/Kconfig.debug source security/Kconfig diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb..ae89e63 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -36,7 +36,7 @@ TEXT_OFFSET := 0x0008 export TEXT_OFFSET GZFLAGS -core-y += arch/arm64/kernel/ arch/arm64/mm/ +core-y += arch/arm64/kernel/ arch/arm64/mm/ arch/arm64/kvm/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig new file mode 100644 index 000..a76be8b --- /dev/null +++ b/arch/arm64/kvm/Kconfig @@ -0,0 +1,59 @@ +# +# KVM configuration +# + +source virt/kvm/Kconfig + +menuconfig VIRTUALIZATION + bool Virtualization + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool Kernel-based Virtual Machine (KVM) support + select PREEMPT_NOTIFIERS + select ANON_INODES + select KVM_MMIO + select KVM_ARM_HOST + select KVM_ARM_VGIC + select KVM_ARM_TIMER + ---help--- + Support hosting virtualized guest machines. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. 
+ +config KVM_ARM_HOST + bool + depends on KVM + depends on MMU + select MMU_NOTIFIER + ---help--- + Provides host support for ARM processors. + +config KVM_ARM_VGIC +bool + depends on KVM_ARM_HOST OF + select HAVE_KVM_IRQCHIP + ---help--- + Adds support for a hardware assisted, in-kernel GIC emulation. + +config KVM_ARM_TIMER +bool + depends on KVM_ARM_VGIC + select HAVE_KVM_IRQCHIP + ---help--- + Adds support for the Architected Timers in virtual machines + +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile new file mode 100644 index 000..14ba38d --- /dev/null +++ b/arch/arm64/kvm/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +CFLAGS_arm.o := -I. +CFLAGS_mmu.o := -I. + +obj-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) + +obj-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../arch/arm/kvm/, arm.o mmu.o mmio.o psci.o perf.o) + +obj-$(CONFIG_KVM_ARM_HOST) += inject_fault.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o idmap.o +obj-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_a57.o + +obj-$(CONFIG_KVM_ARM_VGIC) += $(addprefix ../../../arch/arm/kvm/, vgic.o) +obj-$(CONFIG_KVM_ARM_TIMER) += $(addprefix ../../../arch/arm/kvm/, arch_timer.o) -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 20/29] arm64: KVM: PSCI implementation
Wire the PSCI backend into the exit handling code. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_psci.h | 23 +++ arch/arm64/include/uapi/asm/kvm.h | 16 arch/arm64/kvm/handle_exit.c | 16 +++- 4 files changed, 47 insertions(+), 10 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_psci.h diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 85e706b..68558ac 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #include asm/kvm_vgic.h #include asm/kvm_arch_timer.h -#define KVM_VCPU_MAX_FEATURES 0 +#define KVM_VCPU_MAX_FEATURES 1 /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h new file mode 100644 index 000..d96f054 --- /dev/null +++ b/arch/arm64/include/asm/kvm_psci.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#ifndef __ARM64_KVM_PSCI_H__ +#define __ARM64_KVM_PSCI_H__ + +bool kvm_psci_call(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_PSCI_H__ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index fffeb11..24c8318 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -64,6 +64,8 @@ struct kvm_regs { #define KVM_VGIC_V2_DIST_SIZE 0x1000 #define KVM_VGIC_V2_CPU_SIZE 0x2000 +#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ + struct kvm_vcpu_init { __u32 target; __u32 features[7]; @@ -163,6 +165,20 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX127 +/* PSCI interface */ +#define KVM_PSCI_FN_BASE 0x95c1ba5e +#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) + +#define KVM_PSCI_FN_CPU_SUSPENDKVM_PSCI_FN(0) +#define KVM_PSCI_FN_CPU_OFFKVM_PSCI_FN(1) +#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2) +#define KVM_PSCI_FN_MIGRATEKVM_PSCI_FN(3) + +#define KVM_PSCI_RET_SUCCESS 0 +#define KVM_PSCI_RET_NI((unsigned long)-1) +#define KVM_PSCI_RET_INVAL ((unsigned long)-2) +#define KVM_PSCI_RET_DENIED((unsigned long)-3) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 0e1fa4e..fa38230 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -24,26 +24,24 @@ #include asm/kvm_emulate.h #include asm/kvm_coproc.h #include asm/kvm_mmu.h +#include asm/kvm_psci.h typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* -* Guest called HVC instruction: -* Let it know we don't want that by injecting an undefined exception. 
-*/ - kvm_debug(hvc: %x (at %08lx), kvm_vcpu_get_hsr(vcpu) ((1 16) - 1), - *vcpu_pc(vcpu)); - kvm_debug( HSR: %8x, kvm_vcpu_get_hsr(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - /* We don't support SMC; don't do that. */ - kvm_debug(smc: at %08lx, *vcpu_pc(vcpu)); + if (kvm_psci_call(vcpu)) + return 1; + kvm_inject_undefined(vcpu); return 1; } -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 18/29] arm64: KVM: Plug the VGIC
Shouldn't be needed - a complete duplicate from arch/arm. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_vgic.h | 156 ++ 1 file changed, 156 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_vgic.h diff --git a/arch/arm64/include/asm/kvm_vgic.h b/arch/arm64/include/asm/kvm_vgic.h new file mode 100644 index 000..f353f22 --- /dev/null +++ b/arch/arm64/include/asm/kvm_vgic.h @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier marc.zyng...@arm.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#ifndef __ARM64_KVM_VGIC_H +#define __ARM64_KVM_VGIC_H + +#include linux/kernel.h +#include linux/kvm.h +#include linux/irqreturn.h +#include linux/spinlock.h +#include linux/types.h +#include linux/irqchip/arm-gic.h + +#define VGIC_NR_IRQS 128 +#define VGIC_NR_SGIS 16 +#define VGIC_NR_PPIS 16 +#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) +#define VGIC_NR_SHARED_IRQS(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) +#define VGIC_MAX_CPUS KVM_MAX_VCPUS + +/* Sanity checks... 
*/ +#if (VGIC_MAX_CPUS 8) +#error Invalid number of CPU interfaces +#endif + +#if (VGIC_NR_IRQS 31) +#error VGIC_NR_IRQS must be a multiple of 32 +#endif + +#if (VGIC_NR_IRQS 1024) +#error VGIC_NR_IRQS must be = 1024 +#endif + +/* + * The GIC distributor registers describing interrupts have two parts: + * - 32 per-CPU interrupts (SGI + PPI) + * - a bunch of shared interrupts (SPI) + */ +struct vgic_bitmap { + union { + u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; + DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); + } percpu[VGIC_MAX_CPUS]; + union { + u32 reg[VGIC_NR_SHARED_IRQS / 32]; + DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); + } shared; +}; + +struct vgic_bytemap { + u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; + u32 shared[VGIC_NR_SHARED_IRQS / 4]; +}; + +struct vgic_dist { + spinlock_t lock; + boolready; + + /* Virtual control interface mapping */ + void __iomem*vctrl_base; + + /* Distributor and vcpu interface mapping in the guest */ + phys_addr_t vgic_dist_base; + phys_addr_t vgic_cpu_base; + + /* Distributor enabled */ + u32 enabled; + + /* Interrupt enabled (one bit per IRQ) */ + struct vgic_bitmap irq_enabled; + + /* Interrupt 'pin' level */ + struct vgic_bitmap irq_state; + + /* Level-triggered interrupt in progress */ + struct vgic_bitmap irq_active; + + /* Interrupt priority. Not used yet. 
*/ + struct vgic_bytemap irq_priority; + + /* Level/edge triggered */ + struct vgic_bitmap irq_cfg; + + /* Source CPU per SGI and target CPU */ + u8 irq_sgi_sources[VGIC_MAX_CPUS][16]; + + /* Target CPU for each IRQ */ + u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; + struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; + + /* Bitmap indicating which CPU has something pending */ + unsigned long irq_pending_on_cpu; +}; + +struct vgic_cpu { + /* per IRQ to LR mapping */ + u8 vgic_irq_lr_map[VGIC_NR_IRQS]; + + /* Pending interrupts on this VCPU */ + DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); + + /* Bitmap of used/free list registers */ + DECLARE_BITMAP( lr_used, 64); + + /* Number of list registers on this CPU */ + int nr_lr; + + /* CPU vif control registers for world switch */ + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr[2]; /* Saved only */ + u32 vgic_elrsr[2]; /* Saved only */ + u32 vgic_apr; + u32 vgic_lr[64];/* A15 has only 4... */ +}; + +#define LR_EMPTY 0xff + +struct kvm; +struct kvm_vcpu; +struct kvm_run; +struct kvm_exit_mmio; + +int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); +int kvm_vgic_hyp_init(void); +int kvm_vgic_init(struct kvm *kvm); +int kvm_vgic_create(struct kvm *kvm); +int
[PATCH 13/29] arm64: KVM: MMIO access backend
Define the necessary structures to perform an MMIO access. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_mmio.h | 59 +++ 1 file changed, 59 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_mmio.h diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h new file mode 100644 index 000..fc2f689 --- /dev/null +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall c.d...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#ifndef __ARM64_KVM_MMIO_H__ +#define __ARM64_KVM_MMIO_H__ + +#include linux/kvm_host.h +#include asm/kvm_asm.h +#include asm/kvm_arm.h + +/* + * This is annoying. The mmio code requires this, even if we don't + * need any decoding. To be fixed. + */ +struct kvm_decode { + unsigned long rt; + bool sign_extend; +}; + +/* + * The in-kernel MMIO emulation code wants to use a copy of run-mmio, + * which is an anonymous type. Use our own type instead. 
+ */ +struct kvm_exit_mmio { + phys_addr_t phys_addr; + u8 data[8]; + u32 len; + boolis_write; +}; + +static inline void kvm_prepare_mmio(struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + run-mmio.phys_addr = mmio-phys_addr; + run-mmio.len = mmio-len; + run-mmio.is_write = mmio-is_write; + memcpy(run-mmio.data, mmio-data, mmio-len); + run-exit_reason= KVM_EXIT_MMIO; +} + +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, +phys_addr_t fault_ipa); + +#endif /* __ARM64_KVM_MMIO_H__ */ -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 16/29] arm64: KVM: HYP mode world switch implementation
The HYP mode world switch in all its glory. Implements save/restore of host/guest registers, EL2 trapping, IPA resolution, and additional services (tlb invalidation). Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/kernel/asm-offsets.c | 33 ++ arch/arm64/kvm/hyp.S| 756 2 files changed, 789 insertions(+) create mode 100644 arch/arm64/kvm/hyp.S diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a2a4d81..a7f706a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -21,6 +21,7 @@ #include linux/sched.h #include linux/mm.h #include linux/dma-mapping.h +#include linux/kvm_host.h #include asm/thread_info.h #include asm/memory.h #include asm/cputable.h @@ -104,5 +105,37 @@ int main(void) BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + BLANK(); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_REGS,offsetof(struct kvm_vcpu, arch.regs)); + DEFINE(VCPU_USER_PT_REGS,offsetof(struct kvm_regs, regs)); + DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); + DEFINE(VCPU_VFP_HOST,offsetof(struct kvm_vcpu, arch.vfp_host)); + DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_SP_EL1, offsetof(struct kvm_vcpu, arch.regs.sp_el1)); + DEFINE(VCPU_ELR_EL1, offsetof(struct kvm_vcpu, arch.regs.elr_el1)); + DEFINE(VCPU_SPSR,offsetof(struct kvm_vcpu, arch.regs.spsr)); + DEFINE(VCPU_SYSREGS, offsetof(struct kvm_vcpu, arch.sys_regs)); + DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); + DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); + DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + 
DEFINE(KVM_TIMER_CNTVOFF,offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED,offsetof(struct kvm, arch.timer.enabled)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VGIC_CPU,offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); + DEFINE(VGIC_CPU_VMCR,offsetof(struct vgic_cpu, vgic_vmcr)); + DEFINE(VGIC_CPU_MISR,offsetof(struct vgic_cpu, vgic_misr)); + DEFINE(VGIC_CPU_EISR,offsetof(struct vgic_cpu, vgic_eisr)); + DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); + DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); + DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); + DEFINE(KVM_VTTBR,offsetof(struct kvm, arch.vttbr)); + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif return 0; } diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S new file mode 100644 index 000..cd7506d --- /dev/null +++ b/arch/arm64/kvm/hyp.S @@ -0,0 +1,756 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#include linux/linkage.h +#include linux/irqchip/arm-gic.h + +#include asm/assembler.h +#include asm/memory.h +#include asm/asm-offsets.h +#include asm/fpsimdmacros.h +#include asm/kvm.h +#include asm/kvm_asm.h +#include asm/kvm_arm.h +#include asm/kvm_mmu.h + +#define REG_OFFSET(x) (VCPU_REGS + VCPU_USER_PT_REGS + 8*x) +#define SPSR_OFFSET(x) (VCPU_SPSR + 8*x) +#define SYSREG_OFFSET(x) (VCPU_SYSREGS + 8*x) + + .text + .pushsection.hyp.text, ax + .align PAGE_SHIFT + +__kvm_hyp_code_start: + .globl __kvm_hyp_code_start + +.macro save_host_regs + pushx19, x20 + pushx21, x22 + pushx23, x24 + pushx25, x26 + pushx27, x28 + pushx29, lr + + mrs x19, sp_el0 + mrs x20, sp_el1 + mrs x21, elr_el1 + mrs x22, spsr_el1 + mrs x23,
[PATCH 10/29] arm64: KVM: Cortex-A57 specific system registers handling
Add the support code for Cortex-A57 specific system registers. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/kvm/sys_regs_a57.c | 96 +++ 1 file changed, 96 insertions(+) create mode 100644 arch/arm64/kvm/sys_regs_a57.c diff --git a/arch/arm64/kvm/sys_regs_a57.c b/arch/arm64/kvm/sys_regs_a57.c new file mode 100644 index 000..dcc88fe --- /dev/null +++ b/arch/arm64/kvm/sys_regs_a57.c @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * Based on arch/arm/kvm/coproc_a15.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Authors: Rusty Russell ru...@rustcorp.au + * Christoffer Dall c.d...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ +#include linux/kvm_host.h +#include asm/cputype.h +#include asm/kvm_arm.h +#include asm/kvm_asm.h +#include asm/kvm_host.h +#include asm/kvm_emulate.h +#include asm/kvm_coproc.h +#include linux/init.h + +#include sys_regs.h + +#define MPIDR_EL1_AFF0_MASK0xff + +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + /* +* Simply map the vcpu_id into the Aff0 field of the MPIDR. 
+*/ + vcpu-arch.sys_regs[MPIDR_EL1] = (1 31) | (vcpu-vcpu_id MPIDR_EL1_AFF0_MASK); +} + +static bool access_actlr(struct kvm_vcpu *vcpu, +const struct sys_reg_params *p, +const struct sys_reg_desc *r) +{ + if (p-is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p-Rt) = vcpu-arch.sys_regs[ACTLR_EL1]; + return true; +} + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 actlr; + + asm volatile(mrs %0, actlr_el1\n : =r (actlr)); + vcpu-arch.sys_regs[ACTLR_EL1] = actlr; +} + +/* + * A57-specific sys-reg registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + */ +static const struct sys_reg_desc a57_sys_regs[] = { + { Op0(0b11), Op1(0b000), CRn(0b), CRm(0b), Op2(0b101), /* MPIDR_EL1 */ + NULL, reset_mpidr, MPIDR_EL1 }, + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b), Op2(0b000), /* SCTLR_EL1 */ + NULL, reset_val, SCTLR_EL1, 0x00C50078 }, + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b), Op2(0b001), /* ACTLR_EL1 */ + access_actlr, reset_actlr, ACTLR_EL1 }, + { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b), Op2(0b010), /* CPACR_EL1 */ + NULL, reset_val, CPACR_EL1, 0 }, +}; + +static struct kvm_sys_reg_target_table a57_target_table = { + .target = KVM_ARM_TARGET_CORTEX_A57, + .table64 = { + .table = a57_sys_regs, + .num = ARRAY_SIZE(a57_sys_regs), + }, +}; + +static int __init sys_reg_a57_init(void) +{ + unsigned int i; + + for (i = 1; i ARRAY_SIZE(a57_sys_regs); i++) + BUG_ON(cmp_sys_reg(a57_sys_regs[i-1], + a57_sys_regs[i]) = 0); + + kvm_register_target_sys_reg_table(a57_target_table); + return 0; +} +late_initcall(sys_reg_a57_init); -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 12/29] arm64: KVM: kvm_arch and kvm_vcpu_arch definitions
Provide the architecture dependent structures for VM and vcpu abstractions. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_host.h | 178 ++ 1 file changed, 178 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_host.h diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h new file mode 100644 index 000..d1095d1 --- /dev/null +++ b/arch/arm64/include/asm/kvm_host.h @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * Derived from arch/arm/include/asm/kvm_host.h: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall c.d...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#ifndef __ARM64_KVM_HOST_H__ +#define __ARM64_KVM_HOST_H__ + +#include asm/kvm.h +#include asm/kvm_asm.h +#include asm/kvm_mmio.h + +#define KVM_MAX_VCPUS 4 +#define KVM_USER_MEM_SLOTS 32 +#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +#include asm/kvm_vgic.h +#include asm/kvm_arch_timer.h + +#define KVM_VCPU_MAX_FEATURES 0 + +/* We don't currently support large pages. */ +#define KVM_HPAGE_GFN_SHIFT(x) 0 +#define KVM_NR_PAGE_SIZES 1 +#define KVM_PAGES_PER_HPAGE(x) (1UL31) + +struct kvm_vcpu; +int kvm_target_cpu(void); +int kvm_reset_vcpu(struct kvm_vcpu *vcpu); + +struct kvm_arch { + /* The VMID generation used for the virt. 
memory system */ + u64vmid_gen; + u32vmid; + + /* 1-level 2nd stage table and lock */ + spinlock_t pgd_lock; + pgd_t *pgd; + + /* VTTBR value associated with above pgd and vmid */ + u64vttbr; + + /* Interrupt controller */ + struct vgic_distvgic; + + /* Timer */ + struct arch_timer_kvm timer; +}; + +#define KVM_NR_MEM_OBJS 40 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. + */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + +struct kvm_vcpu_fault_info { + u32 esr_el2;/* Hyp Syndrom Register */ + u64 far_el2;/* Hyp Fault Address Register */ + u64 hpfar_el2; /* Hyp IPA Fault Address Register */ +}; + +typedef struct user_fpsimd_state kvm_kernel_vfp_t; + +struct kvm_vcpu_arch { + struct kvm_regs regs; + u64 sys_regs[NR_SYS_REGS]; + + /* HYP configuration */ + u64 hcr_el2; + + /* Exception Information */ + struct kvm_vcpu_fault_info fault; + + /* Floating point registers (VFP and Advanced SIMD/NEON) */ + kvm_kernel_vfp_t vfp_guest; + kvm_kernel_vfp_t *vfp_host; + + /* VGIC state */ + struct vgic_cpu vgic_cpu; + struct arch_timer_cpu timer_cpu; + + /* +* Anything that is not used directly from assembly code goes +* here. 
+*/ + /* dcache set/way operation pending */ + int last_pcpu; + cpumask_t require_dcache_flush; + + /* Don't run the guest */ + bool pause; + + /* IO related fields */ + struct kvm_decode mmio_decode; + + /* Interrupt related fields */ + u64 irq_lines; /* IRQ and FIQ levels */ + + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; + + /* Target CPU and feature flags */ + u32 target; + DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); + + /* Detect first run of a vcpu */ + bool has_run_once; +}; + +struct kvm_vm_stat { + u32 remote_tlb_flush; +}; + +struct kvm_vcpu_stat { + u32 halt_wakeup; +}; + +struct kvm_vcpu_init; +int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init); +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +struct kvm_one_reg; +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +#define KVM_ARCH_WANT_MMU_NOTIFIER +struct kvm; +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end); +void
[PATCH 14/29] arm64: KVM: guest one-reg interface
Let userspace play with the guest registers. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/kvm/guest.c | 240 + 1 file changed, 240 insertions(+) create mode 100644 arch/arm64/kvm/guest.c diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c new file mode 100644 index 000..2a8aaf8 --- /dev/null +++ b/arch/arm64/kvm/guest.c @@ -0,0 +1,240 @@ +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier marc.zyng...@arm.com + * + * Derived from arch/arm/kvm/guest.c: + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall c.d...@virtualopensystems.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#include linux/errno.h +#include linux/err.h +#include linux/kvm_host.h +#include linux/module.h +#include linux/vmalloc.h +#include linux/fs.h +#include asm/cputype.h +#include asm/uaccess.h +#include asm/kvm.h +#include asm/kvm_asm.h +#include asm/kvm_emulate.h +#include asm/kvm_coproc.h + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + vcpu-arch.hcr_el2 = HCR_GUEST_FLAGS; + return 0; +} + +static u64 core_reg_offset_from_id(u64 id) +{ + return id ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); +} + +static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg-addr; + struct kvm_regs *regs = vcpu-arch.regs; + u64 off; + + if (KVM_REG_SIZE(reg-id) != sizeof(unsigned long)) + return -ENOENT; + + /* Our ID is an index into the kvm_regs struct. */ + off = core_reg_offset_from_id(reg-id); + if (off = sizeof(*regs) / KVM_REG_SIZE(reg-id)) + return -ENOENT; + + return put_user(((unsigned long *)regs)[off], uaddr); +} + +static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + unsigned long __user *uaddr = (unsigned long __user *)(unsigned long)reg-addr; + struct kvm_regs *regs = vcpu-arch.regs; + u64 off, val; + + if (KVM_REG_SIZE(reg-id) != sizeof(unsigned long)) + return -ENOENT; + + /* Our ID is an index into the kvm_regs struct. 
*/ + off = core_reg_offset_from_id(reg-id); + if (off = sizeof(*regs) / KVM_REG_SIZE(reg-id)) + return -ENOENT; + + if (get_user(val, uaddr) != 0) + return -EFAULT; + + if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { + unsigned long mode = val COMPAT_PSR_MODE_MASK; + switch (mode) { + case PSR_MODE_EL0t: + case PSR_MODE_EL1t: + case PSR_MODE_EL1h: + break; + default: + return -EINVAL; + } + } + + ((unsigned long *)regs)[off] = val; + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + return -EINVAL; +} + +static unsigned long num_core_regs(void) +{ + return sizeof(struct kvm_regs) / sizeof(unsigned long); +} + +/** + * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG + * + * This is for all registers. + */ +unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) +{ + return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu); +} + +/** + * kvm_arm_copy_reg_indices - get indices of all registers. + * + * We do core registers right here, then we apppend system regs. + */ +int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + unsigned int i; + const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; + + for (i = 0; i sizeof(struct kvm_regs)/sizeof(unsigned long); i++) { + if (put_user(core_reg | i, uindices)) + return -EFAULT; + uindices++; + } + + return kvm_arm_copy_sys_reg_indices(vcpu, uindices); +} + +int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + /* We currently use nothing arch-specific in upper 32 bits */ + if ((reg-id ~KVM_REG_SIZE_MASK) 32 != KVM_REG_ARM64 32) + return -EINVAL; + +
[PATCH 19/29] arm64: KVM: Plug the arch timer
Shouldn't be needed - a complete duplicate from arch/arm. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm/kvm/arch_timer.c | 1 + arch/arm64/include/asm/kvm_arch_timer.h | 58 + 2 files changed, 59 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_arch_timer.h diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c index 6ac938d..ca04a99 100644 --- a/arch/arm/kvm/arch_timer.c +++ b/arch/arm/kvm/arch_timer.c @@ -194,6 +194,7 @@ static struct notifier_block kvm_timer_cpu_nb = { static const struct of_device_id arch_timer_of_match[] = { { .compatible = arm,armv7-timer,}, + { .compatible = arm,armv8-timer,}, {}, }; diff --git a/arch/arm64/include/asm/kvm_arch_timer.h b/arch/arm64/include/asm/kvm_arch_timer.h new file mode 100644 index 000..eb02273 --- /dev/null +++ b/arch/arm64/include/asm/kvm_arch_timer.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier marc.zyng...@arm.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. 
+ */ + +#ifndef __ARM64_KVM_ARCH_TIMER_H +#define __ARM64_KVM_ARCH_TIMER_H + +#include linux/clocksource.h +#include linux/hrtimer.h +#include linux/workqueue.h + +struct arch_timer_kvm { + /* Is the timer enabled */ + boolenabled; + + /* Virtual offset, restored only */ + cycle_t cntvoff; +}; + +struct arch_timer_cpu { + /* Background timer used when the guest is not running */ + struct hrtimer timer; + + /* Work queued with the above timer expires */ + struct work_struct expired; + + /* Background timer active */ + boolarmed; + + /* Timer IRQ */ + const struct kvm_irq_level *irq; + + /* Registers: control register, timer value */ + u32 cntv_ctl; /* Saved/restored */ + cycle_t cntv_cval; /* Saved/restored */ +}; + +int kvm_timer_hyp_init(void); +int kvm_timer_init(struct kvm *kvm); +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); +void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); + +#endif -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 24/29] arm64: KVM: 32bit conditional execution emulation
As conditional instructions can trap on AArch32, add the thinnest possible emulation layer to keep 32bit guests happy. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_emulate.h | 13 ++- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/emulate.c | 154 +++ 3 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/kvm/emulate.c diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 2e72a4f..4d5e0ee 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -31,6 +31,9 @@ unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num); unsigned long *vcpu_spsr32(struct kvm_vcpu *vcpu); +bool kvm_condition_valid32(struct kvm_vcpu *vcpu); +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); + void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); @@ -52,12 +55,18 @@ static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) static inline bool kvm_condition_valid(struct kvm_vcpu *vcpu) { - return true;/* No conditionals on arm64 */ + if (vcpu_mode_is_32bit(vcpu)) + return kvm_condition_valid32(vcpu); + + return true; } static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) { - *vcpu_pc(vcpu) += 4; + if (vcpu_mode_is_32bit(vcpu)) + kvm_skip_instr32(vcpu, is_wide_instr); + else + *vcpu_pc(vcpu) += 4; } static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 50f9da0..a6ba0d8 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../virt/kvm/, kvm_main.o coalesc obj-$(CONFIG_KVM_ARM_HOST) += $(addprefix ../../../arch/arm/kvm/, arm.o mmu.o mmio.o psci.o perf.o) -obj-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o +obj-$(CONFIG_KVM_ARM_HOST) += 
emulate.o inject_fault.o regmap.o obj-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o idmap.o obj-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_a57.o diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c new file mode 100644 index 000..6b3dbc3 --- /dev/null +++ b/arch/arm64/kvm/emulate.c @@ -0,0 +1,154 @@ +/* + * (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall c.d...@virtualopensystems.com + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#include linux/kvm_host.h +#include asm/kvm_emulate.h + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... 
LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set*/ + 0x0F0F, /* NE */ + 0x, /* CS == C set*/ + 0x, /* CC */ + 0xFF00, /* MI == N set*/ + 0x00FF, /* PL */ + 0x, /* VS == V set*/ + 0x, /* VC */ + 0x0C0C, /* HI == C set Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V))*/ + 0x, /* AL always */ + 0 /* NV */ +}; + +static int kvm_vcpu_get_condition(struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + + if (esr ESR_EL2_CV) + return (esr ESR_EL2_COND) ESR_EL2_COND_SHIFT; + + return -1; +} + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool kvm_condition_valid32(struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; +
[PATCH 25/29] arm64: KVM: 32bit handling of coprocessor traps
Provide the necessary infrastructure to trap coprocessor accesses that occur when running 32bit guests. Also wire SMC and HVC trapped in 32bit mode while we're at it. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_coproc.h | 5 ++ arch/arm64/kvm/handle_exit.c| 7 ++ arch/arm64/kvm/sys_regs.c | 165 ++-- 3 files changed, 170 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index e791894..0378be9 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -33,10 +33,15 @@ struct kvm_sys_reg_table { struct kvm_sys_reg_target_table { unsigned target; struct kvm_sys_reg_table table64; + struct kvm_sys_reg_table table32; }; void kvm_register_target_sys_reg_table(struct kvm_sys_reg_target_table *table); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); #define kvm_coproc_table_init kvm_sys_reg_table_init diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fa38230..3e61dcb 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -62,6 +62,13 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run) static exit_handle_fn arm_exit_handlers[] = { [ESR_EL2_EC_WFI]= kvm_handle_wfi, + [ESR_EL2_EC_CP15_32]= kvm_handle_cp15_32, + [ESR_EL2_EC_CP15_64]= kvm_handle_cp15_64, + [ESR_EL2_EC_CP14_MR]= kvm_handle_cp14_access, + [ESR_EL2_EC_CP14_LS]= kvm_handle_cp14_load_store, + [ESR_EL2_EC_CP14_64]= kvm_handle_cp14_access, + [ESR_EL2_EC_HVC32] = handle_hvc, + [ESR_EL2_EC_SMC32] = handle_smc, [ESR_EL2_EC_HVC64] = handle_hvc, [ESR_EL2_EC_SMC64] = handle_smc, [ESR_EL2_EC_SYS64] = 
kvm_handle_sys_reg, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9fc8c17..1b1cb21 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -38,6 +38,10 @@ * types are different. My gut feeling is that it should be pretty * easy to merge, but that would be an ABI breakage -- again. VFP * would also need to be abstracted. + * + * For AArch32, we only take care of what is being trapped. Anything + * that has to do with init and userspace access has to go via the + * 64bit interface. */ /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ @@ -256,6 +260,36 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* TPIDRRO_EL0 */ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b), Op2(0b011), NULL, reset_unknown, TPIDRRO_EL0 }, + + /* DACR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b), Op2(0b000), + NULL, reset_unknown, DACR32_EL2 }, + /* IFSR32_EL2 */ + { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b), Op2(0b001), + NULL, reset_unknown, IFSR32_EL2 }, +}; + +/* Trapped cp15 registers */ +static const struct sys_reg_desc cp15_regs[] = { + /* +* DC{C,I,CI}SW operations: +*/ + { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw }, + { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake }, + { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake }, + { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake }, }; /* Target 
specific emulation tables */ @@ -267,13 +301,20 @@ void kvm_register_target_sys_reg_table(struct kvm_sys_reg_target_table *table) } /* Get specific register table for this target. */ -static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num) +static const struct sys_reg_desc *get_target_table(unsigned target, + bool mode_is_64, + size_t *num) { struct kvm_sys_reg_target_table *table; table = target_tables[target]; - *num =
[PATCH 26/29] arm64: KVM: 32bit coprocessor access for Cortex-A57
Enable handling of 32bit coprocessor traps for Cortex-A57. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/kvm/sys_regs_a57.c | 22 ++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/kvm/sys_regs_a57.c b/arch/arm64/kvm/sys_regs_a57.c index dcc88fe..56c0641 100644 --- a/arch/arm64/kvm/sys_regs_a57.c +++ b/arch/arm64/kvm/sys_regs_a57.c @@ -59,6 +59,17 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu-arch.sys_regs[ACTLR_EL1] = actlr; } +static bool access_ectlr(struct kvm_vcpu *vcpu, +const struct sys_reg_params *p, +const struct sys_reg_desc *r) +{ + if (p-is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p-Rt) = 0; + return true; +} + /* * A57-specific sys-reg registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 @@ -74,12 +85,23 @@ static const struct sys_reg_desc a57_sys_regs[] = { NULL, reset_val, CPACR_EL1, 0 }, }; +static const struct sys_reg_desc a57_cp15_regs[] = { + { Op1(0b000), CRn(0b0001), CRm(0b), Op2(0b001), /* ACTLR */ + access_actlr }, + { Op1(0b001), CRn(0b), CRm(0b), Op2(0b000), /* ECTLR */ + access_ectlr }, +}; + static struct kvm_sys_reg_target_table a57_target_table = { .target = KVM_ARM_TARGET_CORTEX_A57, .table64 = { .table = a57_sys_regs, .num = ARRAY_SIZE(a57_sys_regs), }, + .table32 = { + .table = a57_cp15_regs, + .num = ARRAY_SIZE(a57_cp15_regs), + }, }; static int __init sys_reg_a57_init(void) -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 27/29] arm64: KVM: 32bit specific register world switch
Allow registers specific to 32bit guests to be saved/restored during the world switch. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/kvm/hyp.S | 70 1 file changed, 70 insertions(+) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index cd7506d..1d4b0a7 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -312,6 +312,74 @@ __kvm_hyp_code_start: load_sysregs .endm +.macro skip_32bit_state tmp, target + // Skip 32bit state if not needed + mrs \tmp, hcr_el2 + tbnz\tmp, #HCR_RW_SHIFT, \target +.endm + +.macro skip_tee_state tmp, target + // Skip ThumbEE state if not needed + mrs \tmp, id_pfr0_el1 + tbz \tmp, #12, \target +.endm + +.macro save_guest_32bit_state + skip_32bit_state x2, 1f + + add x2, x0, #SPSR_OFFSET(KVM_SPSR_ABT) + mrs x4, spsr_abt + mrs x5, spsr_und + mrs x6, spsr_irq + mrs x7, spsr_fiq + stp x4, x5, [x2], #16 + stp x6, x7, [x2] + + add x2, x0, #SYSREG_OFFSET(DACR32_EL2) + mrs x4, dacr32_el2 + mrs x5, ifsr32_el2 + mrs x6, fpexc32_el2 + mrs x7, dbgvcr32_el2 + stp x4, x5, [x2], #16 + stp x6, x7, [x2] + + skip_tee_state x8, 1f + + add x2, x0, #SYSREG_OFFSET(TEECR32_EL1) + mrs x4, teecr32_el1 + mrs x5, teehbr32_el1 + stp x4, x5, [x2] +1: +.endm + +.macro restore_guest_32bit_state + skip_32bit_state x2, 1f + + add x2, x0, #SPSR_OFFSET(KVM_SPSR_ABT) + ldp x4, x5, [x2], #16 + ldp x6, x7, [x2] + msr spsr_abt, x4 + msr spsr_und, x5 + msr spsr_irq, x6 + msr spsr_fiq, x7 + + add x2, x0, #SYSREG_OFFSET(DACR32_EL2) + ldp x4, x5, [x2], #16 + ldp x6, x7, [x2] + msr dacr32_el2, x4 + msr ifsr32_el2, x5 + msr fpexc32_el2, x6 + msr dbgvcr32_el2, x7 + + skip_tee_state x8, 1f + + add x2, x0, #SYSREG_OFFSET(TEECR32_EL1) + ldp x4, x5, [x2] + msr teecr32_el1, x4 + msr teehbr32_el1, x5 +1: +.endm + .macro activate_traps ldr x2, [x0, #VCPU_IRQ_LINES] ldr x1, [x0, #VCPU_HCR_EL2] @@ -513,6 +581,7 @@ ENTRY(__kvm_vcpu_run) restore_timer_state restore_guest_sysregs restore_guest_fpsimd + restore_guest_32bit_state restore_guest_regs // That's it, no 
more messing around. @@ -523,6 +592,7 @@ __kvm_vcpu_return: // Assume x0 is the vcpu pointer, x1 the return code // Guest's x0-x3 are on the stack save_guest_regs + save_guest_32bit_state save_guest_fpsimd save_guest_sysregs save_timer_state -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 28/29] arm64: KVM: 32bit guest fault injection
Add fault injection capability for 32bit guests. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/kvm/inject_fault.c | 79 ++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 80b245f..85a4548 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -1,5 +1,5 @@ /* - * Fault injection for 64bit guests. + * Fault injection for both 32 and 64bit guests. * * Copyright (C) 2012 - ARM Ltd * Author: Marc Zyngier marc.zyng...@arm.com @@ -24,6 +24,74 @@ #include linux/kvm_host.h #include asm/kvm_emulate.h +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long cpsr; + unsigned long new_spsr_value = *vcpu_cpsr(vcpu); + bool is_thumb = (new_spsr_value COMPAT_PSR_T_BIT); + u32 return_offset = (is_thumb) ? 4 : 0; + u32 sctlr = vcpu-arch.cp15[c1_SCTLR]; + + cpsr = mode | COMPAT_PSR_I_BIT; + + if (sctlr (1 30)) + cpsr |= COMPAT_PSR_T_BIT; + if (sctlr (1 25)) + cpsr |= COMPAT_PSR_E_BIT; + + *vcpu_cpsr(vcpu) = cpsr; + + /* Note: These now point to the banked copies */ + *vcpu_spsr(vcpu) = new_spsr_value; + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr (1 13)) + vect_offset += 0x; + else /* always have security exceptions */ + vect_offset += vcpu-arch.cp15[c12_VBAR]; + + *vcpu_pc(vcpu) = vect_offset; +} + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. 
+ */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, +unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = vcpu-arch.cp15[c6_IFAR]; + fsr = vcpu-arch.cp15[c5_IFSR]; + } else { /* !iabt */ + vect_offset = 16; + far = vcpu-arch.cp15[c6_DFAR]; + fsr = vcpu-arch.cp15[c5_DFSR]; + } + + prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset); + + *far = addr; + + /* Always give debug fault for now - should give guest a clue */ + is_lpae = (vcpu-arch.cp15[c2_TTBCR] 31); + if (is_lpae) + *fsr = 1 9 | 0x22; + else + *fsr = 2; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -89,6 +157,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu) */ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { + if (!(vcpu-arch.hcr_el2 HCR_RW)) + inject_abt32(vcpu, false, addr); + inject_abt64(vcpu, false, addr); } @@ -102,6 +173,9 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) */ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { + if (!(vcpu-arch.hcr_el2 HCR_RW)) + inject_abt32(vcpu, true, addr); + inject_abt64(vcpu, true, addr); } @@ -113,5 +187,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { + if (!(vcpu-arch.hcr_el2 HCR_RW)) + inject_undef32(vcpu); + inject_undef64(vcpu); } -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 29/29] arm64: KVM: enable initialization of a 32bit vcpu
Wire the init of a 32bit vcpu by allowing 32bit modes in pstate, and providing sensible defaults out of reset state. This feature is of course conditioned by the presence of 32bit capability on the physical CPU. Signed-off-by: Marc Zyngier marc.zyng...@arm.com --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/guest.c| 6 ++ arch/arm64/kvm/reset.c| 22 +- 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 24dc8d7..0f17cfe 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -34,7 +34,7 @@ #include asm/kvm_vgic.h #include asm/kvm_arch_timer.h -#define KVM_VCPU_MAX_FEATURES 1 +#define KVM_VCPU_MAX_FEATURES 2 /* We don't currently support large pages. */ #define KVM_HPAGE_GFN_SHIFT(x) 0 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f9c269e..813427f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -70,6 +70,7 @@ struct kvm_regs { #define KVM_VGIC_V2_CPU_SIZE 0x2000 #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ +#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ struct kvm_vcpu_init { __u32 target; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2a8aaf8..123703d 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -84,6 +84,12 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { unsigned long mode = val COMPAT_PSR_MODE_MASK; switch (mode) { + case COMPAT_PSR_MODE_USR: + case COMPAT_PSR_MODE_FIQ: + case COMPAT_PSR_MODE_IRQ: + case COMPAT_PSR_MODE_SVC: + case COMPAT_PSR_MODE_ABT: + case COMPAT_PSR_MODE_UND: case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 3ac2f20..411659e 100644 --- 
a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -35,6 +35,19 @@ static struct kvm_regs default_regs_reset = { .regs.pstate = PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; +static struct kvm_regs default_regs_reset32 = { + .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT | + COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), +}; + +static bool cpu_has_32bit_el1(void) +{ + u64 pfr0; + + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + return !!(pfr0 0x20); +} + /** * kvm_reset_vcpu - sets core registers and sys_regs to reset value * @vcpu: The VCPU pointer @@ -49,7 +62,14 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) switch (vcpu-arch.target) { default: - cpu_reset = default_regs_reset; + if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu-arch.features)) { + if (!cpu_has_32bit_el1()) + return -EINVAL; + cpu_reset = default_regs_reset32; + vcpu-arch.hcr_el2 = ~HCR_RW; + } else { + cpu_reset = default_regs_reset; + } break; } -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html