Does macvtap support host to guest communication?
Hi, folks I am trying to use qemu/qemu-kvm with macvtap using the following commands: # ip link add link eth0 name v0 type macvtap mode {vepa,bridge,private} # ip link set v0 address da:4e:17:88:42:b1 up # idx=`ip link show v0 | grep mtu| awk -F: '{print $1}'` # kvm -net nic,macaddr=da:4e:17:88:42:b1 -net tap,fd=3 -hda /home/asias/qemu-stuff/sid.img 3<>/dev/tap${idx} I found that the guest can access other hosts on the LAN except the host where the guest lives, and the host where the guest lives cannot access the guest. My question is: Does macvtap support host (hypervisor host) to guest communication? -- Best Regards, Asias He -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm tools: Add robust error handling for fork/waitpid()
* Amos Kong kongjian...@gmail.com wrote: + } else if (pid > 0) { + while(waitpid(pid, &status, 0) != pid) { } } Doesn't that look like an infinite loop when waitpid() returns an error? Thanks, Ingo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
On Monday 18 April 2011, Asias He wrote: Hi, folks I am trying to use qemu/qemu-kvm with macvtap using following commands: # ip link add link eth0 name v0 type macvtap mode {vepa,bridge,private} # ip link set v0 address da:4e:17:88:42:b1 up # idx=`ip link show v0 | grep mtu| awk -F: '{print $1}'` # kvm -net nic,macaddr=da:4e:17:88:42:b1 -net tap,fd=3 -hda /home/asias/qemu-stuff/sid.img 3/dev/tap${idx} I found that guest can access other hosts on the LAN except the host where guest lives, and host where guest lives can not access guest. My question is: Does macvtap support host(hypervisor host) to guest communication? You can communicate between macvtap and macvlan devices when they are in bridge mode, but these devices cannot communicate with clients that run on the underlying device. Just add a macvlan device to your hardware interface and use that in the host instead of running on the low-level device directly. The other option is to use a vepa enabled bridge, but these are relatively rare. Arnd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/6] KVM: X86: Make tsc_delta calculation a function of guest tsc
On Sat, Apr 16, 2011 at 12:09:17PM -0400, Jan Kiszka wrote: This patch appears to cause troubles to Linux guests on TSC clocksource and APIC highres timer. The first boot after qemu start is always fine, but after a reboot the guest timer appears to fire incorrectly or even not at all. Hi Jan, can you please test if this patch fixes the problem? diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bd57639..d7282ef 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -391,7 +391,6 @@ struct kvm_vcpu_arch { unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; - u64 last_host_tsc; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d5a7f4..d8277aa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2146,7 +2146,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops-vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu-arch.last_host_tsc = native_read_tsc(); + kvm_get_msr(vcpu, MSR_IA32_TSC, vcpu-arch.last_guest_tsc); } static int is_efer_nx(void) -- AMD Operating System Research Center Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach General Managers: Alberto Bozzo, Andrew Bowd Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] acpi_piix4: remove bad save/restore of cpus_sts
Introduced by a mismerge and segfaults immediately. Still need proper save/restore of this field. Signed-off-by: Avi Kivity a...@redhat.com --- hw/acpi.h |1 - hw/acpi_piix4.c |1 - 2 files changed, 0 insertions(+), 2 deletions(-) diff --git a/hw/acpi.h b/hw/acpi.h index 96f1d5f..c141e65 100644 --- a/hw/acpi.h +++ b/hw/acpi.h @@ -133,7 +133,6 @@ struct ACPIGPE { uint8_t *sts; uint8_t *en; -uint8_t *cpu_sts; }; typedef struct ACPIGPE ACPIGPE; diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 9dc6f43..b582302 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -235,7 +235,6 @@ static const VMStateDescription vmstate_gpe = { .fields = (VMStateField []) { VMSTATE_GPE_ARRAY(sts, ACPIGPE), VMSTATE_GPE_ARRAY(en, ACPIGPE), -VMSTATE_UINT8_ARRAY(cpus_sts, struct gpe_regs, 32), VMSTATE_END_OF_LIST() } }; -- 1.7.4.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/6] KVM: X86: Make tsc_delta calculation a function of guest tsc
On 2011-04-18 10:03, Roedel, Joerg wrote: On Sat, Apr 16, 2011 at 12:09:17PM -0400, Jan Kiszka wrote: This patch appears to cause troubles to Linux guests on TSC clocksource and APIC highres timer. The first boot after qemu start is always fine, but after a reboot the guest timer appears to fire incorrectly or even not at all. Hi Jan, can you please test if this patch fixes the problem? diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bd57639..d7282ef 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -391,7 +391,6 @@ struct kvm_vcpu_arch { unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; - u64 last_host_tsc; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d5a7f4..d8277aa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2146,7 +2146,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops-vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu-arch.last_host_tsc = native_read_tsc(); + kvm_get_msr(vcpu, MSR_IA32_TSC, vcpu-arch.last_guest_tsc); } static int is_efer_nx(void) Looks good, behaviour is now fine also after reboot. Thanks, Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/6] KVM: X86: Make tsc_delta calculation a function of guest tsc
On Mon, Apr 18, 2011 at 11:12:56AM +0200, Jan Kiszka wrote: Looks good, behaviour is now fine also after reboot. Thanks for testing, I send it out together with another fix today. Joerg -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/4] qemu-kvm: Sort out upstream merge regressions
Recent merge with upstream left some corners of qemu-kvm broken behind. This series addresses those I've spotted based on my merge experiments in the past months. Jan Kiszka (4): qemu-kvm: Map run_on_cpu on qemu-kvm variant qemu-kvm: Synchronize states before reset qemu-kvm: Process async MCE events in main loop qemu-kvm: Unbreak guest debugging kvm.h|4 qemu-kvm-x86.c | 41 - qemu-kvm.c | 47 ++- target-i386/helper.c |3 +++ target-i386/kvm.c|2 +- 5 files changed, 58 insertions(+), 39 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] qemu-kvm: Map run_on_cpu on qemu-kvm variant
Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- target-i386/helper.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/target-i386/helper.c b/target-i386/helper.c index c859846..3fdbe68 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -33,6 +33,9 @@ #endif #include qemu-kvm.h +#ifndef OBSOLETE_KVM_IMPL +#define run_on_cpu on_vcpu +#endif /* !OBSOLETE_KVM_IMPL */ //#define DEBUG_MMU -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] qemu-kvm: Unbreak guest debugging
Align handling of guest debug exits to upstream by moving it completely to the arch section. This patch accounts for changed return codes in the exit handling code. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- kvm.h |2 -- qemu-kvm-x86.c| 14 +- qemu-kvm.c| 36 ++-- target-i386/kvm.c |2 +- 4 files changed, 20 insertions(+), 34 deletions(-) diff --git a/kvm.h b/kvm.h index bda6ad7..b890b5d 100644 --- a/kvm.h +++ b/kvm.h @@ -147,8 +147,6 @@ QTAILQ_HEAD(kvm_sw_breakpoint_head, kvm_sw_breakpoint); struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, target_ulong pc); -int kvm_handle_debug(struct kvm_debug_exit_arch *arch_info); - int kvm_sw_breakpoints_active(CPUState *env); int kvm_arch_insert_sw_breakpoint(CPUState *current_env, diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 96d2fa6..d37f217 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -183,6 +183,8 @@ int kvm_enable_vapic(CPUState *env, uint64_t vapic) #endif +extern CPUState *kvm_debug_cpu_requested; + int kvm_arch_run(CPUState *env) { int r = 0; @@ -198,8 +200,18 @@ int kvm_arch_run(CPUState *env) r = kvm_handle_tpr_access(env); break; #endif +#ifdef KVM_CAP_SET_GUEST_DEBUG +case KVM_EXIT_DEBUG: +DPRINTF(kvm_exit_debug\n); +r = kvm_handle_debug(run-debug.arch); +if (r == EXCP_DEBUG) { +kvm_debug_cpu_requested = env; +env-stopped = 1; +} +break; +#endif /* KVM_CAP_SET_GUEST_DEBUG */ default: -r = 1; +r = -1; break; } diff --git a/qemu-kvm.c b/qemu-kvm.c index d7d50f5..7689225 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -74,7 +74,7 @@ static int qemu_system_ready; pthread_t io_thread; static int io_thread_sigfd = -1; -static CPUState *kvm_debug_cpu_requested; +CPUState *kvm_debug_cpu_requested; #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT /* The list of ioperm_data */ @@ -86,20 +86,6 @@ static QLIST_HEAD(, ioperm_data) ioperm_head; int kvm_abi = EXPECTED_KVM_API_VERSION; int kvm_page_size; -#ifdef KVM_CAP_SET_GUEST_DEBUG -static int kvm_debug(CPUState *env, - struct kvm_debug_exit_arch *arch_info) 
-{ -int handle = kvm_handle_debug(arch_info); - -if (handle) { -kvm_debug_cpu_requested = env; -env-stopped = 1; -} -return handle; -} -#endif - static int handle_unhandled(uint64_t reason) { fprintf(stderr, kvm: unhandled exit % PRIx64 \n, reason); @@ -453,17 +439,6 @@ int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) #endif -static int handle_debug(CPUState *env) -{ -#ifdef KVM_CAP_SET_GUEST_DEBUG -struct kvm_run *run = env-kvm_run; - -return kvm_debug(env, run-debug.arch); -#else -return 0; -#endif -} - int kvm_get_regs(CPUState *env, struct kvm_regs *regs) { return kvm_vcpu_ioctl(env, KVM_GET_REGS, regs); @@ -623,9 +598,6 @@ int kvm_run(CPUState *env) run-io.count); r = 0; break; -case KVM_EXIT_DEBUG: -r = handle_debug(env); -break; case KVM_EXIT_MMIO: r = handle_mmio(env); break; @@ -649,11 +621,15 @@ int kvm_run(CPUState *env) r = kvm_handle_internal_error(env, run); break; default: -if (kvm_arch_run(env)) { +r = kvm_arch_run(env); +if (r 0) { fprintf(stderr, unhandled vm exit: 0x%x\n, run-exit_reason); kvm_show_regs(env); abort(); } +if (r 0) { +return r; +} break; } } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index be6a356..a3587ed 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1768,7 +1768,7 @@ void kvm_arch_remove_all_hw_breakpoints(void) static CPUWatchpoint hw_watchpoint; -int kvm_handle_debug(struct kvm_debug_exit_arch *arch_info) +static int kvm_handle_debug(struct kvm_debug_exit_arch *arch_info) { int ret = 0; int n; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] qemu-kvm: Synchronize states before reset
This aligns qemu-kvm with upstream commit a7ada1510c. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- qemu-kvm.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 39974be..8781cde 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -18,6 +18,7 @@ #include compatfd.h #include gdbstub.h #include monitor.h +#include cpus.h #include qemu-kvm.h #include libkvm.h @@ -1396,6 +1397,7 @@ static void qemu_kvm_system_reset(void) { pause_all_threads(); +cpu_synchronize_all_states(); qemu_system_reset(); resume_all_threads(); -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] qemu-kvm: Process async MCE events in main loop
This aligns qemu-kvm with upstream commit ab443475c9. Namely, we were missing a call to an equivalent of kvm_arch_process_async_events from the main loop. This adds a stripped down version of upstream's process_async_events until we switch over. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- kvm.h |2 -- qemu-kvm-x86.c | 27 +++ qemu-kvm.c |9 ++--- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/kvm.h b/kvm.h index 90c4e48..bda6ad7 100644 --- a/kvm.h +++ b/kvm.h @@ -110,9 +110,7 @@ void kvm_arch_post_run(CPUState *env, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); -#ifdef OBSOLETE_KVM_IMPL int kvm_arch_process_async_events(CPUState *env); -#endif int kvm_arch_get_registers(CPUState *env); diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 20093fc..96d2fa6 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -811,3 +811,30 @@ void kvm_arch_process_irqchip_events(CPUState *env) do_cpu_sipi(env); } } + +int kvm_arch_process_async_events(CPUState *env) +{ +if (env-interrupt_request CPU_INTERRUPT_MCE) { +/* We must not raise CPU_INTERRUPT_MCE if it's not supported. 
*/ +assert(env-mcg_cap); + +env-interrupt_request = ~CPU_INTERRUPT_MCE; + +kvm_cpu_synchronize_state(env); + +if (env-exception_injected == EXCP08_DBLE) { +/* this means triple fault */ +qemu_system_reset_request(); +env-exit_request = 1; +return 0; +} +env-exception_injected = EXCP12_MCHK; +env-has_error_code = 0; + +env-halted = 0; +if (kvm_irqchip_in_kernel() env-mp_state == KVM_MP_STATE_HALTED) { +env-mp_state = KVM_MP_STATE_RUNNABLE; +} +} +return 0; +} diff --git a/qemu-kvm.c b/qemu-kvm.c index 8781cde..d7d50f5 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1414,9 +1414,12 @@ static int kvm_main_loop_cpu(CPUState *env) { while (1) { int run_cpu = !kvm_cpu_is_stopped(env); -if (run_cpu !kvm_irqchip_in_kernel()) { -process_irqchip_events(env); -run_cpu = !env-halted; +if (run_cpu) { +kvm_arch_process_async_events(env); +if (!kvm_irqchip_in_kernel()) { +process_irqchip_events(env); +run_cpu = !env-halted; +} } if (run_cpu) { kvm_cpu_exec(env); -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/2] Some KVM fixes
Hi, these two patches fix one issue introduced with the recent emulator-intercept code (the issue was there before too, but hidden by other workaround code which was removed in the mentioned patch-set). The second patch fixes a problem introduced with the tsc-scaling patch-set where the TSC was not usable anymore after a guest-reboot. All-in-all, these fixes are no -stable material. Regards, Joerg Diffstat: arch/x86/include/asm/kvm_host.h |1 - arch/x86/kvm/svm.c |3 +++ arch/x86/kvm/x86.c |2 +- 3 files changed, 4 insertions(+), 2 deletions(-) Shortlog: Joerg Roedel (2): KVM: SVM: Fix nested sel_cr0 intercept path with decode-assists KVM: X86: Update last_guest_tsc in vcpu_put -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] KVM: SVM: Fix nested sel_cr0 intercept path with decode-assists
This patch fixes a bug in the nested-svm path when decode-assists is available on the machine. After a selective-cr0 intercept is detected the rip is advanced unconditionally. This causes the l1-guest to continue running with an l2-rip. This bug was found with the sel_cr0 unit-test on decode-assists capable hardware. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/svm.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a6bf2ad..de4bba9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2799,6 +2799,9 @@ static int cr_interception(struct vcpu_svm *svm) case 0: if (!check_selective_cr0_intercepted(svm, val)) err = kvm_set_cr0(&svm->vcpu, val); + else + return 1; + break; case 3: err = kvm_set_cr3(&svm->vcpu, val); -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] KVM: X86: Update last_guest_tsc in vcpu_put
The last_guest_tsc is used in vcpu_load to adjust the tsc_offset since tsc-scaling is merged. So the last_guest_tsc needs to be updated in vcpu_put instead of the last_host_tsc. This is fixed with this patch. Reported-by: Jan Kiszka jan.kis...@web.de Tested-by: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |1 - arch/x86/kvm/x86.c |2 +- 2 files changed, 1 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bd57639..d7282ef 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -391,7 +391,6 @@ struct kvm_vcpu_arch { unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; - u64 last_host_tsc; u64 last_guest_tsc; u64 last_kernel_ns; u64 last_tsc_nsec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d5a7f4..d8277aa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2146,7 +2146,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); } static int is_efer_nx(void) -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
KVM call agenda for April 19th
Please, send in any agenda items you are interested in covering. Later, Juan. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Enhancing qemu-img convert format compatibility
qemu-img is a pretty good Rosetta stone for image formats but it is missing support for some format versions. In order to bring qemu-img up-to-date with the latest disk image formats we will need to find specific image files and/or software versions that produce image files that qemu-img cannot understand today. If you have image files that qemu-img is unable to manipulate, please respond with details of the software and version used to produce the image. If possible please include a link to a small example image file. Stefan -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
On 04/18/2011 02:58 PM, Arnd Bergmann wrote: On Monday 18 April 2011, Asias He wrote: Hi, folks I am trying to use qemu/qemu-kvm with macvtap using following commands: # ip link add link eth0 name v0 type macvtap mode {vepa,bridge,private} # ip link set v0 address da:4e:17:88:42:b1 up # idx=`ip link show v0 | grep mtu| awk -F: '{print $1}'` # kvm -net nic,macaddr=da:4e:17:88:42:b1 -net tap,fd=3 -hda /home/asias/qemu-stuff/sid.img 3/dev/tap${idx} I found that guest can access other hosts on the LAN except the host where guest lives, and host where guest lives can not access guest. My question is: Does macvtap support host(hypervisor host) to guest communication? You can communicate between macvtap and macvlan devices when they are in bridge mode, but these devices cannot communicate with clients that run on the underlying device. Just add a macvlan device to your hardware interface and use that in the host instead of running on the low-level device directly. Hi, Arnd Works for me now. Thanks! Here are some thoughts on the macvtap: (1) Is it possible to add an interface to macvtap like /dev/net/tun, eg, /dev/net/macvtap. Currently, it is hard to use macvtap programmatically. (2) Adding another macvlan device(e.g., macvlan0) to the hardware interface(e.g., eth0) and using it as the old eth0 make the process of using macvtap complicate. One has to reconfigure the network. This is not optimal from the user perspective. Is it possible to leave the low-level device as is when using the macvtap device? The other option is to use a vepa enabled bridge, but these are relatively rare. Arnd -- Best Regards, Asias He -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
On Monday 18 April 2011, Asias He wrote: (1) Is it possible to add an interface to macvtap like /dev/net/tun, eg, /dev/net/macvtap. Currently, it is hard to use macvtap programmatically. I decided against having a multiplexor device because it makes permission handling rather hard. One chardev per network interface makes it possible to handle permissions in multiuser setups. (2) Adding another macvlan device(e.g., macvlan0) to the hardware interface(e.g., eth0) and using it as the old eth0 make the process of using macvtap complicate. One has to reconfigure the network. This is not optimal from the user perspective. Is it possible to leave the low-level device as is when using the macvtap device? Only in VEPA mode. Note that a similar restriction applies when using the bridge device, for the same technical reasons. Arnd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Enhancing qemu-img convert format compatibility
On Mon, Apr 18, 2011 at 11:18:42AM +0100, Stefan Hajnoczi wrote: qemu-img is a pretty good Rosetta stone for image formats but it is missing support some format versions. In order to bring qemu-img up-to-date with the latest disk image formats we will need to find specific image files and/or software versions that produce image files that qemu-img cannot understand today. If you have image files that qemu-img is unable to manipulate, please respond with details of the software and version used to produce the image. If possible please include a link to a small example image file. Stefan, We found that using the vSphere 4.x Export to OVF option would produce a VMDK file that qemu-img could not convert to raw. For older qemu-img the file would be converted to something that was not all zeroes, but nevertheless was certainly not a raw disk image. For current qemu-img, we get an Operation not permitted error which is at least better than silent corruption. Full details are in this bug report: https://bugzilla.redhat.com/show_bug.cgi?id=548723 Note the links at the top of that bug are broken. The disk image which failed is: http://oirase.annexia.org/tmp/TestLinux-disk1.vmdk SHA1: 2c81bae89210b075acc51da9d025935470149d55 http://oirase.annexia.org/tmp/TestLinux.ovf SHA1: 30831689b8c6f1b1a1fcbb728769b5f71056a580 Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones virt-df lists disk usage of guests without needing to install any software inside the virtual machine. Supports Linux and Windows. http://et.redhat.com/~rjones/virt-df/ -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Enhancing qemu-img convert format compatibility
On Mon, Apr 18, 2011 at 12:03 PM, Richard W.M. Jones rjo...@redhat.com wrote: On Mon, Apr 18, 2011 at 11:18:42AM +0100, Stefan Hajnoczi wrote: qemu-img is a pretty good Rosetta stone for image formats but it is missing support for some format versions. In order to bring qemu-img up-to-date with the latest disk image formats we will need to find specific image files and/or software versions that produce image files that qemu-img cannot understand today. If you have image files that qemu-img is unable to manipulate, please respond with details of the software and version used to produce the image. If possible please include a link to a small example image file. Stefan, We found that using the vSphere 4.x Export to OVF option would produce a VMDK file that qemu-img could not convert to raw. Excellent, thanks for sharing this. I hope we can build a picture of where there is missing support and address this in the Improved Image Format Compatibility project for Google Summer of Code: http://wiki.qemu.org/Google_Summer_of_Code_2011#Improved_image_format_compatibility Stefan -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
* Arnd Bergmann a...@arndb.de wrote: On Monday 18 April 2011, Asias He wrote: (1) Is it possible to add an interface to macvtap like /dev/net/tun, eg, /dev/net/macvtap. Currently, it is hard to use macvtap programmatically. I decided against having a multiplexor device because it makes permission handling rather hard. One chardev per network interface makes it possible to handle permissions in multiuser setups. (2) Adding another macvlan device(e.g., macvlan0) to the hardware interface(e.g., eth0) and using it as the old eth0 make the process of using macvtap complicate. One has to reconfigure the network. This is not optimal from the user perspective. Is it possible to leave the low-level device as is when using the macvtap device? Only in VEPA mode. Note that a similar restriction applies when using the bridge device, for the same technical reasons. Just to sum things up, our goal is to allow the tools/kvm/ unprivileged tool to provide TCP connectivity to Linux guests transparently, with the following parameters: - the kvm tool runs unprivileged - as ordinary user - without having to configure much (preferably zero configuration: without having to configure anything) on the guest Linux side - multiple guests should just work without interfering with each other - the kvm tool wants to be stateless - i.e. it does not want to allocate or manage host side devices - it just wants to provide the kind of TCP/IP connectivity host unprivileged user-space has, to the guest. The tool wants to be a generic tool with no global state, not a daemon. So it wants to be a stateless, unprivileged and zero-configuration solution. Is this possible with macvtap, and if yes, what kind of macvtap mode and usage would you recommend for that goal? Thanks, Ingo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] kvm tools: Thread virtio-blk
Add I/O thread to handle I/O operations in virtio-blk. There is currently support for multiple virtio queues but the kernel side supports only one virtio queue. It's not too much of a performance impact and the ABI does support multiple queues there - So I've prefered to do it like that to keep it flexible. I/O performance itself doesn't increase much due to the patch, what changes is system responsiveness during I/O operations. On an unthreaded system, The VCPU is frozen up until the I/O request is complete. On the other hand, On a threaded system the VCPU is free to do other work or queue more I/O while waiting for the original I/O request to complete. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/virtio-blk.c | 61 1 files changed, 56 insertions(+), 5 deletions(-) diff --git a/tools/kvm/virtio-blk.c b/tools/kvm/virtio-blk.c index 124ce95..029f753 100644 --- a/tools/kvm/virtio-blk.c +++ b/tools/kvm/virtio-blk.c @@ -30,9 +30,13 @@ struct blk_device { uint32_tguest_features; uint16_tconfig_vector; uint8_t status; + pthread_t io_thread; + pthread_mutex_t io_mutex; + pthread_cond_t io_cond; /* virtio queue */ uint16_tqueue_selector; + uint64_tvirtio_blk_queue_set_flags; struct virt_queue vqs[NUM_VIRT_QUEUES]; }; @@ -52,6 +56,9 @@ static struct blk_device blk_device = { * same applies to VIRTIO_BLK_F_BLK_SIZE */ .host_features = (1UL VIRTIO_BLK_F_SEG_MAX), + + .io_mutex = PTHREAD_MUTEX_INITIALIZER, + .io_cond= PTHREAD_COND_INITIALIZER }; static bool virtio_blk_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count) @@ -148,15 +155,57 @@ static bool virtio_blk_do_io_request(struct kvm *self, struct virt_queue *queue) return true; } -static void virtio_blk_handle_callback(struct kvm *self, uint16_t queue_index) + + +static int virtio_blk_get_selected_queue(void) { - struct virt_queue *vq = blk_device.vqs[queue_index]; + int i; - while (virt_queue__available(vq)) - virtio_blk_do_io_request(self, vq); + for (i = 0 ; i 
NUM_VIRT_QUEUES ; i++) { + if (blk_device.virtio_blk_queue_set_flags (1 i)) { + blk_device.virtio_blk_queue_set_flags = ~(1 i); + return i; + } + } - kvm__irq_line(self, VIRTIO_BLK_IRQ, 1); + return -1; +} +static void *virtio_blk_io_thread(void *ptr) +{ + struct kvm *self = ptr; + int ret; + mutex_lock(blk_device.io_mutex); + ret = pthread_cond_wait(blk_device.io_cond, blk_device.io_mutex); + while (ret == 0) { + int queue_index = virtio_blk_get_selected_queue(); + mutex_unlock(blk_device.io_mutex); + while (queue_index = 0) { + struct virt_queue *vq = blk_device.vqs[queue_index]; + + while (virt_queue__available(vq)) + virtio_blk_do_io_request(self, vq); + + kvm__irq_line(self, VIRTIO_BLK_IRQ, 1); + + mutex_lock(blk_device.io_mutex); + queue_index = virtio_blk_get_selected_queue(); + mutex_unlock(blk_device.io_mutex); + } + mutex_lock(blk_device.io_mutex); + ret = pthread_cond_wait((blk_device.io_cond), (blk_device.io_mutex)); + } + + return NULL; +} + +static void virtio_blk_handle_callback(struct kvm *self, uint16_t queue_index) +{ + pthread_mutex_lock((blk_device.io_mutex)); + blk_device.virtio_blk_queue_set_flags |= (1 queue_index); + pthread_mutex_unlock((blk_device.io_mutex)); + + pthread_cond_signal((blk_device.io_cond)); } static bool virtio_blk_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count) @@ -242,6 +291,8 @@ void virtio_blk__init(struct kvm *self) if (!self-disk_image) return; + pthread_create(blk_device.io_thread, NULL, virtio_blk_io_thread, self); + blk_device.blk_config.capacity = self-disk_image-size / SECTOR_SIZE; pci__register(virtio_blk_pci_device, PCI_VIRTIO_BLK_DEVNUM); -- 1.7.5.rc1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] kvm tools: Use virtio_blk_parameters to configure virtio-blk
Like in virtio-net, use virtio_blk_parameters to pass configuration parameters Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/virtio-blk.h |6 +- tools/kvm/kvm-run.c|7 ++- tools/kvm/virtio-blk.c |4 +++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/kvm/include/kvm/virtio-blk.h b/tools/kvm/include/kvm/virtio-blk.h index f82bbc7..9e91035 100644 --- a/tools/kvm/include/kvm/virtio-blk.h +++ b/tools/kvm/include/kvm/virtio-blk.h @@ -3,6 +3,10 @@ struct kvm; -void virtio_blk__init(struct kvm *self); +struct virtio_blk_parameters { + struct kvm *self; +}; + +void virtio_blk__init(struct virtio_blk_parameters *params); #endif /* KVM__BLK_VIRTIO_H */ diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c index 17fef20..5b71fb4 100644 --- a/tools/kvm/kvm-run.c +++ b/tools/kvm/kvm-run.c @@ -220,6 +220,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) int exit_code = 0; int i; struct virtio_net_parameters net_params; + struct virtio_blk_parameters blk_params; signal(SIGALRM, handle_sigalrm); signal(SIGQUIT, handle_sigquit); @@ -317,7 +318,11 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) pci__init(); - virtio_blk__init(kvm); + blk_params = (struct virtio_blk_parameters) { + .self = kvm + }; + + virtio_blk__init(blk_params); virtio_console__init(kvm); diff --git a/tools/kvm/virtio-blk.c b/tools/kvm/virtio-blk.c index 029f753..2470583 100644 --- a/tools/kvm/virtio-blk.c +++ b/tools/kvm/virtio-blk.c @@ -286,8 +286,10 @@ static struct pci_device_header virtio_blk_pci_device = { #define PCI_VIRTIO_BLK_DEVNUM 1 -void virtio_blk__init(struct kvm *self) +void virtio_blk__init(struct virtio_blk_parameters *params) { + struct kvm *self = params-self; + if (!self-disk_image) return; -- 1.7.5.rc1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] kvm tools: Add debug feature to test the IO thread
Add --debug-io-delay-cycles and --debug-io-delay-amount to delay the completion of IO requests within virtio-blk. This feature allows to verify and debug the threading within virtio-blk. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/include/kvm/virtio-blk.h |6 +- tools/kvm/kvm-run.c| 10 +- tools/kvm/virtio-blk.c | 11 +++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tools/kvm/include/kvm/virtio-blk.h b/tools/kvm/include/kvm/virtio-blk.h index 9e91035..c0211a0 100644 --- a/tools/kvm/include/kvm/virtio-blk.h +++ b/tools/kvm/include/kvm/virtio-blk.h @@ -1,10 +1,14 @@ #ifndef KVM__BLK_VIRTIO_H #define KVM__BLK_VIRTIO_H +#include stdint.h + struct kvm; struct virtio_blk_parameters { - struct kvm *self; + struct kvm *self; + uint64_tdebug_delay_cycles; + uint64_tdebug_delay_amount; }; void virtio_blk__init(struct virtio_blk_parameters *params); diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c index 5b71fb4..3392bfa 100644 --- a/tools/kvm/kvm-run.c +++ b/tools/kvm/kvm-run.c @@ -57,6 +57,8 @@ static void handle_sigalrm(int sig) } static u64 ram_size = MIN_RAM_SIZE_MB; +static u64 virtio_blk_delay_cycles = -1; +static u64 virtio_blk_delay_amount; static const char *kernel_cmdline; static const char *kernel_filename; static const char *initrd_filename; @@ -112,6 +114,10 @@ static const struct option options[] = { Enable single stepping), OPT_BOOLEAN('g', ioport-debug, ioport_debug, Enable ioport debugging), + OPT_U64('\0', debug-io-delay-cycles, virtio_blk_delay_cycles, + Wait this amount of cycles before delay), + OPT_U64('\0', debug-io-delay-amount, virtio_blk_delay_amount, + Delay each I/O request by this amount (usec)), OPT_END() }; @@ -319,7 +325,9 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) pci__init(); blk_params = (struct virtio_blk_parameters) { - .self = kvm + .self = kvm, + .debug_delay_cycles = virtio_blk_delay_cycles, + .debug_delay_amount = virtio_blk_delay_amount }; virtio_blk__init(blk_params); 
diff --git a/tools/kvm/virtio-blk.c b/tools/kvm/virtio-blk.c index 2470583..ea8c4e7 100644 --- a/tools/kvm/virtio-blk.c +++ b/tools/kvm/virtio-blk.c @@ -38,6 +38,9 @@ struct blk_device { uint16_tqueue_selector; uint64_tvirtio_blk_queue_set_flags; + uint64_tdebug_delay_cycles; + uint64_tdebug_delay_amount; + struct virt_queue vqs[NUM_VIRT_QUEUES]; }; @@ -174,6 +177,7 @@ static int virtio_blk_get_selected_queue(void) static void *virtio_blk_io_thread(void *ptr) { struct kvm *self = ptr; + uint64_t io_cycles = 0; int ret; mutex_lock(blk_device.io_mutex); ret = pthread_cond_wait(blk_device.io_cond, blk_device.io_mutex); @@ -183,6 +187,10 @@ static void *virtio_blk_io_thread(void *ptr) while (queue_index = 0) { struct virt_queue *vq = blk_device.vqs[queue_index]; + if (blk_device.debug_delay_cycles != (uint64_t)-1 + ++io_cycles blk_device.debug_delay_cycles) + usleep(blk_device.debug_delay_amount); + while (virt_queue__available(vq)) virtio_blk_do_io_request(self, vq); @@ -293,6 +301,9 @@ void virtio_blk__init(struct virtio_blk_parameters *params) if (!self-disk_image) return; + blk_device.debug_delay_amount = params-debug_delay_amount; + blk_device.debug_delay_cycles = params-debug_delay_cycles; + pthread_create(blk_device.io_thread, NULL, virtio_blk_io_thread, self); blk_device.blk_config.capacity = self-disk_image-size / SECTOR_SIZE; -- 1.7.5.rc1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] kvm tools: Complete missing segments in a iov op using regular op
If any of the iov operations return mid-block, use regular ops to complete the current block and continue using iov ops. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/read-write.c | 58 ++- 1 files changed, 51 insertions(+), 7 deletions(-) diff --git a/tools/kvm/read-write.c b/tools/kvm/read-write.c index 0c995c8..bf2e4a0 100644 --- a/tools/kvm/read-write.c +++ b/tools/kvm/read-write.c @@ -189,10 +189,10 @@ static inline ssize_t get_iov_size(const struct iovec *iov, int iovcnt) } static inline void shift_iovec(const struct iovec **iov, int *iovcnt, - size_t nr, ssize_t *total, size_t *count, off_t *offset) + ssize_t *nr, ssize_t *total, size_t *count, off_t *offset) { - while (nr = (*iov)-iov_len) { - nr -= (*iov)-iov_len; + while ((size_t)*nr = (*iov)-iov_len) { + *nr -= (*iov)-iov_len; *total += (*iov)-iov_len; *count -= (*iov)-iov_len; if (offset) @@ -218,7 +218,18 @@ ssize_t readv_in_full(int fd, const struct iovec *iov, int iovcnt) return -1; } - shift_iovec(iov, iovcnt, nr, total, count, NULL); + shift_iovec(iov, iovcnt, nr, total, count, NULL); + + while (nr 0) { + ssize_t nr_readagain; + nr_readagain = xread(fd, iov-iov_base + nr, + iov-iov_len - nr); + if (nr_readagain = 0) + return total; + + nr += nr_readagain; + shift_iovec(iov, iovcnt, nr, total, count, NULL); + } } return total; @@ -240,7 +251,18 @@ ssize_t writev_in_full(int fd, const struct iovec *iov, int iovcnt) return -1; } - shift_iovec(iov, iovcnt, nr, total, count, NULL); + shift_iovec(iov, iovcnt, nr, total, count, NULL); + + while (nr 0) { + ssize_t nr_writeagain; + nr_writeagain = xwrite(fd, iov-iov_base + nr, + iov-iov_len - nr); + if (nr_writeagain = 0) + return total; + + nr += nr_writeagain; + shift_iovec(iov, iovcnt, nr, total, count, NULL); + } } return total; @@ -288,7 +310,18 @@ ssize_t preadv_in_full(int fd, const struct iovec *iov, int iovcnt, off_t offset return -1; } - shift_iovec(iov, iovcnt, nr, total, count, offset); + shift_iovec(iov, iovcnt, nr, total, 
count, offset); + + while (nr 0) { + ssize_t nr_readagain; + nr_readagain = xpread(fd, iov-iov_base + nr, + iov-iov_len - nr, offset + nr); + if (nr_readagain = 0) + return total; + + nr += nr_readagain; + shift_iovec(iov, iovcnt, nr, total, count, offset); + } } return total; @@ -310,7 +343,18 @@ ssize_t pwritev_in_full(int fd, const struct iovec *iov, int iovcnt, off_t offse return -1; } - shift_iovec(iov, iovcnt, nr, total, count, offset); + shift_iovec(iov, iovcnt, nr, total, count, offset); + + while (nr 0) { + ssize_t nr_writeagain; + nr_writeagain = xpread(fd, iov-iov_base + nr, + iov-iov_len - nr, offset + nr); + if (nr_writeagain = 0) + return total; + + nr += nr_writeagain; + shift_iovec(iov, iovcnt, nr, total, count, offset); + } } return total; -- 1.7.5.rc1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4 V2] kvm tools: Complete missing segments in a iov op using regular op
If any of the iov operations return mid-block, use regular ops to complete the current block and continue using iov ops. Signed-off-by: Sasha Levin levinsasha...@gmail.com --- tools/kvm/read-write.c | 58 ++- 1 files changed, 51 insertions(+), 7 deletions(-) diff --git a/tools/kvm/read-write.c b/tools/kvm/read-write.c index 0c995c8..bf2e4a0 100644 --- a/tools/kvm/read-write.c +++ b/tools/kvm/read-write.c @@ -189,10 +189,10 @@ static inline ssize_t get_iov_size(const struct iovec *iov, int iovcnt) } static inline void shift_iovec(const struct iovec **iov, int *iovcnt, - size_t nr, ssize_t *total, size_t *count, off_t *offset) + ssize_t *nr, ssize_t *total, size_t *count, off_t *offset) { - while (nr = (*iov)-iov_len) { - nr -= (*iov)-iov_len; + while ((size_t)*nr = (*iov)-iov_len) { + *nr -= (*iov)-iov_len; *total += (*iov)-iov_len; *count -= (*iov)-iov_len; if (offset) @@ -218,7 +218,18 @@ ssize_t readv_in_full(int fd, const struct iovec *iov, int iovcnt) return -1; } - shift_iovec(iov, iovcnt, nr, total, count, NULL); + shift_iovec(iov, iovcnt, nr, total, count, NULL); + + while (nr 0) { + ssize_t nr_readagain; + nr_readagain = xread(fd, iov-iov_base + nr, + iov-iov_len - nr); + if (nr_readagain = 0) + return total; + + nr += nr_readagain; + shift_iovec(iov, iovcnt, nr, total, count, NULL); + } } return total; @@ -240,7 +251,18 @@ ssize_t writev_in_full(int fd, const struct iovec *iov, int iovcnt) return -1; } - shift_iovec(iov, iovcnt, nr, total, count, NULL); + shift_iovec(iov, iovcnt, nr, total, count, NULL); + + while (nr 0) { + ssize_t nr_writeagain; + nr_writeagain = xwrite(fd, iov-iov_base + nr, + iov-iov_len - nr); + if (nr_writeagain = 0) + return total; + + nr += nr_writeagain; + shift_iovec(iov, iovcnt, nr, total, count, NULL); + } } return total; @@ -288,7 +310,18 @@ ssize_t preadv_in_full(int fd, const struct iovec *iov, int iovcnt, off_t offset return -1; } - shift_iovec(iov, iovcnt, nr, total, count, offset); + shift_iovec(iov, iovcnt, nr, total, 
count, offset); + + while (nr 0) { + ssize_t nr_readagain; + nr_readagain = xpread(fd, iov-iov_base + nr, + iov-iov_len - nr, offset + nr); + if (nr_readagain = 0) + return total; + + nr += nr_readagain; + shift_iovec(iov, iovcnt, nr, total, count, offset); + } } return total; @@ -310,7 +343,18 @@ ssize_t pwritev_in_full(int fd, const struct iovec *iov, int iovcnt, off_t offse return -1; } - shift_iovec(iov, iovcnt, nr, total, count, offset); + shift_iovec(iov, iovcnt, nr, total, count, offset); + + while (nr 0) { + ssize_t nr_writeagain; + nr_writeagain = xpwrite(fd, iov-iov_base + nr, + iov-iov_len - nr, offset + nr); + if (nr_writeagain = 0) + return total; + + nr += nr_writeagain; + shift_iovec(iov, iovcnt, nr, total, count, offset); + } } return total; -- 1.7.5.rc1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
On Monday 18 April 2011, Ingo Molnar wrote: Only in VEPA mode. Note that a similar restriction applies when using the bridge device, for the same technical reasons. Just to sum things up, our goal is to allow the tools/kvm/ unprivileged tool to provide TCP connectivity to Linux guests transparently, with the following parameters: - the kvm tool runs unprivileged - as ordinary user - without having to configure much (preferably zero configuration: without having to configure anything) on the guest Linux side - multiple guests should just work without interfering with each other - the kvm tool wants to be stateless - i.e. it does not want to allocate or manage host side devices - it just wants to provide the kind of TCP/IP connectivity host unprivileged user-space has, to the guest. The tool wants to be a generic tool with no global state, not a daemon. So it wants to be a stateless, unprivileged and zero-configuration solution. Is this possible with macvtap, and if yes, what kind of macvtap mode and usage would you recommend for that goal? With the above requirements, I would suggest using something like the qemu user networking. This is slower and does not allow servers to be present in the guest, but those are not your goal as it seems. The primary goals of macvtap are to allow efficient networking (zero-copy, multi-queue, although we're not completely there yet) and proper security abstractions. If you want a guest to appear on the same network as the host, you cannot do that without privileges to manage the host network setup, and I guess that will have to stay that way. Arnd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
On Mon, Apr 18, 2011 at 02:01:14PM +0200, Ingo Molnar wrote: - the kvm tool wants to be stateless - i.e. it does not want to allocate or manage host side devices - it just wants to provide the kind of TCP/IP connectivity host unprivileged user-space has, to the guest. The tool wants to be a generic tool with no global state, not a daemon. So it wants to be a stateless, unprivileged and zero-configuration solution. Is this possible with macvtap, and if yes, what kind of macvtap mode and usage would you recommend for that goal? Well, current approaches really all boil down to a network device in guest passing us raw packets. Typically this means the backend deals with raw packets as well. For example, macvtap injects raw packets into the host device. That's a privileged operation. For tap you can set up NAT to reduce the privilege. This is at the moment far from stateless though. I would love to see something that does NAT but does not need privileges or host state. qemu uses slirp to do (most of) this in userspace but it just seems silly to maintain yet another networking stack just for this. What do others think? -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] acpi_piix4: remove bad save/restore of cpus_sts
This patch would fix the segfaults. But I suppose the following are necessary. - PIIX4PMState::gpe_cpu needs to be saved/loaded somewhere - gpe_writeb() needs to handle PROC_BASE ... PROC_BASE+31 like gpe_readb(). To be honest, I don't see why gpe_readb/writeb() are used for PROC_BASE...PROC_BASE + 31 - enable/disable_processor() manipulates gpe-sts pointer. It should be gpe-sts[0] |= PIIX4_CPU_HOTPLUG_STATUS thanks, On Mon, Apr 18, 2011 at 11:44:16AM +0300, Avi Kivity wrote: Introduced by a mismerge and segfaults immediately. Still need proper save/restore of this field. Signed-off-by: Avi Kivity a...@redhat.com --- hw/acpi.h |1 - hw/acpi_piix4.c |1 - 2 files changed, 0 insertions(+), 2 deletions(-) diff --git a/hw/acpi.h b/hw/acpi.h index 96f1d5f..c141e65 100644 --- a/hw/acpi.h +++ b/hw/acpi.h @@ -133,7 +133,6 @@ struct ACPIGPE { uint8_t *sts; uint8_t *en; -uint8_t *cpu_sts; }; typedef struct ACPIGPE ACPIGPE; diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 9dc6f43..b582302 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -235,7 +235,6 @@ static const VMStateDescription vmstate_gpe = { .fields = (VMStateField []) { VMSTATE_GPE_ARRAY(sts, ACPIGPE), VMSTATE_GPE_ARRAY(en, ACPIGPE), -VMSTATE_UINT8_ARRAY(cpus_sts, struct gpe_regs, 32), VMSTATE_END_OF_LIST() } }; -- 1.7.4.3 -- yamahata -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] acpi_piix4: fix save/load of PIIX4PMState
Its vmstate parameter was wrong. This patch fixes it. Reported-by: Avi Kivity a...@redhat.com Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- hw/acpi_piix4.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 96f5222..3a8fece 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -214,10 +214,9 @@ static int vmstate_acpi_post_load(void *opaque, int version_id) { \ .name = (stringify(_field)), \ .version_id = 0,\ - .num= GPE_LEN, \ .info = vmstate_info_uint16, \ .size = sizeof(uint16_t), \ - .flags = VMS_ARRAY | VMS_POINTER, \ + .flags = VMS_SINGLE | VMS_POINTER, \ .offset = vmstate_offset_pointer(_state, _field, uint8_t), \ } -- 1.7.1.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] acpi_piix4: remove bad save/restore of cpus_sts
On Mon, Apr 18, 2011 at 10:56:58PM +0900, Isaku Yamahata wrote: - enable/disable_processor() manipulates gpe->sts pointer. it should be gpe->sts[0] |= PIIX4_CPU_HOTPLUG_STATUS The last one is wrong. enable/disable_processor() looks correct. -- yamahata -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
On 04/18/2011 09:20 PM, Arnd Bergmann wrote: On Monday 18 April 2011, Ingo Molnar wrote: Only in VEPA mode. Note that a similar restriction applies when using the bridge device, for the same technical reasons. Just to sum things up, our goal is to allow the tools/kvm/ unprivileged tool to provide TCP connectivity to Linux guests transparently, with the following parameters: - the kvm tool runs unprivileged - as ordinary user - without having to configure much (preferably zero configuration: without having to configure anything) on the guest Linux side - multiple guests should just work without interfering with each other - the kvm tool wants to be stateless - i.e. it does not want to allocate or manage host side devices - it just wants to provide the kind of TCP/IP connectivity host unprivileged user-space has, to the guest. The tool wants to be a generic tool with no global state, not a daemon. So it wants to be a stateless, unprivileged and zero-configuration solution. Is this possible with macvtap, and if yes, what kind of macvtap mode and usage would you recommend for that goal? With the above requirements, I would suggest using something like the qemu user networking. This is slower and does not allow servers to be present in the guest, but those are not your goal as it seems. The primary goals of macvtap are to allow efficient networking (zero-copy, multi-queue, although we're not completely there yet) and proper security abstractions. If you want a guest to appear on the same network as the host, you cannot do that without privileges to manage the host network setup, and I guess that will have to stay that way. We do need guest appearing on the same network as the host support as well. The reason I am considering using macvtap instead of tap plus brctl is that it simplifies the bridge configuration and it is more efficient. However, IMHO, the interface of macvtap is not user friendly, at least for me. 
I have no idea about the technical reasons that make the low-level device inaccessible. But if it is accessible, a lot of configuration can be eliminated. I know virtualbox's bridge mode has this kind of restriction, while VMware's bridge mode does not. Arnd -- Best Regards, Asias He -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
On Monday 18 April 2011, Asias He wrote: We do need guest appearing on the same network as the host support as well. The reason I am considering using macvtap instead of tap plus brctl is that it simplifies the bridge configuration and it is more efficient. Right, you certainly don't need to consider tap/brctl any more. However, IMHO, the interface of macvtap is not user friendly, at least for me. I have no idea about the technical reasons that make the low-level device inaccessible. But if it is accessible, a lot of configuration can be eliminated. I know virtualbox's bridge mode has this kind of restriction, while VMware's bridge mode does not. The main reason is that having a MAC address scan in the regular networking core would make the common TX case where there is no macvlan device more complex. Macvtap is derived from the plain macvlan driver, which used to support only sending out to the wire until I added the optional bridge mode. If you want a regular device to be able to send to a macvlan port, that would require at least these changes: * Add an option to put a plain device into macvlan-bridge mode * Add support for that option into iproute2 * Add a hook into dev_queue_xmit() to check for macvlan ports Arnd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Does macvtap support host to guest communication?
On 04/18/2011 11:05 PM, Arnd Bergmann wrote: On Monday 18 April 2011, Asias He wrote: We do need guest appearing on the same network as the host support as well. The reason I am considering using macvtap instead of tap plus brctl is that it simplifies the bridge configuration and it is more efficient. Right, you certainly don't need to consider tap/brctl any more. However, IMHO, the interface of macvtap is not user friendly, at least for me. I have no idea about the technical reasons that make the low-level device inaccessible. But if it is accessible, a lot of configuration can be eliminated. I know virtualbox's bridge mode has this kind of restriction, while VMware's bridge mode does not. The main reason is that having a MAC address scan in the regular networking core would make the common TX case where there is no macvlan device more complex. Macvtap is derived from the plain macvlan driver, which used to support only sending out to the wire until I added the optional bridge mode. If you want a regular device to be able to send to a macvlan port, that would require at least these changes: * Add an option to put a plain device into macvlan-bridge mode * Add support for that option into iproute2 * Add a hook into dev_queue_xmit() to check for macvlan ports Cool! Arnd, would you mind adding this feature to macvtap? -- Best Regards, Asias He -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[no subject]
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] KVM: emulator: Use linearize() when fetching instructions.
Since segments need to be handled slightly differently when fetching instructions, we add a __linearize helper that accepts a new 'fetch' boolean. Signed-off-by: Nelson Elhage nelh...@ksplice.com --- arch/x86/include/asm/kvm_emulate.h |1 - arch/x86/kvm/emulate.c | 26 ++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0818448..9b760c8 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -265,7 +265,6 @@ struct x86_emulate_ctxt { unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - u32 cs_base; /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a5f63d4..427d78a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -540,9 +540,9 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } -static int linearize(struct x86_emulate_ctxt *ctxt, +static int __linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, -unsigned size, bool write, +unsigned size, bool write, bool fetch, ulong *linear) { struct decode_cache *c = ctxt-decode; @@ -569,7 +569,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, if (((desc.type 8) || !(desc.type 2)) write) goto bad; /* unreadable code segment */ - if ((desc.type 8) !(desc.type 2)) + if (!fetch (desc.type 8) !(desc.type 2)) goto bad; lim = desc_limit_scaled(desc); if ((desc.type 8) || !(desc.type 4)) { @@ -602,7 +602,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, } break; } - if (c-ad_bytes != 8) + if (fetch ? 
ctxt-mode != X86EMUL_MODE_PROT64 : c-ad_bytes != 8) la = (u32)-1; *linear = la; return X86EMUL_CONTINUE; @@ -613,6 +613,15 @@ bad: return emulate_gp(ctxt, addr.seg); } +static int linearize(struct x86_emulate_ctxt *ctxt, +struct segmented_address addr, +unsigned size, bool write, +ulong *linear) +{ + return __linearize(ctxt, addr, size, write, false, linear); +} + + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, void *data, @@ -637,11 +646,13 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, int size, cur_size; if (eip == fc-end) { - unsigned long linear = eip + ctxt-cs_base; - if (ctxt-mode != X86EMUL_MODE_PROT64) - linear = (u32)-1; + unsigned long linear; + struct segmented_address addr = {VCPU_SREG_CS, eip}; cur_size = fc-end - fc-start; size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); + rc = __linearize(ctxt, addr, size, false, true, linear); + if (rc != X86EMUL_CONTINUE) + return rc; rc = ops-fetch(linear, fc-data + cur_size, size, ctxt-vcpu, ctxt-exception); if (rc != X86EMUL_CONTINUE) @@ -3154,7 +3165,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) c-fetch.end = c-fetch.start + insn_len; if (insn_len 0) memcpy(c-fetch.data, insn, insn_len); - ctxt-cs_base = seg_base(ctxt, ops, VCPU_SREG_CS); switch (mode) { case X86EMUL_MODE_REAL: -- 1.7.4.44.gf9e72 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] acpi_piix4: fix save/load of PIIX4PMState
Isaku Yamahata yamah...@valinux.co.jp wrote: It's vmstate parameter was wrong. This patch fixes it. Reported-by: Avi Kivity a...@redhat.com Signed-off-by: Isaku Yamahata yamah...@valinux.co.jp --- hw/acpi_piix4.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 96f5222..3a8fece 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -214,10 +214,9 @@ static int vmstate_acpi_post_load(void *opaque, int version_id) { \ .name = (stringify(_field)), \ .version_id = 0,\ - .num= GPE_LEN, \ .info = vmstate_info_uint16, \ .size = sizeof(uint16_t), \ - .flags = VMS_ARRAY | VMS_POINTER, \ + .flags = VMS_SINGLE | VMS_POINTER, \ .offset = vmstate_offset_pointer(_state, _field, uint8_t), \ Shouldn't the last one still be uint16_t? I guess that on ich9, GPE becomes one array, do you have that code handy somewhere, just to see what you want to do? I think that the best thing to do at this point is just to revert this whole patch. We are creating a new type for uint8_t, that becomes a pointer. We are not sending the length of that array, so we need to add a new version/subsection when we add ICH9 anyways. Seeing what you want to do would help me trying to figure out the best vmstate approach. Thanks, Juan. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] KVM: Introduce a helper to check if gfn is in memslot
From: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp This will be used later. Signed-off-by: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp --- include/linux/kvm_host.h |5 + virt/kvm/kvm_main.c |6 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0bc3d37..9101698 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -612,6 +612,11 @@ static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, return slot-userspace_addr + (gfn - slot-base_gfn) * PAGE_SIZE; } +static inline bool gfn_in_memslot(struct kvm_memory_slot *slot, gfn_t gfn) +{ + return (gfn = slot-base_gfn) (gfn slot-base_gfn + slot-npages); +} + static inline gpa_t gfn_to_gpa(gfn_t gfn) { return (gpa_t)gfn PAGE_SHIFT; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5814645..6df199d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -940,8 +940,7 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, for (i = 0; i slots-nmemslots; ++i) { struct kvm_memory_slot *memslot = slots-memslots[i]; - if (gfn = memslot-base_gfn -gfn memslot-base_gfn + memslot-npages) + if (gfn_in_memslot(memslot, gfn)) return memslot; } return NULL; @@ -964,8 +963,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) if (memslot-flags KVM_MEMSLOT_INVALID) continue; - if (gfn = memslot-base_gfn -gfn memslot-base_gfn + memslot-npages) + if (gfn_in_memslot(memslot, gfn)) return 1; } return 0; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] KVM: MMU: Introduce a helper to read guest pte
From: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp This will be optimized later. Signed-off-by: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp --- arch/x86/kvm/paging_tmpl.h | 12 +--- 1 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 74f8567..109939a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -109,6 +109,14 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) return access; } +static int FNAME(read_guest_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, +gfn_t table_gfn, int offset, pt_element_t *ptep) +{ + return kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, ptep, + offset, sizeof(*ptep), + PFERR_USER_MASK | PFERR_WRITE_MASK); +} + /* * Fetch a guest pte for a guest virtual address */ @@ -160,9 +168,7 @@ walk: walker-table_gfn[walker-level - 1] = table_gfn; walker-pte_gpa[walker-level - 1] = pte_gpa; - if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, pte, - offset, sizeof(pte), - PFERR_USER_MASK|PFERR_WRITE_MASK)) { + if (FNAME(read_guest_pte)(vcpu, mmu, table_gfn, offset, pte)) { present = false; break; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH 3/3] KVM: MMU: Optimize guest page table walk
From: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp We optimize multi level guest page table walk as follows: 1. We cache the memslot which, probably, includes the next guest page tables to avoid searching for it many times. 2. We use get_user() instead of copy_from_user(). Note that this is kind of a restricted way of Xiao's more generic work: KVM: optimize memslots searching and cache GPN to GFN. With this patch applied, paging64_walk_addr_generic() has improved as the following tracing results show. Before: 3.169 us | paging64_walk_addr_generic(); 1.880 us | paging64_walk_addr_generic(); 1.243 us | paging64_walk_addr_generic(); 1.517 us | paging64_walk_addr_generic(); 3.009 us | paging64_walk_addr_generic(); 1.814 us | paging64_walk_addr_generic(); 1.340 us | paging64_walk_addr_generic(); 1.659 us | paging64_walk_addr_generic(); 1.748 us | paging64_walk_addr_generic(); 1.488 us | paging64_walk_addr_generic(); After: 1.714 us | paging64_walk_addr_generic(); 0.806 us | paging64_walk_addr_generic(); 0.664 us | paging64_walk_addr_generic(); 0.619 us | paging64_walk_addr_generic(); 0.645 us | paging64_walk_addr_generic(); 0.605 us | paging64_walk_addr_generic(); 1.388 us | paging64_walk_addr_generic(); 0.753 us | paging64_walk_addr_generic(); 0.594 us | paging64_walk_addr_generic(); 0.833 us | paging64_walk_addr_generic(); Signed-off-by: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp --- arch/x86/kvm/paging_tmpl.h | 37 - 1 files changed, 32 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 109939a..614aa3f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -109,12 +109,37 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) return access; } +/* + * Read the guest PTE refered to by table_gfn and offset and put it into ptep. + * + * *slot_hint, if not NULL, should point to a memslot which probably includes + * the guest PTE. 
The actual memslot will be put back into this so that + * callers can cache it. + */ static int FNAME(read_guest_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, -gfn_t table_gfn, int offset, pt_element_t *ptep) +gfn_t table_gfn, int offset, pt_element_t *ptep, +struct kvm_memory_slot **slot_hint) { - return kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, ptep, - offset, sizeof(*ptep), - PFERR_USER_MASK | PFERR_WRITE_MASK); + unsigned long addr; + pt_element_t __user *ptep_user; + gfn_t real_gfn; + + real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + PFERR_USER_MASK | PFERR_WRITE_MASK); + if (real_gfn == UNMAPPED_GVA) + return -EFAULT; + + real_gfn = gpa_to_gfn(real_gfn); + + if (!(*slot_hint) || !gfn_in_memslot(*slot_hint, real_gfn)) + *slot_hint = gfn_to_memslot(vcpu->kvm, real_gfn); + + addr = gfn_to_hva_memslot(*slot_hint, real_gfn); + if (kvm_is_error_hva(addr)) + return -EFAULT; + + ptep_user = (pt_element_t __user *)((void *)addr + offset); + return get_user(*ptep, ptep_user); } /* @@ -130,6 +155,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gpa_t pte_gpa; bool eperm, present, rsvd_fault; int offset, write_fault, user_fault, fetch_fault; + struct kvm_memory_slot *slot_cache = NULL; write_fault = access & PFERR_WRITE_MASK; user_fault = access & PFERR_USER_MASK; @@ -168,7 +194,8 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - if (FNAME(read_guest_pte)(vcpu, mmu, table_gfn, offset, &pte)) { + if (FNAME(read_guest_pte)(vcpu, mmu, table_gfn, + offset, &pte, &slot_cache)) { present = false; break; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 3/3] KVM: MMU: Optimize guest page table walk
On Tue, Apr 19, 2011 at 03:38:14AM +0900, Takuya Yoshikawa wrote: From: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp We optimize multi level guest page table walk as follows: 1. We cache the memslot which, probably, includes the next guest page tables to avoid searching for it many times. 2. We use get_user() instead of copy_from_user(). Note that this is kind of a restricted way of Xiao's more generic work: KVM: optimize memslots searching and cache GPN to GFN. With this patch applied, paging64_walk_addr_generic() has improved as the following tracing results show. Before: 3.169 us | paging64_walk_addr_generic(); 1.880 us | paging64_walk_addr_generic(); 1.243 us | paging64_walk_addr_generic(); 1.517 us | paging64_walk_addr_generic(); 3.009 us | paging64_walk_addr_generic(); 1.814 us | paging64_walk_addr_generic(); 1.340 us | paging64_walk_addr_generic(); 1.659 us | paging64_walk_addr_generic(); 1.748 us | paging64_walk_addr_generic(); 1.488 us | paging64_walk_addr_generic(); After: 1.714 us | paging64_walk_addr_generic(); 0.806 us | paging64_walk_addr_generic(); 0.664 us | paging64_walk_addr_generic(); 0.619 us | paging64_walk_addr_generic(); 0.645 us | paging64_walk_addr_generic(); 0.605 us | paging64_walk_addr_generic(); 1.388 us | paging64_walk_addr_generic(); 0.753 us | paging64_walk_addr_generic(); 0.594 us | paging64_walk_addr_generic(); 0.833 us | paging64_walk_addr_generic(); Nice optimization! What scenarios have you used to test it? Joerg -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] KVM: Use pci_store/load_saved_state() around VM device usage
On Sun, 2011-04-17 at 12:25 +0300, Avi Kivity wrote: On 04/15/2011 10:54 PM, Alex Williamson wrote: Store the device saved state so that we can reload the device back to the original state when it's unassigned. This has the benefit that the state survives across pci_reset_function() calls via the PCI sysfs reset interface while the VM is using the device. @@ -516,7 +518,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, pci_reset_function(dev); pci_save_state(dev); - + match->pci_saved_state = pci_store_saved_state(dev); match->assigned_dev_id = assigned_dev->assigned_dev_id; Error check? It might be better to give up the opacity of the data structure and make pci_saved_state the full struct, not a pointer. pci_store_saved_state() returns NULL on error, which is correctly handled if we pass NULL to pci_load_saved_state() or a pointer to NULL to pci_load_and_free_saved_state(). This is also why I changed the __pci_reset_function() back to a normal pci_reset_function(), so we're never left with an uninitialized device like we are now. We could be more verbose or return an error here, but we've gone for a long time not even doing this save/restore across VM usage, so I don't think it's worth preventing the device attachment if it fails. Thanks, Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] PCI: Add interfaces to store and load the device saved state
On Sun, 2011-04-17 at 12:23 +0300, Avi Kivity wrote: On 04/15/2011 10:54 PM, Alex Williamson wrote: For KVM device assignment, we'd like to save off the state of a device prior to passing it to the guest and restore it later. We also want to allow pci_reset_function() to be called while the device is owned by the guest. This however overwrites and invalidates the struct pci_dev buffers, so we can't just manually call save and restore. Add generic interfaces for the saved state to be stored into a buffer and reloaded back into struct pci_dev at a later time. +/** + * pci_store_saved_state - Store the device saved state into a buffer + * @dev: - PCI device that we're dealing with + * + * Returns an opaque buffer containing the device saved state. + * NULL if no state or error. + */ +void *pci_store_saved_state(struct pci_dev *dev) This is too opaque - no type safety. You can return a struct pci_state *, leaving it undefined in header files. Good idea, I'll roll this in. Thanks, Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/5] Make unattended install on Linux safer, fix OpenSUSE/SLES installs
While working on making unattended install on Linux guests safer, noticed that the recent patches changing unattended install to use -kernel and -initrd options [1] were breaking OpenSUSE and SLES installation. As a maintainer it is my duty to fix such breakages, so I did it. I tested all changes with OpenSUSE 11.4, which I downloaded from the opensuse website. I ask the IBM guys that contributed this guest support to go through and test the changes, I need some help here. Anyway, I am confident that this patchset will bring a major improvement for the users of those guests. Lucas Meneghel Rodrigues (5): KVM test: Fix incorrect CdromDisk instantiation when distro is OpenSUSE KVM test: Unattended install Give Linux VMs time to shutdown cleanly v2 KVM test: unattended_install: Introduce wait_no_ack param KVM test: Make OpenSUSE and SLES to use unattended_cdrom KVM test: Introduce OpenSUSE 11.4 guest definitions client/tests/kvm/tests/unattended_install.py | 37 +++ client/tests/kvm/tests_base.cfg.sample | 93 + client/tests/kvm/unattended/Fedora-10.ks |1 + client/tests/kvm/unattended/Fedora-11.ks |1 + client/tests/kvm/unattended/Fedora-12.ks |1 + client/tests/kvm/unattended/Fedora-13.ks |1 + client/tests/kvm/unattended/Fedora-14.ks |1 + client/tests/kvm/unattended/Fedora-9.ks |1 + client/tests/kvm/unattended/OpenSUSE-11.xml | 13 +--- client/tests/kvm/unattended/RHEL-4-series.ks |2 +- client/tests/kvm/unattended/RHEL-5-series.ks |2 +- client/tests/kvm/unattended/RHEL-6-series.ks |2 +- client/tests/kvm/unattended/SLES-11.xml | 13 +--- 13 files changed, 100 insertions(+), 68 deletions(-) -- 1.7.4.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/5] KVM test: Fix incorrect CdromDisk instantiation when distro is OpenSUSE
We need to pass an additional tmpdir param to the class initialization. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests/unattended_install.py |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/tests/unattended_install.py b/client/tests/kvm/tests/unattended_install.py index c7e19da..2d58cf2 100644 --- a/client/tests/kvm/tests/unattended_install.py +++ b/client/tests/kvm/tests/unattended_install.py @@ -376,7 +376,7 @@ class UnattendedInstallConfig(object): # SUSE autoyast install dest_fname = autoinst.xml if self.cdrom_unattended: -boot_disk = CdromDisk(self.cdrom_unattended) +boot_disk = CdromDisk(self.cdrom_unattended, self.tmpdir) elif self.floppy: boot_disk = FloppyDisk(self.floppy, self.qemu_img_binary, self.tmpdir) -- 1.7.4.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/5] KVM test: Unattended install Give Linux VMs time to shutdown cleanly v2
During unattended install, right after we receive the ACK from the guest the test is deemed to be finished, and as shutdown_vm = yes, it'll try to end the vm issuing a shutdown command to it. However, on virtually all Linux guests an SSH server is not available at the end of install, so KVM autotest will end the VM forcefully, which is not really safe, although it has served us well so far. We did not fix this 'problem' so far because on RHEL3, a supported guest, the anaconda syntax does not support the 'poweroff' directive, only 'reboot', so if we don't finish the VM right after the ACK from guest we really can't prevent it from starting the install again, getting an infinite loop. Well then, let's restrict this behavior only to RHEL 3, and fix this properly for all other systems, by introducing a 'shutdown_cleanly' param and setting it to 'yes' to everybody but RHEL 3. Windows doesn't need this at all, as when the ACK is received the system is fully booted up and functional, so the postprocessing will take care of shutting down the VM cleanly. I've fixed all Fedora and RHEL unattended files as part of this change. With this change, we can get rid of another parameter, post_install_delay, that was created with the only purpose of handling the fact that on autoyast install, when the ACK is sent to host, the system is not fully done with install. 
So we end up fixing yet another problem :) Changes from v1: * Properly convert the post ack timeout to an int * Don't fix SLES unattended files, they'll be fixed in posterior patches Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests/unattended_install.py | 10 ++ client/tests/kvm/tests_base.cfg.sample |8 ++-- client/tests/kvm/unattended/Fedora-10.ks |1 + client/tests/kvm/unattended/Fedora-11.ks |1 + client/tests/kvm/unattended/Fedora-12.ks |1 + client/tests/kvm/unattended/Fedora-13.ks |1 + client/tests/kvm/unattended/Fedora-14.ks |1 + client/tests/kvm/unattended/Fedora-9.ks |1 + client/tests/kvm/unattended/RHEL-4-series.ks |2 +- client/tests/kvm/unattended/RHEL-5-series.ks |2 +- client/tests/kvm/unattended/RHEL-6-series.ks |2 +- 11 files changed, 21 insertions(+), 9 deletions(-) diff --git a/client/tests/kvm/tests/unattended_install.py b/client/tests/kvm/tests/unattended_install.py index 2d58cf2..fe7222e 100644 --- a/client/tests/kvm/tests/unattended_install.py +++ b/client/tests/kvm/tests/unattended_install.py @@ -558,7 +558,9 @@ def run_unattended_install(test, params, env): logging.info(Guest reported successful installation after %d s (%d min), time_elapsed, time_elapsed/60) -if post_install_delay: -logging.debug(Post install delay specified, waiting %s s..., - post_install_delay) -time.sleep(post_install_delay) +if params.get(shutdown_cleanly, yes) == yes: +shutdown_cleanly_timeout = int(params.get(shutdown_cleanly_timeout, + 120)) +logging.info(Wait for guest to shudown cleanly...) 
+if kvm_utils.wait_for(vm.is_dead, shutdown_cleanly_timeout, 1, 1): +logging.info(Guest managed to shutdown cleanly) diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 6397590..b96755d 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -101,6 +101,8 @@ variants: kill_vm = yes kill_vm_gracefully = yes kill_vm_on_error = yes +shutdown_cleanly = yes +shutdown_cleanly_timeout = 120 force_create_image = yes extra_params += -boot d guest_port_unattended_install = 12323 @@ -1294,7 +1296,6 @@ variants: # hold your autoyast file extra_params += --append 'autoyast=floppy console=ttyS0,115200 console=tty0' #extra_params += --append 'autoyast=cdrom console=ttyS0,115200 console=tty0' -post_install_delay = 10 variants: - 11.0.32: @@ -1434,7 +1435,6 @@ variants: # hold your autoyast file extra_params += --append 'autoyast=floppy console=ttyS0,115200 console=tty0' #extra_params += --append 'autoyast=cdrom console=ttyS0,115200 console=tty0' -post_install_delay = 10 kernel = linux initrd = initrd @@ -1563,6 +1563,8 @@ variants: cdrom_unattended = images/rhel39-32/ks.iso kernel = images/rhel39-32/vmlinuz initrd = images/rhel39-32/initrd.img +# 3.X anaconda does not support 'poweroff' on ks +shutdown_cleanly = no
[PATCH 3/5] KVM test: unattended_install: Introduce wait_no_ack param
Some distros such as OpenSUSE won't let the install test to communicate the guest ACK for some weird reason [1], this commit modifies the unattended_install test introducing a wait_no_ack param that will skip the ACK check, and break the unattended install loop when the machine turns off itself (directive halt on OpenSUSE and SLES unattended file). Other distros shouldn't be affected by this change, their install behavior was kept the same. [1] Tried several ways of transmitting the ACK by means of chroot scripts, no way. In the end of the day, I was short on time to spend too much of it figuring out a way to do it, hence the wait_no_ack param was born. I really could use some help from the guys that introduced OpenSUSE and SLES support to kvm autotest. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests/unattended_install.py | 25 + client/tests/kvm/tests_base.cfg.sample |2 ++ client/tests/kvm/unattended/OpenSUSE-11.xml | 13 ++--- client/tests/kvm/unattended/SLES-11.xml | 13 ++--- 4 files changed, 23 insertions(+), 30 deletions(-) diff --git a/client/tests/kvm/tests/unattended_install.py b/client/tests/kvm/tests/unattended_install.py index fe7222e..8513e0b 100644 --- a/client/tests/kvm/tests/unattended_install.py +++ b/client/tests/kvm/tests/unattended_install.py @@ -526,15 +526,23 @@ def run_unattended_install(test, params, env): start_time = time.time() while (time.time() - start_time) install_timeout: -vm.verify_alive() -vm.verify_kernel_crash() -client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: -client.connect((vm.get_address(), port)) -if client.recv(1024) == done: +vm.verify_alive() +except kvm_vm.VMDeadError, e: +if params.get(wait_no_ack, no) == yes: break -except (socket.error, kvm_vm.VMAddressError): -pass +else: +raise e +vm.verify_kernel_crash() +if params.get(wait_no_ack, no) == no: +client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +try: +client.connect((vm.get_address(), port)) +if 
client.recv(1024) == done: +break +except (socket.error, kvm_vm.VMAddressError): +pass + if migrate_background: # Drop the params which may break the migration # Better method is to use dnsmasq to do the @@ -549,7 +557,8 @@ def run_unattended_install(test, params, env): vm.migrate(timeout=mig_timeout, protocol=mig_protocol) else: time.sleep(1) -client.close() +if params.get(wait_no_ack, no) == no: +client.close() else: raise error.TestFail(Timeout elapsed while waiting for install to finish) diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index b96755d..fbe416e 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -1296,6 +1296,7 @@ variants: # hold your autoyast file extra_params += --append 'autoyast=floppy console=ttyS0,115200 console=tty0' #extra_params += --append 'autoyast=cdrom console=ttyS0,115200 console=tty0' +wait_no_ack = yes variants: - 11.0.32: @@ -1437,6 +1438,7 @@ variants: #extra_params += --append 'autoyast=cdrom console=ttyS0,115200 console=tty0' kernel = linux initrd = initrd +wait_no_ack = yes variants: - 11.0.32: diff --git a/client/tests/kvm/unattended/OpenSUSE-11.xml b/client/tests/kvm/unattended/OpenSUSE-11.xml index 0ade836..0f11ac9 100644 --- a/client/tests/kvm/unattended/OpenSUSE-11.xml +++ b/client/tests/kvm/unattended/OpenSUSE-11.xml @@ -65,6 +65,7 @@ ask-list config:type=list/ mode confirm config:type=booleanfalse/confirm + halt config:type=booleantrue/halt /mode mouse idnone/id @@ -160,17 +161,7 @@ source![CDATA[dhclient eth0 chkconfig sshd on sed -i -e 's/\(PasswordAuthentication\s\)no/\1yes/g' /etc/ssh/sshd_config -cat EOFserver -#!/usr/bin/python -import socket, os -server=socket.socket(socket.AF_INET, socket.SOCK_STREAM) -server.bind(('',12323)) -server.listen(1) -(client, addr) = server.accept() -client.send(done) -client.close() -EOF -python server +service sshd restart ]]/source /script /init-scripts diff --git 
a/client/tests/kvm/unattended/SLES-11.xml b/client/tests/kvm/unattended/SLES-11.xml index c694a31..16c25d0 100644 --- a/client/tests/kvm/unattended/SLES-11.xml +++ b/client/tests/kvm/unattended/SLES-11.xml @@ -92,6 +92,7
[PATCH 4/5] KVM test: Make OpenSUSE and SLES to use unattended_cdrom
We had disabled unattended_cdrom usage with SLES and OpenSUSE due to the fact that autoyast=cdrom wasn't working properly to specify a profile. However, the interaction of the linux floppy driver with qemu-kvm and qemu tends to generate some pretty bad guest kernel panics, reason why we came up with unattended_cdrom for Fedora and RHEL in the first place. Thinkering a bit more with autoyast documentation, I've discovered that specifying autoyast=device://scd0/autoinst.xml works fine and so I've changed the default for both distros to use the CD again. Now, people will have a much smoother experience installing those distros under KVM autotest. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests_base.cfg.sample | 56 1 files changed, 28 insertions(+), 28 deletions(-) diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index fbe416e..5bee383 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -1294,8 +1294,8 @@ variants: unattended_install: # You have to use autoyast=floppy if you want to use floppies to # hold your autoyast file -extra_params += --append 'autoyast=floppy console=ttyS0,115200 console=tty0' -#extra_params += --append 'autoyast=cdrom console=ttyS0,115200 console=tty0' +#extra_params += --append 'autoyast=floppy console=ttyS0,115200 console=tty0' +extra_params += --append 'autoyast=device://scd0/autoinst.xml console=ttyS0,115200 console=tty0' wait_no_ack = yes variants: @@ -1308,8 +1308,8 @@ variants: md5sum_1m_cd1 = c720b30557af758e69de450409516369 unattended_install: unattended_file = unattended/OpenSUSE-11.xml -floppy = images/opensuse-11-0-32/autoyast.vfd -#cdrom_unattended = images/opensuse-11-0-32/autoyast.iso +#floppy = images/opensuse-11-0-32/autoyast.vfd +cdrom_unattended = images/opensuse-11-0-32/autoyast.iso kernel = images/opensuse-11-0-32/linux initrd = images/opensuse-11-0-32/initrd boot_path = boot/i386/loader @@ -1323,8 
+1323,8 @@ variants: image_name = openSUSE-11.0-64 unattended_install: unattended_file = unattended/OpenSUSE-11.xml -floppy = images/opensuse-11-0-64/autoyast.vfd -#cdrom_unattended = images/opensuse-11-0-64/autoyast.iso +#floppy = images/opensuse-11-0-64/autoyast.vfd +cdrom_unattended = images/opensuse-11-0-64/autoyast.iso kernel = images/opensuse-11-0-64/linux initrd = images/opensuse-11-0-64/initrd boot_path = boot/x86_64/loader @@ -1342,8 +1342,8 @@ variants: md5sum_1m_cd1 = b70217417468389083429f81ba7ce2bd unattended_install: unattended_file = unattended/OpenSUSE-11.xml -floppy = images/opensuse-11-1-32/autoyast.vfd -#cdrom_unattended = images/opensuse-11-1-32/autoyast.iso +#floppy = images/opensuse-11-1-32/autoyast.vfd +cdrom_unattended = images/opensuse-11-1-32/autoyast.iso kernel = images/opensuse-11-1-32/linux initrd = images/opensuse-11-1-32/initrd boot_path = boot/i386/loader @@ -1361,8 +1361,8 @@ variants: md5sum_1m_cd1 = 768ca32503ef92c28f2d144f2a87e4d0 unattended_install: unattended_file = unattended/OpenSUSE-11.xml -floppy = images/opensuse-11-1-64/autoyast.vfd -#cdrom_unattended = images/opensuse-11-1-64/autoyast.iso +#floppy = images/opensuse-11-1-64/autoyast.vfd +cdrom_unattended = images/opensuse-11-1-64/autoyast.iso kernel = images/opensuse-11-1-64/linux initrd = images/opensuse-11-1-64/initrd boot_path = boot/x86_64/loader @@ -1376,8 +1376,8 @@ variants: image_name = openSUSE-11.2-32 unattended_install: unattended_file = unattended/OpenSUSE-11.xml -floppy =
[PATCH 5/5] KVM test: Introduce OpenSUSE 11.4 guest definitions
For both 32 and 64 bit. Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests_base.cfg.sample | 27 +++ 1 files changed, 27 insertions(+), 0 deletions(-) diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 5bee383..5d274f8 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -1429,6 +1429,33 @@ variants: md5sum_cd1 = adf5d2a0a03c1e3aaf102fd6a4771b87 md5sum_1m_cd1 = e0dd12dac30d296417256775e1234c6e +- 11.4.32: +image_name = openSUSE-11.4-32 +unattended_install: +unattended_file = unattended/OpenSUSE-11.xml +#floppy = images/opensuse-11-4-32/autoyast.vfd +cdrom_unattended = images/opensuse-11-4-32/autoyast.iso +kernel = images/opensuse-11-4-32/linux +initrd = images/opensuse-11-4-32/initrd +boot_path = boot/x86_64/loader +unattended_install.cdrom: +cdrom_cd1 = isos/linux/openSUSE-11.4-DVD-i586.iso +md5sum_cd1 = 5f6d6d67c3e256b2513311f4ed650515 + +- 11.4.64: +image_name = openSUSE-11.4-64 +unattended_install: +unattended_file = unattended/OpenSUSE-11.xml +#floppy = images/opensuse-11-4-64/autoyast.vfd +cdrom_unattended = images/opensuse-11-4-64/autoyast.iso +kernel = images/opensuse-11-4-64/linux +initrd = images/opensuse-11-4-64/initrd +boot_path = boot/x86_64/loader +unattended_install.cdrom: +cdrom_cd1 = isos/linux/openSUSE-11.4-DVD-x86_64.iso +md5sum_cd1 = 082ebfac494b41cd56b38fb4218c545d +md5sum_1m_cd1 = 2adcc5623e6c50b5d08acb7f84aa3fb1 + - SLES: shell_prompt = ^root@.*[\#\$]\s*$|# unattended_install: -- 1.7.4.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/6] KVM: X86: Make tsc_delta calculation a function of guest tsc
On Sat, Apr 16, 2011 at 06:09:17PM +0200, Jan Kiszka wrote: On 2011-03-25 09:44, Joerg Roedel wrote: + tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : +tsc - vcpu->arch.last_guest_tsc; This patch appears to cause trouble for Linux guests on TSC clocksource and APIC highres timer. The first boot after qemu start is always fine, but after a reboot the guest timer appears to fire incorrectly or even not at all. Was this patch tested with a guest reboot scenario as well? Does it account for the TSC being reset to 0 on reboot? Hmm, probably the last_guest_tsc is not updated correctly in this scenario. I will have a look tomorrow. Joerg To avoid this problem, when the TSC is reset, the overshoot protection where last_guest_tsc is used is specifically disabled: /* Reset of TSC must disable overshoot protection below */ vcpu->arch.hv_clock.tsc_timestamp = 0; vcpu->arch.last_tsc_write = data; vcpu->arch.last_tsc_nsec = ns; You can probably use the same test - last_guest_tsc is only valid if tsc_timestamp above != 0. Zach -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 3/3] KVM: MMU: Optimize guest page table walk
Joerg Roedel j...@8bytes.org wrote: Nice optimization! What scenarios have you used to test it? I used my desktop Phenom II box, running the latest qemu-kvm. So probably, NPT was ON by default. The guest was running a .ogg movie during that test. I am not an MMU expert. So I would be glad if I can know what scenarios should be tested for this patch! Can I test nested SVM easily, e.g.? Thanks, Takuya -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Network performance with small packets
On Thu, 14 Apr 2011 19:03:59 +0300, Michael S. Tsirkin m...@redhat.com wrote: On Thu, Apr 14, 2011 at 08:58:41PM +0930, Rusty Russell wrote: They have to offer the feature, so if they have some way of allocating non-page-aligned amounts of memory, they'll have to add those extra 2 bytes. So I think it's OK... Rusty. To clarify, my concern is that we always seem to try to map these extra 2 bytes, which conceivably might fail? No, if you look at the layout it's clear that there's always most of a page left for this extra room, both in the middle and at the end. Cheers, Rusty. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 3/3] KVM: MMU: Optimize guest page table walk
On 04/19/2011 02:38 AM, Takuya Yoshikawa wrote: From: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp We optimize multi level guest page table walk as follows: 1. We cache the memslot which, probably, includes the next guest page tables to avoid searching for it many times. Yeah, the hit is very high, after optimizing the algorithm of memslots (http://lwn.net/Articles/429308/), maybe the advantage is not so significant, could you apply this patchset and test again please? 2. We use get_user() instead of copy_from_user(). Note that this is kind of a restricted way of Xiao's more generic work: KVM: optimize memslots searching and cache GPN to GFN. With this patch applied, paging64_walk_addr_generic() has improved as the following tracing results show. Before: 3.169 us | paging64_walk_addr_generic(); 1.880 us | paging64_walk_addr_generic(); 1.243 us | paging64_walk_addr_generic(); 1.517 us | paging64_walk_addr_generic(); 3.009 us | paging64_walk_addr_generic(); 1.814 us | paging64_walk_addr_generic(); 1.340 us | paging64_walk_addr_generic(); 1.659 us | paging64_walk_addr_generic(); 1.748 us | paging64_walk_addr_generic(); 1.488 us | paging64_walk_addr_generic(); After: 1.714 us | paging64_walk_addr_generic(); 0.806 us | paging64_walk_addr_generic(); 0.664 us | paging64_walk_addr_generic(); 0.619 us | paging64_walk_addr_generic(); 0.645 us | paging64_walk_addr_generic(); 0.605 us | paging64_walk_addr_generic(); 1.388 us | paging64_walk_addr_generic(); 0.753 us | paging64_walk_addr_generic(); 0.594 us | paging64_walk_addr_generic(); 0.833 us | paging64_walk_addr_generic(); Signed-off-by: Takuya Yoshikawa yoshikawa.tak...@oss.ntt.co.jp --- arch/x86/kvm/paging_tmpl.h | 37 - 1 files changed, 32 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 109939a..614aa3f 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -109,12 +109,37 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, 
pt_element_t gpte) return access; } +/* + * Read the guest PTE refered to by table_gfn and offset and put it into ptep. + * + * *slot_hint, if not NULL, should point to a memslot which probably includes + * the guest PTE. The actual memslot will be put back into this so that + * callers can cache it. + */ static int FNAME(read_guest_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gfn_t table_gfn, int offset, pt_element_t *ptep) + gfn_t table_gfn, int offset, pt_element_t *ptep, + struct kvm_memory_slot **slot_hint) { - return kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, ptep, -offset, sizeof(*ptep), -PFERR_USER_MASK | PFERR_WRITE_MASK); + unsigned long addr; + pt_element_t __user *ptep_user; + gfn_t real_gfn; + + real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + PFERR_USER_MASK | PFERR_WRITE_MASK); + if (real_gfn == UNMAPPED_GVA) + return -EFAULT; + + real_gfn = gpa_to_gfn(real_gfn); + + if (!(*slot_hint) || !gfn_in_memslot(*slot_hint, real_gfn)) + *slot_hint = gfn_to_memslot(vcpu->kvm, real_gfn); + You forgot to check the result. (if *slot_hint == NULL)? ... ;-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 3/3] KVM: MMU: Optimize guest page table walk
Xiao Guangrong xiaoguangr...@cn.fujitsu.com wrote: We optimize multi level guest page table walk as follows: 1. We cache the memslot which, probably, includes the next guest page tables to avoid searching for it many times. Yeah, the hit is very high, after optimizing the algorithm of memslots (http://lwn.net/Articles/429308/), maybe the advantage is not so significant, could you apply this patchset and test again please? Any sorting strategy, including tree-based ones, has trade-offs. Compared to that, what I wanted to do here was to improve the table walk locally without sacrificing other things. Of course, my strategy depends on the assumption that the page tables will be in the same slot in very high probability. So if a certain algorithm seems to be adopted, yes, I will test based on that. IIRC, any practically good algorithm has not been found yet, right? 2. We use get_user() instead of copy_from_user(). Note that this is kind of a restricted way of Xiao's more generic work: KVM: optimize memslots searching and cache GPN to GFN. With this patch applied, paging64_walk_addr_generic() has improved as the following tracing results show. + + if (!(*slot_hint) || !gfn_in_memslot(*slot_hint, real_gfn)) + *slot_hint = gfn_to_memslot(vcpu->kvm, real_gfn); + You forgot to check the result. (if *slot_hint == NULL)? ... ;-) Thank you! I will check later. Takuya -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] KVM: PPC: booke: add sregs support
Signed-off-by: Scott Wood scottw...@freescale.com --- Documentation/kvm/api.txt |6 +- arch/powerpc/include/asm/kvm.h | 184 +++ arch/powerpc/include/asm/kvm_44x.h |1 - arch/powerpc/include/asm/kvm_e500.h |1 + arch/powerpc/include/asm/kvm_host.h |3 + arch/powerpc/include/asm/kvm_ppc.h |9 ++ arch/powerpc/kvm/44x.c | 10 ++ arch/powerpc/kvm/booke.c| 153 - arch/powerpc/kvm/e500.c | 75 ++ arch/powerpc/kvm/e500_emulate.c |5 +- arch/powerpc/kvm/e500_tlb.c |8 ++ arch/powerpc/kvm/emulate.c | 13 ++- arch/powerpc/kvm/powerpc.c |4 + include/linux/kvm.h |1 + 14 files changed, 460 insertions(+), 13 deletions(-) diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 1b9eaa7..f64c41f 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -261,7 +261,7 @@ See KVM_GET_REGS for the data structure. 4.13 KVM_GET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (out) Returns: 0 on success, -1 on error @@ -279,6 +279,8 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +/* ppc -- see arch/powerpc/include/asm/kvm.h */ + interrupt_bitmap is a bitmap of pending external interrupts. At most one bit may be set. This interrupt has been acknowledged by the APIC but not yet injected into the cpu core. @@ -286,7 +288,7 @@ but not yet injected into the cpu core. 
4.14 KVM_SET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (in) Returns: 0 on success, -1 on error diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea696..d2ca5ed 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. + * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. + */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. 
+ */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define KVM_SREGS_E_UPDATE_MCSR (1 << 0) +#define KVM_SREGS_E_UPDATE_TSR (1 << 1) +#define KVM_SREGS_E_UPDATE_DEC (1 << 2) +#define KVM_SREGS_E_UPDATE_DBSR
Re: [PATCH 1/2] KVM: PPC: booke: save/restore VRSAVE (a.k.a. USPRG0)
On 19.04.2011, at 00:31, Scott Wood wrote: Linux doesn't use USPRG0 (now renamed VRSAVE in the architecture, even when Altivec isn't involved), but a guest might. Signed-off-by: Scott Wood scottw...@freescale.com --- Sent to the right list this time... sigh arch/powerpc/include/asm/kvm_host.h |1 + arch/powerpc/kernel/asm-offsets.c |1 + arch/powerpc/kvm/booke_interrupts.S |5 - 3 files changed, 6 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bfb0e0f..34b8732 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -228,6 +228,7 @@ struct kvm_vcpu_arch { ulong hflags; ulong guest_owned_ext; #endif + u32 vrsave; /* also USPRG0 */ u32 mmucr; ulong shadow_msr; ulong sprg4; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2b223de..7b9e35e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -395,6 +395,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 54c19d2..cde29c4 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -184,6 +184,8 @@ _GLOBAL(kvmppc_resume_host) stw r3, VCPU_GPR(r4)(r4) mfspr r3, SPRN_SRR0 stw r3, VCPU_PC(r4) + mfspr r3, SPRN_VRSAVE + stw r3, VCPU_VRSAVE(r4) Do we need to swap this on every exit or is vcpu_load/put enough? 
Alex -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] KVM: PPC: booke: save/restore VRSAVE (a.k.a. USPRG0)
Linux doesn't use USPRG0 (now renamed VRSAVE in the architecture, even when Altivec isn't involved), but a guest might. Signed-off-by: Scott Wood scottw...@freescale.com --- Sent to the right list this time... sigh arch/powerpc/include/asm/kvm_host.h |1 + arch/powerpc/kernel/asm-offsets.c |1 + arch/powerpc/kvm/booke_interrupts.S |5 - 3 files changed, 6 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bfb0e0f..34b8732 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -228,6 +228,7 @@ struct kvm_vcpu_arch { ulong hflags; ulong guest_owned_ext; #endif + u32 vrsave; /* also USPRG0 */ u32 mmucr; ulong shadow_msr; ulong sprg4; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2b223de..7b9e35e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -395,6 +395,7 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 54c19d2..cde29c4 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -184,6 +184,8 @@ _GLOBAL(kvmppc_resume_host) stw r3, VCPU_GPR(r4)(r4) mfspr r3, SPRN_SRR0 stw r3, VCPU_PC(r4) + mfspr r3, SPRN_VRSAVE + stw r3, VCPU_VRSAVE(r4) /* Restore host stack pointer and PID before IVPR, since the host * exception handlers use them. */ @@ -395,9 +397,10 @@ lightweight_exit: * because host interrupt handlers would get confused. 
*/ lwz r1, VCPU_GPR(r1)(r4) - /* XXX handle USPRG0 */ /* Host interrupt handlers may have clobbered these guest-readable * SPRGs, so we need to reload them here with the guest's values. */ + lwz r3, VCPU_VRSAVE(r4) + mtspr SPRN_VRSAVE, r3 lwz r3, VCPU_SPRG4(r4) mtspr SPRN_SPRG4W, r3 lwz r3, VCPU_SPRG5(r4) -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] KVM: PPC: booke: add sregs support
On 19.04.2011, at 00:31, Scott Wood wrote: Signed-off-by: Scott Wood scottw...@freescale.com --- Documentation/kvm/api.txt |6 +- arch/powerpc/include/asm/kvm.h | 184 +++ arch/powerpc/include/asm/kvm_44x.h |1 - arch/powerpc/include/asm/kvm_e500.h |1 + arch/powerpc/include/asm/kvm_host.h |3 + arch/powerpc/include/asm/kvm_ppc.h |9 ++ arch/powerpc/kvm/44x.c | 10 ++ arch/powerpc/kvm/booke.c| 153 - arch/powerpc/kvm/e500.c | 75 ++ arch/powerpc/kvm/e500_emulate.c |5 +- arch/powerpc/kvm/e500_tlb.c |8 ++ arch/powerpc/kvm/emulate.c | 13 ++- arch/powerpc/kvm/powerpc.c |4 + include/linux/kvm.h |1 + 14 files changed, 460 insertions(+), 13 deletions(-) diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 1b9eaa7..f64c41f 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -261,7 +261,7 @@ See KVM_GET_REGS for the data structure. 4.13 KVM_GET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (out) Returns: 0 on success, -1 on error @@ -279,6 +279,8 @@ struct kvm_sregs { __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; +/* ppc -- see arch/powerpc/include/asm/kvm.h */ + interrupt_bitmap is a bitmap of pending external interrupts. At most one bit may be set. This interrupt has been acknowledged by the APIC but not yet injected into the cpu core. @@ -286,7 +288,7 @@ but not yet injected into the cpu core. 
4.14 KVM_SET_SREGS Capability: basic -Architectures: x86 +Architectures: x86, ppc Type: vcpu ioctl Parameters: struct kvm_sregs (in) Returns: 0 on success, -1 on error diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 18ea696..d2ca5ed 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -45,6 +45,114 @@ struct kvm_regs { __u64 gpr[32]; }; +#define KVM_SREGS_E_IMPL_NONE 0 +#define KVM_SREGS_E_IMPL_FSL 1 + +#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */ + +/* + * Feature bits indicate which sections of the sregs struct are valid, + * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers + * corresponding to unset feature bits will not be modified. This allows + * restoring a checkpoint made without that feature, while keeping the + * default values of the new registers. + * + * KVM_SREGS_E_BASE contains: + * CSRR0/1 (refers to SRR2/3 on 40x) + * ESR + * DEAR + * MCSR + * TSR + * TCR + * DEC + * TB + * VRSAVE (USPRG0) + */ +#define KVM_SREGS_E_BASE (1 << 0) + +/* + * KVM_SREGS_E_ARCH206 contains: + * + * PIR + * MCSRR0/1 + * DECAR + * IVPR + */ +#define KVM_SREGS_E_ARCH206 (1 << 1) + +/* + * Contains EPCR, plus the upper half of 64-bit registers + * that are 32-bit on 32-bit implementations. + */ +#define KVM_SREGS_E_64 (1 << 2) + +#define KVM_SREGS_E_SPRG8 (1 << 3) +#define KVM_SREGS_E_MCIVPR (1 << 4) + +/* + * IVORs are used -- contains IVOR0-15, plus additional IVORs + * in combination with an appropriate feature bit. + */ +#define KVM_SREGS_E_IVOR (1 << 5) + +/* + * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG. + * Also TLBnPS if MMUCFG[MAVN] = 1. 
+ */ +#define KVM_SREGS_E_ARCH206_MMU (1 << 6) + +/* DBSR, DBCR, IAC, DAC, DVC */ +#define KVM_SREGS_E_DEBUG (1 << 7) + +/* Enhanced debug -- DSRR0/1, SPRG9 */ +#define KVM_SREGS_E_ED (1 << 8) + +/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_SPE (1 << 9) + +/* External Proxy (EXP) -- EPR */ +#define KVM_SREGS_EXP (1 << 10) + +/* External PID (E.PD) -- EPSC/EPLC */ +#define KVM_SREGS_E_PD (1 << 11) + +/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PC (1 << 12) + +/* Page table (E.PT) -- EPTCFG */ +#define KVM_SREGS_E_PT (1 << 13) + +/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */ +#define KVM_SREGS_E_PM (1 << 14) + +/* + * Special updates: + * + * Some registers may change even while a vcpu is not running. + * To avoid losing these changes, by default these registers are + * not updated by KVM_SET_SREGS. To force an update, set the bit + * in u.e.update_special corresponding to the register to be updated. + * + * The update_special field is zero on return from KVM_GET_SREGS. + * + * When restoring a checkpoint, the caller can set update_special + * to 0xffffffff to ensure that everything is restored, even new features + * that the caller doesn't know about. + */ +#define