[PATCH -v3] Add savevm/loadvm support for MCE
MCE registers are saved/loaded into/from CPUState in kvm_arch_save/load_regs. To simulate the MCG_STATUS clearing upon reset, MSR_MCG_STATUS is set to 0 for KVM_PUT_RESET_STATE.

v3:
- use msrs[] in kvm_arch_load/save_regs and get_msr_entry directly.

v2:
- Rebased on new CPU registers save/load framework.

Signed-off-by: Huang Ying <ying.hu...@intel.com>

---
 qemu-kvm-x86.c |   36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -748,7 +748,22 @@ static int get_msr_entry(struct kvm_msr_
     case MSR_KVM_WALL_CLOCK:
         env->wall_clock_msr = entry->data;
         break;
+#ifdef KVM_CAP_MCE
+    case MSR_MCG_STATUS:
+        env->mcg_status = entry->data;
+        break;
+    case MSR_MCG_CTL:
+        env->mcg_ctl = entry->data;
+        break;
+#endif
     default:
+#ifdef KVM_CAP_MCE
+        if (entry->index >= MSR_MC0_CTL &&
+            entry->index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
+            env->mce_banks[entry->index - MSR_MC0_CTL] = entry->data;
+            break;
+        }
+#endif
         printf("Warning unknown msr index 0x%x\n", entry->index);
         return 1;
     }
@@ -979,6 +994,18 @@ void kvm_arch_load_regs(CPUState *env, i
         set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
         set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
     }
+#ifdef KVM_CAP_MCE
+    if (env->mcg_cap) {
+        if (level == KVM_PUT_RESET_STATE)
+            set_msr_entry(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
+        else if (level == KVM_PUT_FULL_STATE) {
+            set_msr_entry(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
+            set_msr_entry(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
+            for (i = 0; i < (env->mcg_cap & 0xff); i++)
+                set_msr_entry(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
+        }
+    }
+#endif

     rc = kvm_set_msrs(env, msrs, n);
     if (rc == -1)
@@ -1144,6 +1171,15 @@ void kvm_arch_save_regs(CPUState *env)
     msrs[n++].index = MSR_KVM_SYSTEM_TIME;
     msrs[n++].index = MSR_KVM_WALL_CLOCK;
+#ifdef KVM_CAP_MCE
+    if (env->mcg_cap) {
+        msrs[n++].index = MSR_MCG_STATUS;
+        msrs[n++].index = MSR_MCG_CTL;
+        for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++)
+            msrs[n++].index = MSR_MC0_CTL + i;
+    }
+#endif
+
     rc = kvm_get_msrs(env, msrs, n);
     if (rc == -1) {
         perror("kvm_get_msrs FAILED");
--
To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM usability
On Tue, Mar 02, 2010 at 06:57:54PM -0800, Ross Boylan wrote: On Mon, 2010-03-01 at 15:59 -0600, Anthony Liguori wrote: * desktop is 1024 x 720 1024x768 and this is what the default is today anyway. That was not my experience, as reported in my post a few days ago (800x600 max resolution), nor is it the experience reported in the message that kicked off this thread. I have been able to get a higher resolution, but it was far from automatic. It depends on the guest OS version. QEMU exposes a cirrus logic card by defualt, and given the lack of vsync/hsync info, the Xorg driver will pick 800x600 as the default resolution in absence of any Xorg.conf About 6 months or so back, we got Xorg guys to add a code to the Xorg cirrus driver that looked for the QEMU PCI subsystem ID and if found, defaults to 1024x768 instead. Of course this is itself still far from optimal as a user experiance. We really want it to be fully configured to any resolution as easily as the user would do with a real graphics card monitor. Regards, Daniel -- |: Red Hat, Engineering, London-o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org-o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM PMU virtualization
On Wed, 2010-03-03 at 11:32 +0800, Zhang, Yanmin wrote: On Tue, 2010-03-02 at 10:36 +0100, Ingo Molnar wrote: * Zhang, Yanmin yanmin_zh...@linux.intel.com wrote: On Fri, 2010-02-26 at 10:17 +0100, Ingo Molnar wrote: My suggestion, as always, would be to start very simple and very minimal: Enable 'perf kvm top' to show guest overhead. Use the exact same kernel image both as a host and as guest (for testing), to not have to deal with the symbol space transport problem initially. Enable 'perf kvm record' to only record guest events by default. Etc. This alone will be a quite useful result already - and gives a basis for further work. No need to spend months to do the big grand design straight away, all of this can be done gradually and in the order of usefulness - and you'll always have something that actually works (and helps your other KVM projects) along the way. It took me for a couple of hours to read the emails on the topic. Based on above idea, I worked out a prototype which is ugly, but does work with top/record when both guest side and host side use the same kernel image, while compiling most needed modules into kernel directly.. The commands are: perf kvm top perf kvm record perf kvm report They just collect guest kernel hot functions. Fantastic, and there's some really interesting KVM guest/host comparison profiles you've done with this prototype! With my patch, I collected dbench data on Nehalem machine (2*4*2 logical cpu). 
1) Vanilla host kernel (6G memory): PerfTop: 15491 irqs/sec kernel:93.6% [1000Hz cycles], (all, 16 CPUs) samples pcnt functionDSO ___ _ ___ 99376.00 40.5% ext3_test_allocatable /lib/modules/2.6.33-kvmymz/build/vmlinux 41239.00 16.8% bitmap_search_next_usable_block /lib/modules/2.6.33-kvmymz/build/vmlinux 7019.00 2.9% __ticket_spin_lock /lib/modules/2.6.33-kvmymz/build/vmlinux 5350.00 2.2% copy_user_generic_string /lib/modules/2.6.33-kvmymz/build/vmlinux 5208.00 2.1% do_get_write_access /lib/modules/2.6.33-kvmymz/build/vmlinux 4484.00 1.8% journal_dirty_metadata /lib/modules/2.6.33-kvmymz/build/vmlinux 4078.00 1.7% ext3_free_blocks_sb /lib/modules/2.6.33-kvmymz/build/vmlinux 3856.00 1.6% ext3_new_blocks /lib/modules/2.6.33-kvmymz/build/vmlinux 3485.00 1.4% journal_get_undo_access /lib/modules/2.6.33-kvmymz/build/vmlinux 2803.00 1.1% ext3_try_to_allocate /lib/modules/2.6.33-kvmymz/build/vmlinux 2241.00 0.9% __find_get_block /lib/modules/2.6.33-kvmymz/build/vmlinux 1957.00 0.8% find_revoke_record /lib/modules/2.6.33-kvmymz/build/vmlinux 2) guest os: start one guest os with 4GB memory. PerfTop: 827 irqs/sec kernel: 0.0% [1000Hz cycles], (all, 16 CPUs) samples pcnt functionDSO ___ _ ___ 41701.00 28.1% __ticket_spin_lock /lib/modules/2.6.33-kvmymz/build/vmlinux 33843.00 22.8% ext3_test_allocatable /lib/modules/2.6.33-kvmymz/build/vmlinux 16862.00 11.4% bitmap_search_next_usable_block /lib/modules/2.6.33-kvmymz/build/vmlinux 3278.00 2.2% native_flush_tlb_others /lib/modules/2.6.33-kvmymz/build/vmlinux 3200.00 2.2% copy_user_generic_string /lib/modules/2.6.33-kvmymz/build/vmlinux 3009.00 2.0% do_get_write_access /lib/modules/2.6.33-kvmymz/build/vmlinux 2834.00 1.9% journal_dirty_metadata /lib/modules/2.6.33-kvmymz/build/vmlinux 1965.00 1.3% journal_get_undo_access /lib/modules/2.6.33-kvmymz/build/vmlinux 1907.00 1.3% ext3_new_blocks /lib/modules/2.6.33-kvmymz/build/vmlinux
Re: [RFC][ PATCH 0/3] vhost-net: Add mergeable RX buffer support to vhost-net
On Wed, Mar 03, 2010 at 12:54:25AM -0800, David Stevens wrote: Michael S. Tsirkin m...@redhat.com wrote on 03/02/2010 11:54:32 PM: On Tue, Mar 02, 2010 at 04:20:03PM -0800, David Stevens wrote: These patches add support for mergeable receive buffers to vhost-net, allowing it to use multiple virtio buffer heads for a single receive packet. +-DLS Signed-off-by: David L Stevens dlstev...@us.ibm.com Do you have performance numbers (both with and without mergeable buffers in guest)? Michael, Nothing formal. I did some TCP single-stream throughput tests and was seeing 20-25% improvement on a laptop (ie, low-end hardware). That actually surprised me; I'd think it'd be about the same, except maybe in a test that has mixed packet sizes. Comparisons with the net-next kernel these patches are for showed only ~10% improvement. But I also see a lot of variability both among different configurations and with the same configuration on different runs. So, I don't feel like those numbers are very solid, and I haven't yet done any tests on bigger hardware. Interesting. Since the feature in question is billed first of all a performance optimization, I think we might need some performance numbers as a motivation. Since the patches affect code paths when mergeable RX buffers are disabled as well, I guess the most important point would be to verify whether there's increase in latency and/or CPU utilization, or bandwidth cost when the feature bit is *disabled*. 2 notes: I have a modified version of qemu to get the VHOST_FEATURES flags, including the mergeable RX bufs flag, passed to the guest; I'll be working with your current qemu git trees next, if any changes are needed to support it there. 
This feature also seems to conflict with zero-copy rx patches from Xin Xiaohui (subject: Provide a zero-copy method on KVM virtio-net) these are not in a mergeable shape yet, so this is not a blocker, but I wonder what your thoughts on the subject are: how will we do feature negotiation if some backends don't support some features? Second, I've found a missing initialization in the patches I sent on the list, so I'll send an updated patch 2 with the fix, If you do, any chance you could use git send-email for this? and qemu patches when they are ready (plus any code-review comments incorporated). Pls take a look here as well http://www.openfabrics.org/~mst/boring.txt -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM PMU virtualization
On Wed, 2010-03-03 at 17:27 +0800, Zhang, Yanmin wrote: +static inline u64 perf_instruction_pointer(struct pt_regs *regs) +{ + u64 ip; + ip = percpu_read(perf_virt_ip.ip); + if (!ip) + ip = instruction_pointer(regs); + else + perf_reset_virt_ip(); + return ip; +} + +static inline unsigned int perf_misc_flags(struct pt_regs *regs) +{ + if (percpu_read(perf_virt_ip.ip)) { + return percpu_read(perf_virt_ip.user_mode) ? + PERF_RECORD_MISC_GUEST_USER : + PERF_RECORD_MISC_GUEST_KERNEL; + } else + return user_mode(regs) ? PERF_RECORD_MISC_USER : +PERF_RECORD_MISC_KERNEL; +} This codes in the assumption that perf_misc_flags() must only be called before perf_instruction_pointer(), which is currently true, but you might want to put a comment near to remind us of this. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM PMU virtualization
On Wed, 2010-03-03 at 17:27 +0800, Zhang, Yanmin wrote: -#ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ -PERF_RECORD_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) -#endif Ah, that #ifndef is for powerpc, which I think you just broke. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2936094 ] BUG: kvm_dirty_pages_log_enable_slot: invalid parameters
Bugs item #2936094, was opened at 2010-01-21 11:07 Message generated for change (Comment added) made by kolobrod You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2936094group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: None Priority: 5 Private: No Submitted By: Sergey _ (kolobrod) Assigned to: Nobody/Anonymous (nobody) Summary: BUG: kvm_dirty_pages_log_enable_slot: invalid parameters Initial Comment: Upgraded userspace to qemu-kvm-0.12.2. # uname -srvmpio Linux 2.6.31-gentoo-r6 #1 SMP Fri Dec 18 11:43:50 YEKT 2009 x86_64 Intel(R) Core(TM)2 CPU 6300 @ 1.86GHz GenuineIntel GNU/Linux # emerge qemu-kvm -pv ... [ebuild R ] app-emulation/qemu-kvm-0.12.2 USE=aio alsa curl gnutls ncurses sdl -bluetooth -esd -fdt -hardened -kvm-trace -pulseaudio -sasl -vde QEMU_SOFTMMU_TARGETS=x86_64 -arm -cris -i386 -m68k -microblaze -mips -mips64 -mips64el -mipsel -ppc -ppc64 -ppcemb -sh4 -sh4eb -sparc -sparc64 QEMU_USER_TARGETS=x86_64 -alpha -arm -armeb -cris -i386 -m68k -microblaze -mips -mipsel -ppc -ppc64 -ppc64abi32 -sh4 -sh4eb -sparc -sparc32plus -sparc64 0 kB ... 
I'm starting vm with kvm -cpu qemu64 -tdf -drive file=/home/sergey/virtual/winsrv2008_1.img,if=virtio,media=disk,boot=on -net nic,model=virtio -net tap,ifname=testtuntap,script=no,downscript=no -m 384 -usbdevice tablet -vga std -runas sergey Have got this on startup: # BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: 
kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters BUG: kvm_dirty_pages_log_enable_slot: invalid parameters BUG: kvm_dirty_pages_log_disable_slot: invalid parameters It works yet, but messages looks bad. -- Comment By: Sergey _ (kolobrod) Date: 2010-03-03 16:17 Message: Updated to qemu-kvm-0.12.3 . No messages any more. Seems to be fixed.
[ kvm-Bugs-2962575 ] MINIX 3.1.6 works in QEMU-0.12.3 only with KVM disabled
Bugs item #2962575, was opened at 2010-03-03 13:20 Message generated for change (Tracker Item Submitted) made by erikvdk You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2962575group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: intel Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: Erik van der Kouwe (erikvdk) Assigned to: Nobody/Anonymous (nobody) Summary: MINIX 3.1.6 works in QEMU-0.12.3 only with KVM disabled Initial Comment: Dear all, If one runs the following commands after installing qemu-0.12.3 or qemu-kvm-0.12.3: wget http://www.minix3.org/download/minix_R3.1.6-r6084.iso.bz2 bunzip2 minix_R3.1.6-r6084.iso.bz2 qemu-system-x86_64 -cdrom minix_R3.1.6-r6084.iso -enable-kvm and presses 1 (Regular MINIX 3), the following error message results when loading MINIX: kvm: unhandled exit 8021 kvm_run returned -22 The guest stops after that. This error message does not occur without the -enable-kvm switch. It does not occur with qemu-kvm-0.11.0 as bundled with Ubuntu. The problem occurs with the qemu binary from qemu-0.12.3 as well as qemu-system-x86_64 from qemu-kvm-0.12.3, but in the former case no error message is printed. The code that is running when it fails is in https://gforge.cs.vu.nl/gf/project/minix/scmsvn/?action=browsepath=%2Ftrunk%2Fsrc%2Fboot%2Fboothead.srevision=5918view=markup. It happens in ext_copy: ext_copy: mov x_dst_desc+2, ax movbx_dst_desc+4, dl ! Set base of destination segment mov ax, 8(bp) mov dx, 10(bp) mov x_src_desc+2, ax movbx_src_desc+4, dl ! Set base of source segment mov si, #x_gdt ! es:si = global descriptor table shr cx, #1 ! Words to move movbah, #0x87 ! Code for extended memory move int 0x15 The line that fails is int 0x15, which performs a BIOS call to copy data from low memory to above the 1MB barrier. 
The machine is running in 16-bit real mode when this code is executed. Output for uname -a on the host: Linux hp364 2.6.31-20-generic #57-Ubuntu SMP Mon Feb 8 09:05:19 UTC 2010 i686 GNU/Linux Output for cat /proc/cpuinfo on the host: processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 23 model name : Intel(R) Core(TM)2 Duo CPU E8600 @ 3.33GHz stepping: 10 cpu MHz : 1998.000 cache size : 6144 KB physical id : 0 siblings: 2 core id : 0 cpu cores : 2 apicid : 0 initial apicid : 0 fdiv_bug: no hlt_bug : no f00f_bug: no coma_bug: no fpu : yes fpu_exception : yes cpuid level : 13 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts pni dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm tpr_shadow vnmi flexpriority bogomips: 6650.50 clflush size: 64 power management: processor : 1 vendor_id : GenuineIntel cpu family : 6 model : 23 model name : Intel(R) Core(TM)2 Duo CPU E8600 @ 3.33GHz stepping: 10 cpu MHz : 1998.000 cache size : 6144 KB physical id : 0 siblings: 2 core id : 1 cpu cores : 2 apicid : 1 initial apicid : 1 fdiv_bug: no hlt_bug : no f00f_bug: no coma_bug: no fpu : yes fpu_exception : yes cpuid level : 13 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts pni dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm tpr_shadow vnmi flexpriority bogomips: 6649.80 clflush size: 64 power management: With kind regards, Erik -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2962575group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Another VNC crash, qemu-kvm-0.12.3
On 03/01/2010 12:14 PM, Chris Webb wrote: We've just seen another VNC related qemu-kvm crash, this time an arithmetic exception at vnc.c:1424 in the newly release qemu-kvm 0.12.3. [...] 1423 if (vs-absolute) { 1424 kbd_mouse_event(x * 0x7FFF / (ds_get_width(vs-ds) - 1), 1425 y * 0x7FFF / (ds_get_height(vs-ds) - 1), 1426 dz, buttons); 1427 } else if (vnc_has_feature(vs, VNC_FEATURE_POINTER_TYPE_CHANGE)) { 1428 x -= 0x7FFF; [...] and sure enough: (gdb) p vs-ds-surface-width $1 = 9 (gdb) p vs-ds-surface-height $2 = 1 What a 9x1 display surface is doing on this guest is a mystery to me, but you definitely can't divide by one less than its height! Can you reproduce this reliably? If so, what's the procedure? BTW, I'd suggest filing this at http://bugs.launchpad.net/qemu Regards, Anthony Liguori (gdb) p *vs $3 = {csock = 19, ds = 0x1c60fa0, dirty = {{4294967295, 4294967295, 4294967295, 4294967295, 4294967295}repeats 2048 times}, vd = 0x26a0110, need_update = 1, force_update = 0, features = 67, absolute = 1, last_x = -1, last_y = -1, vnc_encoding = 5, tight_quality = 9 '\t', tight_compression = 9 '\t', major = 3, minor = 8, challenge = ¹{\177\226\200kÕjéPñÄA¤o), output = {capacity = 925115, offset = 0, buffer = 0x28ba4b0 }, input = {capacity = 5120, offset = 6, buffer = 0x28b90a0 \005}, write_pixels = 0x4bb9e0vnc_write_pixels_generic, send_hextile_tile = 0x4bcdf0send_hextile_tile_generic_32, clientds = {flags = 0 '\0', width = 800, height = 600, linesize = 3200, data = 0x7fcd00ab6010 , pf = { bits_per_pixel = 32 ' ', bytes_per_pixel = 4 '\004', depth = 24 '\030', rmask = 0, gmask = 0, bmask = 0, amask = 0, rshift = 16 '\020', gshift = 8 '\b', bshift = 0 '\0', ashift = 24 '\030', rmax = 255 'ÿ', gmax = 255 'ÿ', bmax = 255 'ÿ', amax = 255 'ÿ', rbits = 8 '\b', gbits = 8 '\b', bbits = 8 '\b', abits = 8 '\b'}}, audio_cap = 0x0, as = {freq = 44100, nchannels = 2, fmt = AUD_FMT_S16, endianness = 0}, read_handler = 0x4beac0protocol_client_msg, read_handler_expect = 6, 
modifiers_state = '\0'repeats 255 times, zlib = {capacity = 0, offset = 0, buffer = 0x0}, zlib_tmp = {capacity = 0, offset = 0, buffer = 0x0}, zlib_stream = {{next_in = 0x0, avail_in = 0, total_in = 0, next_out = 0x0, avail_out = 0, total_out = 0, msg = 0x0, state = 0x0, zalloc = 0, zfree = 0, opaque = 0x0, data_type = 0, adler = 0, reserved = 0}, {next_in = 0x0, avail_in = 0, total_in = 0, next_out = 0x0, avail_out = 0, total_out = 0, msg = 0x0, state = 0x0, zalloc = 0, zfree = 0, opaque = 0x0, data_type = 0, adler = 0, reserved = 0}, {next_in = 0x0, avail_in = 0, total_in = 0, next_out = 0x0, avail_out = 0, total_out = 0, msg = 0x0, state = 0x0, zalloc = 0, zfree = 0, opaque = 0x0, data_type = 0, adler = 0, reserved = 0}, {next_in = 0x0, avail_in = 0, total_in = 0, next_out = 0x0, avail_out = 0, total_out = 0, msg = 0x0, state = 0x0, zalloc = 0, zfree = 0, opaque = 0x0, data_type = 0, adler = 0, reserved = 0}}, next = 0x0} (gdb) p *vs-ds $4 = {surface = 0x1c81f40, opaque = 0x26a0110, gui_timer = 0x0, allocator = 0x8199d0, listeners = 0x1c95fa0, mouse_set = 0, cursor_define = 0, next = 0x0} (gdb) p *vs-ds-surface $5 = {flags = 2 '\002', width = 9, height = 1, linesize = 36, data = 0x7fcd00ab6010 , pf = { bits_per_pixel = 32 ' ', bytes_per_pixel = 4 '\004', depth = 24 '\030', rmask = 16711680, gmask = 65280, bmask = 255, amask = 0, rshift = 16 '\020', gshift = 8 '\b', bshift = 0 '\0', ashift = 24 '\030', rmax = 255 'ÿ', gmax = 255 'ÿ', bmax = 255 'ÿ', amax = 255 'ÿ', rbits = 8 '\b', gbits = 8 '\b', bbits = 8 '\b', abits = 8 '\b'}} Cheers, Chris. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM-AUTOTEST PATCH v4] KVM test: A memory efficient kvm_config implementation
On Tue, 2010-03-02 at 19:30 +0200, Michael Goldish wrote: This patch: - Makes kvm_config use less memory during parsing, by storing config data compactly in arrays during parsing, and generating the final dicts only when requested. On my machine this results in 5-10 times less memory being used (depending on the size of the final generated list). This allows the test configuration to keep expanding without having the parser run out of memory. - Adds config.fork_and_parse(), a function that parses a config file/string in a forked process and then terminates the process. This works around Python's policy of keeping allocated memory to itself even after the objects occupying the memory have been destroyed. If the process that does the parsing is the same one that runs the tests, less memory will be available to the VMs during testing. - Makes parsing 4-5 times faster as a result of the new internal representation. Overall, kvm_config's memory usage should now be negligible in most cases. Changes from v3: - Use the homemade 'configreader' class instead of regular files in parse() and parse_variants() (readline() and/or seek() are very slow). - Use a regex cache dict (regex_cache). - Use a string cache dict in addition to the list (object_cache_indices). - Some changes to fork_and_parse() (disable buffering). Changes from v2: - Merged _get_next_line() and _get_next_line_indent(). - Made _array_get_name() faster. Changes from v1: - Added config.get_generator() which is similar to get_list() but returns a dict generator instead of a list. This should save some more memory and will make tests start sooner. - Use get_generator() in control. - Call waitpid() at the end of fork_and_parse(). As the generated patch is kinda fragmented for posting comments inline, I am going to throw just a block of minor comments after I have reviewed the code: Observations: * When a file is missing, it's more appropriate to raise a IOError than an Exception, so we must change that. 
Also, it's important to follow the coding standards for raising exceptions. • I was wondering whether make fork_and_parse a public interface for the config object was the right decision, maybe all calls to parse_file should be done in a fork_and_parse fashion? I guess I got your point in making it a public interface and separate it from parse_file, but isn't that kinda confusing for the users (I mean, people writing control files for kvm autotest)? • About buffering on fork_and_parse: The performance penalties in disabling buffering varies, with caches dropped it was something like 3-5%, after 'warming up' it was something like 8-11%, so it's small stuff. But we can favour speed in this case so the final version won't disable buffering. Compliments: • The configreader class was a very interesting move, simple, clean and fast. Congrats! • The output of the config system is good for debugging purposes, so we'll stick with it. • Thank you very much for your work, now we have faster parsing, that consumes a lot less memory, so smaller boxes will benefit a *lot* from that. What I am going to do: • I will re-send the version with the tiny changes I made so it gets recorded on patchwork, and soon after I'll apply it upstream. I think from this point on we might have only minor tweaks to make. 
Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/control | 30 +- client/tests/kvm/control.parallel | 21 +- client/tests/kvm/kvm_config.py| 832 ++--- 3 files changed, 535 insertions(+), 348 deletions(-) diff --git a/client/tests/kvm/control b/client/tests/kvm/control index 163286e..15c4539 100644 --- a/client/tests/kvm/control +++ b/client/tests/kvm/control @@ -30,34 +30,38 @@ import kvm_utils, kvm_config # set English environment (command output might be localized, need to be safe) os.environ['LANG'] = 'en_US.UTF-8' -build_cfg_path = os.path.join(kvm_test_dir, build.cfg) -build_cfg = kvm_config.config(build_cfg_path) -# Make any desired changes to the build configuration here. For example: -#build_cfg.parse_string( +str = +# This string will be parsed after build.cfg. Make any desired changes to the +# build configuration here. For example: #release_tag = 84 -#) -if not kvm_utils.run_tests(build_cfg.get_list(), job): + +build_cfg = kvm_config.config() +build_cfg_path = os.path.join(kvm_test_dir, build.cfg) +build_cfg.fork_and_parse(build_cfg_path, str) +if not kvm_utils.run_tests(build_cfg.get_generator(), job): logging.error(KVM build step failed, exiting.) sys.exit(1) -tests_cfg_path = os.path.join(kvm_test_dir, tests.cfg) -tests_cfg = kvm_config.config(tests_cfg_path) -# Make any desired changes to the test configuration here. For example: -#tests_cfg.parse_string( +str = +# This string will be parsed after tests.cfg. Make any
[PATCH] KVM test: A memory efficient kvm_config implementation v5
This patch: - Makes kvm_config use less memory during parsing, by storing config data compactly in arrays during parsing, and generating the final dicts only when requested. On my machine this results in 5-10 times less memory being used (depending on the size of the final generated list). This allows the test configuration to keep expanding without having the parser run out of memory. - Adds config.fork_and_parse(), a function that parses a config file/string in a forked process and then terminates the process. This works around Python's policy of keeping allocated memory to itself even after the objects occupying the memory have been destroyed. If the process that does the parsing is the same one that runs the tests, less memory will be available to the VMs during testing. - Makes parsing 4-5 times faster as a result of the new internal representation. Overall, kvm_config's memory usage should now be negligible in most cases. Changes from v3: - Use the homemade 'configreader' class instead of regular files in parse() and parse_variants() (readline() and/or seek() are very slow). - Use a regex cache dict (regex_cache). - Use a string cache dict in addition to the list (object_cache_indices). Changes from v2: - Merged _get_next_line() and _get_next_line_indent(). - Made _array_get_name() faster. Changes from v1: - Added config.get_generator() which is similar to get_list() but returns a dict generator instead of a list. This should save some more memory and will make tests start sooner. - Use get_generator() in control. - Call waitpid() at the end of fork_and_parse(). 
Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/control | 33 +- client/tests/kvm/control.parallel | 21 +- client/tests/kvm/kvm_config.py| 837 ++--- 3 files changed, 541 insertions(+), 350 deletions(-) diff --git a/client/tests/kvm/control b/client/tests/kvm/control index 163286e..a69eacf 100644 --- a/client/tests/kvm/control +++ b/client/tests/kvm/control @@ -30,34 +30,41 @@ import kvm_utils, kvm_config # set English environment (command output might be localized, need to be safe) os.environ['LANG'] = 'en_US.UTF-8' -build_cfg_path = os.path.join(kvm_test_dir, build.cfg) -build_cfg = kvm_config.config(build_cfg_path) -# Make any desired changes to the build configuration here. For example: -#build_cfg.parse_string( +str = +# This string will be parsed after build.cfg. Make any desired changes to the +# build configuration here. For example: #release_tag = 84 -#) -if not kvm_utils.run_tests(build_cfg.get_list(), job): + +build_cfg = kvm_config.config() +# As the base test config is quite large, in order to save memory, we use the +# fork_and_parse() method, that creates another parser process and destroys it +# at the end of the parsing, so the memory spent can be given back to the OS. +build_cfg_path = os.path.join(kvm_test_dir, build.cfg) +build_cfg.fork_and_parse(build_cfg_path, str) +if not kvm_utils.run_tests(build_cfg.get_generator(), job): logging.error(KVM build step failed, exiting.) sys.exit(1) -tests_cfg_path = os.path.join(kvm_test_dir, tests.cfg) -tests_cfg = kvm_config.config(tests_cfg_path) -# Make any desired changes to the test configuration here. For example: -#tests_cfg.parse_string( +str = +# This string will be parsed after tests.cfg. Make any desired changes to the +# test configuration here. 
For example: #display = sdl #install|setup: timeout_multiplier = 3 -#) + +tests_cfg = kvm_config.config() +tests_cfg_path = os.path.join(kvm_test_dir, tests.cfg) +tests_cfg.fork_and_parse(tests_cfg_path, str) pools_cfg_path = os.path.join(kvm_test_dir, address_pools.cfg) tests_cfg.parse_file(pools_cfg_path) hostname = os.uname()[1].split(.)[0] -if tests_cfg.filter(^ + hostname): +if tests_cfg.count(^ + hostname): tests_cfg.parse_string(only ^%s % hostname) else: tests_cfg.parse_string(only ^default_host) # Run the tests -kvm_utils.run_tests(tests_cfg.get_list(), job) +kvm_utils.run_tests(tests_cfg.get_generator(), job) # Generate a nice HTML report inside the job's results dir kvm_utils.create_report(kvm_test_dir, job.resultdir) diff --git a/client/tests/kvm/control.parallel b/client/tests/kvm/control.parallel index 343f694..07bc6e5 100644 --- a/client/tests/kvm/control.parallel +++ b/client/tests/kvm/control.parallel @@ -160,19 +160,22 @@ if not params.get(mode) == noinstall: # -- import kvm_config -filename = os.path.join(pwd, kvm_tests.cfg) -cfg = kvm_config.config(filename) - -# If desirable, make changes to the test configuration here. For example: -# cfg.parse_string(install|setup: timeout_multiplier = 2) -# cfg.parse_string(only fc8_quick) -# cfg.parse_string(display = sdl) +str = +# This string will be parsed after tests.cfg. Make any desired changes to the +# test configuration here.
Re: KVM usability
On Wed, 2010-03-03 at 08:55 +, Daniel P. Berrange wrote: On Tue, Mar 02, 2010 at 06:57:54PM -0800, Ross Boylan wrote: On Mon, 2010-03-01 at 15:59 -0600, Anthony Liguori wrote: * desktop is 1024 x 720 1024x768 and this is what the default is today anyway. That was not my experience, as reported in my post a few days ago (800x600 max resolution), nor is it the experience reported in the message that kicked off this thread. I have been able to get a higher resolution, but it was far from automatic. It depends on the guest OS version. QEMU exposes a cirrus logic card by default, QEMU docs recommend -std vga for higher resolutions; I used that. and given the lack of vsync/hsync info, the Xorg driver will pick 800x600 as the default resolution in absence of any Xorg.conf About 6 months or so back, we got Xorg guys to add code to the Xorg cirrus driver that looked for the QEMU PCI subsystem ID and if found, defaults to 1024x768 instead. So presumably that logic wouldn't have kicked in. I had xorg 7.5 on Debian squeeze as the guest. Of course this is itself still far from optimal as a user experience. We really want it to be fully configured to any resolution as easily as the user would do with a real graphics card monitor. Is there some obstacle to getting the virtual monitor to provide configuration info when it's queried? That seems like the most direct solution. Ross -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: x86 emulator mark VMMCALL and LMSW as privileged
LMSW is present in both group tables. It was marked privileged only in one of them. Intel analog of VMMCALL is already marked privileged. Signed-off-by: Gleb Natapov g...@redhat.com diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3d2e115..3af63d2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -363,9 +363,9 @@ static u32 group_table[] = { static u32 group2_table[] = { [Group7*8] = - SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, + SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, SrcNone | ModRM | DstMem | Mov, 0, - SrcMem16 | ModRM | Mov, 0, + SrcMem16 | ModRM | Mov | Priv, 0, [Group9*8] = 0, 0, 0, 0, 0, 0, 0, 0, }; -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 03/10] x86: Extend validity of cpu_is_bsp
On Mon, Mar 01, 2010 at 06:17:22PM +0100, Jan Kiszka wrote: As we hard-wire the BSP to CPU 0 anyway and cpuid_apic_id equals cpu_index, cpu_is_bsp can also be based on the latter directly. This will help an early user of it: KVM while initializing mp_state. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/pc.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/hw/pc.c b/hw/pc.c index b90a79e..58c32ea 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -767,7 +767,8 @@ static void pc_init_ne2k_isa(NICInfo *nd) int cpu_is_bsp(CPUState *env) { -return env-cpuid_apic_id == 0; +/* We hard-wire the BSP to the first CPU. */ +return env-cpu_index == 0; } We should not assume that. The function was written like that specifically so the code around it will not rely on this assumption. Now you change that specifically to write code that will do incorrect assumptions. I don't see the logic here. CPUState *pc_new_cpu(const char *cpu_model) -- 1.6.0.2 -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM-AUTOTEST PATCH v4] KVM test: A memory efficient kvm_config implementation
- Lucas Meneghel Rodrigues l...@redhat.com wrote: On Tue, 2010-03-02 at 19:30 +0200, Michael Goldish wrote: This patch: - Makes kvm_config use less memory during parsing, by storing config data compactly in arrays during parsing, and generating the final dicts only when requested. On my machine this results in 5-10 times less memory being used (depending on the size of the final generated list). This allows the test configuration to keep expanding without having the parser run out of memory. - Adds config.fork_and_parse(), a function that parses a config file/string in a forked process and then terminates the process. This works around Python's policy of keeping allocated memory to itself even after the objects occupying the memory have been destroyed. If the process that does the parsing is the same one that runs the tests, less memory will be available to the VMs during testing. - Makes parsing 4-5 times faster as a result of the new internal representation. Overall, kvm_config's memory usage should now be negligible in most cases. Changes from v3: - Use the homemade 'configreader' class instead of regular files in parse() and parse_variants() (readline() and/or seek() are very slow). - Use a regex cache dict (regex_cache). - Use a string cache dict in addition to the list (object_cache_indices). - Some changes to fork_and_parse() (disable buffering). Changes from v2: - Merged _get_next_line() and _get_next_line_indent(). - Made _array_get_name() faster. Changes from v1: - Added config.get_generator() which is similar to get_list() but returns a dict generator instead of a list. This should save some more memory and will make tests start sooner. - Use get_generator() in control. - Call waitpid() at the end of fork_and_parse(). 
As the generated patch is kinda fragmented for posting comments inline, I am going to throw just a block of minor comments after I have reviewed the code: Observations: * When a file is missing, it's more appropriate to raise an IOError than an Exception, so we must change that. Also, it's important to follow the coding standards for raising exceptions. I agree. That Exception is nothing new but it's been overlooked so far. • I was wondering whether making fork_and_parse a public interface for the config object was the right decision, maybe all calls to parse_file should be done in a fork_and_parse fashion? I guess I got your point in making it a public interface and separate it from parse_file, but isn't that kinda confusing for the users (I mean, people writing control files for kvm autotest)? If the test set is selected in a string that gets parsed after the config file, then you have to parse both the file and the string in the same forked process. If you do the forking in parse_file() and parse_string() separately, and call them one after the other, the data generated by parse_file() is transferred to the parent process and consumes a lot of memory. If you parse_file() and parse_string() in the same forked process, only a small amount of data that remains after parse_string() gets transferred back to the parent process. So in any case, we need a function that forks and parses a file and a string together, which is what fork_and_parse() does. We could make parse_file() and parse_string() fork too, but that would be redundant because fork_and_parse() can be called with any of the parameters omitted. • About buffering on fork_and_parse: The performance penalties in disabling buffering vary, with caches dropped it was something like 3-5%, after 'warming up' it was something like 8-11%, so it's small stuff. But we can favour speed in this case so the final version won't disable buffering. 
Compliments: • The configreader class was a very interesting move, simple, clean and fast. Congrats! • The output of the config system is good for debugging purposes, so we'll stick with it. • Thank you very much for your work, now we have faster parsing, that consumes a lot less memory, so smaller boxes will benefit a *lot* from that. What I am going to do: • I will re-send the version with the tiny changes I made so it gets recorded on patchwork, and soon after I'll apply it upstream. I think from this point on we might have only minor tweaks to make. Signed-off-by: Michael Goldish mgold...@redhat.com --- client/tests/kvm/control | 30 +- client/tests/kvm/control.parallel | 21 +- client/tests/kvm/kvm_config.py| 832 ++--- 3 files changed, 535 insertions(+), 348 deletions(-) diff --git a/client/tests/kvm/control b/client/tests/kvm/control index 163286e..15c4539 100644 --- a/client/tests/kvm/control +++ b/client/tests/kvm/control @@ -30,34 +30,38 @@ import kvm_utils, kvm_config # set English environment (command output might
Re: [RFC][ PATCH 0/3] vhost-net: Add mergeable RX buffer support to vhost-net
Interesting. Since the feature in question is billed first of all a performance optimization... By whom? Although I see some improved performance, I think its real benefit is improving memory utilization on the guest. Instead of using 75K for an ARP packet, mergeable RX buffers only uses 4K. :-) Since the patches affect code paths when mergeable RX buffers are disabled as well, I guess the most important point would be to verify whether there's increase in latency and/or CPU utilization, or bandwidth cost when the feature bit is *disabled*. Actually, when the feature bit is disabled, it'll only get a single head, doesn't use the special vnet_hdr, and the codepath reduces to the essentially to the original. But the answer is no; I saw no regressions when using it without the feature bit. The only substantive difference in that case is that the new code avoids copying the vnet header as the original does, so it should actually be faster, but I don't think that's measurable above the variability I already see. 2 notes: I have a modified version of qemu to get the VHOST_FEATURES flags, including the mergeable RX bufs flag, passed to the guest; I'll be working with your current qemu git trees next, if any changes are needed to support it there. This feature also seems to conflict with zero-copy rx patches from Xin Xiaohui (subject: Provide a zero-copy method on KVM virtio-net) these are not in a mergeable shape yet, so this is not a blocker, but I wonder what your thoughts on the subject are: how will we do feature negotiation if some backends don't support some features? The qemu code I have basically sends the set features and get features all the way to vhost (ie, it's the guest negotiating with vhost), except, of course, for the magic qemu-only bits. I think that's the right model. I'll definitely take a look at the patch you mention and maybe comment further. 
+-DLS -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/18] KVM: MMU: Make set_cr3 a function pointer in kvm_mmu
This is necessary to implement Nested Nested Paging. As a side effect this allows some cleanups in the SVM nested paging code. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/mmu.c |4 +++- 2 files changed, 4 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e7bef19..887a1f7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -241,6 +241,7 @@ struct kvm_pio_request { */ struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5c66c99..ec3da11 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2398,6 +2398,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-shadow_root_level = kvm_x86_ops-get_tdp_level(); context-root_hpa = INVALID_PAGE; vcpu-arch.mmu.tdp_enabled = true; + vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; if (!is_paging(vcpu)) { context-gva_to_gpa = nonpaging_gva_to_gpa; @@ -2437,6 +2438,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) vcpu-arch.mmu.base_role.glevels = vcpu-arch.mmu.root_level; vcpu-arch.mmu.tdp_enabled = false; + vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; return r; } @@ -2482,7 +2484,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) if (r) goto out; /* set_cr3() should ensure TLB has been flushed */ - kvm_x86_ops-set_cr3(vcpu, vcpu-arch.mmu.root_hpa); + vcpu-arch.mmu.set_cr3(vcpu, vcpu-arch.mmu.root_hpa); out: return r; } -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/18][RFC] Nested Paging support for Nested SVM (aka NPT-Virtualization)
Hi, here are the patches that implement nested paging support for nested svm. They are somewhat intrusive to the soft-mmu so I post them as RFC in the first round to get feedback about the general direction of the changes. Nevertheless I am proud to report that with these patches the famous kernel-compile benchmark runs only 4% slower in the l2 guest as in the l1 guest when l2 is single-processor. With SMP guests the situation is very different. The more vcpus the guest has the more is the performance drop from l1 to l2. Anyway, this post is to get feedback about the overall concept of these patches. Please review and give feedback :-) Thanks, Joerg Diffstat: arch/x86/include/asm/kvm_host.h | 21 ++ arch/x86/kvm/mmu.c | 152 ++- arch/x86/kvm/mmu.h |2 + arch/x86/kvm/paging_tmpl.h | 81 ++--- arch/x86/kvm/svm.c | 126 +++- arch/x86/kvm/vmx.c |9 +++ arch/x86/kvm/x86.c | 19 +- include/linux/kvm.h |1 + include/linux/kvm_host.h|5 ++ 9 files changed, 354 insertions(+), 62 deletions(-) Shortlog: Joerg Roedel (18): KVM: MMU: Check for root_level instead of long mode KVM: MMU: Make tdp_enabled a mmu-context parameter KVM: MMU: Make set_cr3 a function pointer in kvm_mmu KVM: X86: Introduce a tdp_set_cr3 function KVM: MMU: Introduce get_cr3 function pointer KVM: MMU: Introduce inject_page_fault function pointer KVM: SVM: Implement MMU helper functions for Nested Nested Paging KVM: MMU: Change init_kvm_softmmu to take a context as parameter KVM: MMU: Let is_rsvd_bits_set take mmu context instead of vcpu KVM: MMU: Introduce generic walk_addr function KVM: MMU: Add infrastructure for two-level page walker KVM: MMU: Implement nested gva_to_gpa functions KVM: MMU: Introduce Nested MMU context KVM: SVM: Initialize Nested Nested MMU context on VMRUN KVM: MMU: Propagate the right fault back to the guest after gva_to_gpa KVM: X86: Add callback to let modules decide over some supported cpuid bits KVM: SVM: Report Nested Paging support to userspace KVM: X86: Add KVM_CAP_SVM_CPUID_FIXED -- To 
unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 17/18] KVM: SVM: Report Nested Paging support to userspace
This patch implements the reporting of the nested paging feature support to userspace. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/svm.c | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fe1398e..ce71023 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3289,6 +3289,16 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { + switch (func) { + case 0x800A: + if (!npt_enabled) + break; + + /* NPT feature is supported by Nested SVM */ + entry-edx = SVM_FEATURE_NPT; + + break; + } } static const struct trace_print_flags svm_exit_reasons_str[] = { -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 13/18] KVM: MMU: Introduce Nested MMU context
This patch introduces a second MMU context which will hold the paging information for the l2 guest. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |8 ++ arch/x86/kvm/mmu.c | 48 ++- 2 files changed, 55 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 20dd1ce..66a698e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -264,6 +264,13 @@ struct kvm_mmu { u64 *pae_root; u64 rsvd_bits_mask[2][4]; + + /* +* If true the mmu runs in two-level mode. +* vcpu-arch.nested_mmu needs to contain meaningful +* values then. +*/ + bool nested; }; struct kvm_vcpu_arch { @@ -296,6 +303,7 @@ struct kvm_vcpu_arch { struct kvm_mmu mmu; + /* This will hold the mmu context of the second level guest */ struct kvm_mmu nested_mmu; /* only needed in kvm_pv_mmu_op() path, but it's hot so diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c831955..ccaf6b1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2154,6 +2154,18 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *error) return gpa; } +static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *error) +{ + u32 access; + + BUG_ON(!vcpu-arch.mmu.nested); + + /* NPT walks are treated as user writes */ + access = PFERR_WRITE_MASK | PFERR_USER_MASK; + + return vcpu-arch.nested_mmu.gva_to_gpa(vcpu, gpa, access, error); +} + static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, u32 *error) { @@ -2476,11 +2488,45 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) return r; } +static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *g_context = vcpu-arch.nested_mmu; + struct kvm_mmu *h_context = vcpu-arch.mmu; + + g_context-get_cr3 = get_cr3; + g_context-translate_gpa = translate_nested_gpa; + g_context-inject_page_fault = kvm_inject_page_fault; + + /* +* Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. 
The +* translation of l2_gpa to l1_gpa addresses is done using the +* arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa +* functions between mmu and nested_mmu are swapped. +*/ + if (!is_paging(vcpu)) { + g_context-root_level = 0; + h_context-gva_to_gpa = nonpaging_gva_to_gpa_nested; + } else if (is_long_mode(vcpu)) { + g_context-root_level = PT64_ROOT_LEVEL; + h_context-gva_to_gpa = paging64_gva_to_gpa_nested; + } else if (is_pae(vcpu)) { + g_context-root_level = PT32E_ROOT_LEVEL; + h_context-gva_to_gpa = paging64_gva_to_gpa_nested; + } else { + g_context-root_level = PT32_ROOT_LEVEL; + h_context-gva_to_gpa = paging32_gva_to_gpa_nested; + } + + return 0; +} + static int init_kvm_mmu(struct kvm_vcpu *vcpu) { vcpu-arch.update_pte.pfn = bad_pfn; - if (tdp_enabled) + if (vcpu-arch.mmu.nested) + return init_kvm_nested_mmu(vcpu); + else if (tdp_enabled) return init_kvm_tdp_mmu(vcpu); else return init_kvm_softmmu(vcpu); -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 12/18] KVM: MMU: Implement nested gva_to_gpa functions
This patch adds the functions to do a nested l2_gva to l1_gpa page table walk. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |3 +++ arch/x86/kvm/mmu.c |8 arch/x86/kvm/paging_tmpl.h | 31 +++ 3 files changed, 42 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 76c8b5f..20dd1ce 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -295,6 +295,9 @@ struct kvm_vcpu_arch { bool tpr_access_reporting; struct kvm_mmu mmu; + + struct kvm_mmu nested_mmu; + /* only needed in kvm_pv_mmu_op() path, but it's hot so * put it here to avoid allocation */ struct kvm_pv_mmu_op_buffer mmu_op_buffer; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ec3830c..c831955 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2162,6 +2162,14 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, return vaddr; } +static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, +u32 access, u32 *error) +{ + if (error) + *error = 0; + return vcpu-arch.nested_mmu.translate_gpa(vcpu, vaddr, error); +} + static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code) { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a72d5ea..c0158d8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -282,6 +282,16 @@ static int FNAME(walk_addr)(struct guest_walker *walker, write_fault, user_fault, fetch_fault); } +static int FNAME(walk_addr_nested)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, gva_t addr, + int write_fault, int user_fault, + int fetch_fault) +{ + return FNAME(walk_addr_generic)(walker, vcpu, vcpu-arch.nested_mmu, + addr, write_fault, user_fault, + fetch_fault); +} + static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, u64 *spte, const void *pte) { @@ -541,6 +551,27 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu 
*vcpu, gva_t vaddr, u32 access, return gpa; } +static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, + u32 access, u32 *error) +{ + struct guest_walker walker; + gpa_t gpa = UNMAPPED_GVA; + int r; + + r = FNAME(walk_addr_nested)(walker, vcpu, vaddr, + !!(access PFERR_WRITE_MASK), + !!(access PFERR_USER_MASK), + !!(access PFERR_FETCH_MASK)); + + if (r) { + gpa = gfn_to_gpa(walker.gfn); + gpa |= vaddr ~PAGE_MASK; + } else if (error) + *error = walker.error_code; + + return gpa; +} + static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06/18] KVM: MMU: Introduce inject_page_fault function pointer
This patch introduces an inject_page_fault function pointer into struct kvm_mmu which will be used to inject a page fault. This will be used later when Nested Nested Paging is implemented. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |3 +++ arch/x86/kvm/mmu.c |3 ++- 2 files changed, 5 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 37d0145..c0b5576 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -244,6 +244,9 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); + void (*inject_page_fault)(struct kvm_vcpu *vcpu, + unsigned long addr, + u32 error_code); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, u32 *error); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 189c68d..8f835f1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2263,7 +2263,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, u64 addr, u32 err_code) { - kvm_inject_page_fault(vcpu, addr, err_code); + vcpu-arch.mmu.inject_page_fault(vcpu, addr, err_code); } static void paging_free(struct kvm_vcpu *vcpu) @@ -2446,6 +2446,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) vcpu-arch.mmu.tdp_enabled = false; vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; vcpu-arch.mmu.get_cr3 = get_cr3; + vcpu-arch.mmu.inject_page_fault = kvm_inject_page_fault; return r; } -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 14/18] KVM: SVM: Initialize Nested Nested MMU context on VMRUN
This patch adds code to initialize the Nested Nested Paging MMU context when the L1 guest executes a VMRUN instruction and has nested paging enabled in its VMCB. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/mmu.c |1 + arch/x86/kvm/svm.c | 56 ++- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ccaf6b1..b929d84 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2573,6 +2573,7 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu) { mmu_free_roots(vcpu); } +EXPORT_SYMBOL_GPL(kvm_mmu_unload); static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a6c08e0..bce10fe 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -93,7 +93,6 @@ struct nested_state { /* Nested Paging related state */ u64 nested_cr3; - }; #define MSRPM_OFFSETS 16 @@ -282,6 +281,15 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) force_new_asid(vcpu); } +static int get_npt_level(void) +{ +#ifdef CONFIG_X86_64 + return PT64_ROOT_LEVEL; +#else + return PT32E_ROOT_LEVEL; +#endif +} + static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { if (!npt_enabled !(efer EFER_LMA)) @@ -1578,6 +1586,27 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, nested_svm_vmexit(svm); } +static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) +{ + int r; + + r = kvm_init_shadow_mmu(vcpu, vcpu-arch.mmu); + + vcpu-arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; + vcpu-arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; + vcpu-arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; + vcpu-arch.mmu.shadow_root_level = get_npt_level(); + vcpu-arch.nested_mmu.gva_to_gpa = vcpu-arch.mmu.gva_to_gpa; + vcpu-arch.mmu.nested= true; + + return r; +} + +static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) +{ + vcpu-arch.mmu.nested = false; +} + static int nested_svm_check_permissions(struct vcpu_svm *svm) { if 
(!(svm-vcpu.arch.efer EFER_SVME) @@ -1942,6 +1971,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) kvm_clear_exception_queue(svm-vcpu); kvm_clear_interrupt_queue(svm-vcpu); + svm-nested.nested_cr3 = 0; + /* Restore selected save entries */ svm-vmcb-save.es = hsave-save.es; svm-vmcb-save.cs = hsave-save.cs; @@ -1968,6 +1999,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_svm_unmap(page); + nested_svm_uninit_mmu_context(svm-vcpu); kvm_mmu_reset_context(svm-vcpu); kvm_mmu_load(svm-vcpu); @@ -2021,6 +2053,13 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) if (!nested_vmcb) return false; + /* Do check if nested paging is allowed for the guest */ + if (nested_vmcb-control.nested_ctl !npt_enabled) { + nested_vmcb-control.exit_code = SVM_EXIT_ERR; + nested_svm_unmap(page); + return false; + } + trace_kvm_nested_vmrun(svm-vmcb-save.rip - 3, vmcb_gpa, nested_vmcb-save.rip, nested_vmcb-control.int_ctl, @@ -2065,6 +2104,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) else svm-vcpu.arch.hflags = ~HF_HIF_MASK; + if (nested_vmcb-control.nested_ctl) { + kvm_mmu_unload(svm-vcpu); + svm-nested.nested_cr3 = nested_vmcb-control.nested_cr3; + nested_svm_init_mmu_context(svm-vcpu); + } + /* Load the nested guest state */ svm-vmcb-save.es = nested_vmcb-save.es; svm-vmcb-save.cs = nested_vmcb-save.cs; @@ -3233,15 +3278,6 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static int get_npt_level(void) -{ -#ifdef CONFIG_X86_64 - return PT64_ROOT_LEVEL; -#else - return PT32E_ROOT_LEVEL; -#endif -} - static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/18] KVM: MMU: Add infrastructure for two-level page walker
This patch introduces a mmu-callback to translate gpa addresses in the walk_addr code. This is later used to translate l2_gpa addresses into l1_gpa addresses. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/mmu.c |7 +++ arch/x86/kvm/paging_tmpl.h | 19 +++ include/linux/kvm_host.h|5 + 4 files changed, 32 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c0b5576..76c8b5f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -250,6 +250,7 @@ struct kvm_mmu { void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, u32 *error); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *error); void (*prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page); int (*sync_page)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 647353d..ec3830c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2149,6 +2149,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) spin_unlock(vcpu-kvm-mmu_lock); } +static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *error) +{ + return gpa; +} + static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, u32 *error) { @@ -2399,6 +2404,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-new_cr3 = nonpaging_new_cr3; context-page_fault = tdp_page_fault; context-free = nonpaging_free; + context-translate_gpa = translate_gpa; context-prefetch_page = nonpaging_prefetch_page; context-sync_page = nonpaging_sync_page; context-invlpg = nonpaging_invlpg; @@ -2443,6 +2449,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) else r = paging32_init_context(vcpu, context); + vcpu-arch.mmu.translate_gpa = translate_gpa; vcpu-arch.mmu.base_role.glevels = vcpu-arch.mmu.root_level; vcpu-arch.mmu.tdp_enabled = false; diff --git 
a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6c55a31..a72d5ea 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -122,6 +122,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, unsigned index, pt_access, pte_access; gpa_t pte_gpa; int rsvd_fault = 0; + u32 error; trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, fetch_fault); @@ -150,6 +151,15 @@ walk: table_gfn = gpte_to_gfn(pte); pte_gpa = gfn_to_gpa(table_gfn); pte_gpa += index * sizeof(pt_element_t); + + pte_gpa = mmu-translate_gpa(vcpu, pte_gpa, error); + if (pte_gpa == UNMAPPED_GVA) { + walker-error_code = error; + return 0; + } + /* pte_gpa might have changed - recalculate table_gfn */ + table_gfn = gpa_to_gfn(pte_gpa); + walker-table_gfn[walker-level - 1] = table_gfn; walker-pte_gpa[walker-level - 1] = pte_gpa; @@ -209,6 +219,15 @@ walk: is_cpuid_PSE36()) walker-gfn += pse36_gfn_delta(pte); + /* Do the final translation */ + pte_gpa = gfn_to_gpa(walker-gfn); + pte_gpa = mmu-translate_gpa(vcpu, pte_gpa, error); + if (pte_gpa == UNMAPPED_GVA) { + walker-error_code = error; + return 0; + } + walker-gfn = gpa_to_gfn(pte_gpa); + break; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a3fd0f9..ef2e81a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -503,6 +503,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn) return (gpa_t)gfn PAGE_SHIFT; } +static inline gfn_t gpa_to_gfn(gpa_t gpa) +{ + return (gfn_t)gpa PAGE_SHIFT; +} + static inline hpa_t pfn_to_hpa(pfn_t pfn) { return (hpa_t)pfn PAGE_SHIFT; -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/18] KVM: MMU: Introduce generic walk_addr function
This is the first patch in the series towards a generic walk_addr implementation which could walk two-dimensional page tables in the end. In this first step the walk_addr function is renamed into walk_addr_generic which takes an mmu context as an additional parameter. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/paging_tmpl.h | 30 -- 1 files changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 8608439..6c55a31 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -112,9 +112,10 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) /* * Fetch a guest pte for a guest virtual address */ -static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, - int write_fault, int user_fault, int fetch_fault) +static int FNAME(walk_addr_generic)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t addr, int write_fault, + int user_fault, int fetch_fault) { pt_element_t pte; gfn_t table_gfn; @@ -125,10 +126,12 @@ static int FNAME(walk_addr)(struct guest_walker *walker, trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, fetch_fault); walk: - walker-level = vcpu-arch.mmu.root_level; - pte = vcpu-arch.mmu.get_cr3(vcpu); + + walker-level = mmu-root_level; + pte = mmu-get_cr3(vcpu); + #if PTTYPE == 64 - if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) { + if (walker-level == PT32E_ROOT_LEVEL) { pte = kvm_pdptr_read(vcpu, (addr 30) 3); trace_kvm_mmu_paging_element(pte, walker-level); if (!is_present_gpte(pte)) @@ -137,7 +140,7 @@ walk: } #endif ASSERT((!is_long_mode(vcpu) is_pae(vcpu)) || - (vcpu-arch.mmu.get_cr3(vcpu) CR3_NONPAE_RESERVED_BITS) == 0); + (mmu-get_cr3(vcpu) CR3_NONPAE_RESERVED_BITS) == 0); pt_access = ACC_ALL; @@ -158,8 +161,7 @@ walk: if (!is_present_gpte(pte)) goto not_present; - rsvd_fault = is_rsvd_bits_set(vcpu-arch.mmu, pte, - walker-level); + rsvd_fault = 
is_rsvd_bits_set(mmu, pte, walker-level); if (rsvd_fault) goto access_error; @@ -195,7 +197,7 @@ walk: (PTTYPE == 64 || is_pse(vcpu))) || ((walker-level == PT_PDPE_LEVEL) (pte PT_PAGE_SIZE_MASK) - vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL)) { + mmu-root_level == PT64_ROOT_LEVEL)) { int lvl = walker-level; walker-gfn = gpte_to_gfn_lvl(pte, lvl); @@ -253,6 +255,14 @@ err: return 0; } +static int FNAME(walk_addr)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, gva_t addr, + int write_fault, int user_fault, int fetch_fault) +{ + return FNAME(walk_addr_generic)(walker, vcpu, vcpu-arch.mmu, addr, + write_fault, user_fault, fetch_fault); +} + static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, u64 *spte, const void *pte) { -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 15/18] KVM: MMU: Propagate the right fault back to the guest after gva_to_gpa
This patch implements logic to make sure that either a page-fault/page-fault-vmexit or a nested-page-fault-vmexit is propagated back to the guest. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/mmu.h |1 + arch/x86/kvm/paging_tmpl.h |2 ++ arch/x86/kvm/x86.c | 15 ++- 3 files changed, 17 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 64f619b..b42b27e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -47,6 +47,7 @@ #define PFERR_USER_MASK (1U 2) #define PFERR_RSVD_MASK (1U 3) #define PFERR_FETCH_MASK (1U 4) +#define PFERR_NESTED_MASK (1U 31) int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index c0158d8..9fc5fb1 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -154,6 +154,7 @@ walk: pte_gpa = mmu-translate_gpa(vcpu, pte_gpa, error); if (pte_gpa == UNMAPPED_GVA) { + error |= PFERR_NESTED_MASK; walker-error_code = error; return 0; } @@ -223,6 +224,7 @@ walk: pte_gpa = gfn_to_gpa(walker-gfn); pte_gpa = mmu-translate_gpa(vcpu, pte_gpa, error); if (pte_gpa == UNMAPPED_GVA) { + error |= PFERR_NESTED_MASK; walker-error_code = error; return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2883ce8..9f8b02d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -314,6 +314,19 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); } +void kvm_propagate_fault(struct kvm_vcpu *vcpu, unsigned long addr, u32 error_code) +{ + u32 nested, error; + + nested = error_code PFERR_NESTED_MASK; + error = error_code ~PFERR_NESTED_MASK; + + if (vcpu-arch.mmu.nested !(error_code PFERR_NESTED_MASK)) + vcpu-arch.nested_mmu.inject_page_fault(vcpu, addr, error); + else + vcpu-arch.mmu.inject_page_fault(vcpu, addr, error); +} + void 
kvm_inject_nmi(struct kvm_vcpu *vcpu) { vcpu-arch.nmi_pending = 1; @@ -3546,7 +3559,7 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) ret = kvm_read_guest_virt(q, p, bytes, vcpu, error_code); if (ret == X86EMUL_PROPAGATE_FAULT) - kvm_inject_page_fault(vcpu, q, error_code); + kvm_propagate_fault(vcpu, q, error_code); return ret; } -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/18] KVM: SVM: Implement MMU helper functions for Nested Nested Paging
This patch adds the helper functions which will be used in the mmu context for handling nested nested page faults. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/svm.c | 33 + 1 files changed, 33 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a7a76f5..a6c08e0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -91,6 +91,9 @@ struct nested_state { u32 intercept_exceptions; u64 intercept; + /* Nested Paging related state */ + u64 nested_cr3; + }; #define MSRPM_OFFSETS 16 @@ -1545,6 +1548,36 @@ static int vmmcall_interception(struct vcpu_svm *svm) return 1; } +static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return svm-nested.nested_cr3; +} + +static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, + unsigned long root) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm-vmcb-control.nested_cr3 = root; + force_new_asid(vcpu); +} + +static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, + unsigned long addr, + u32 error_code) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm-vmcb-control.exit_code = SVM_EXIT_NPF; + svm-vmcb-control.exit_code_hi = 0; + svm-vmcb-control.exit_info_1 = error_code; + svm-vmcb-control.exit_info_2 = addr; + + nested_svm_vmexit(svm); +} + static int nested_svm_check_permissions(struct vcpu_svm *svm) { if (!(svm-vcpu.arch.efer EFER_SVME) -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/18] KVM: MMU: Introduce get_cr3 function pointer
This function pointer in the MMU context is required to implement Nested Nested Paging. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/mmu.c |9 - arch/x86/kvm/paging_tmpl.h |4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1bf8501..37d0145 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -242,6 +242,7 @@ struct kvm_pio_request { struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); + unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 84e3209..189c68d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2071,7 +2071,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) int direct = 0; u64 pdptr; - root_gfn = vcpu-arch.cr3 PAGE_SHIFT; + root_gfn = vcpu-arch.mmu.get_cr3(vcpu) PAGE_SHIFT; if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu-arch.mmu.root_hpa; @@ -2254,6 +2254,11 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) mmu_free_roots(vcpu); } +static unsigned long get_cr3(struct kvm_vcpu *vcpu) +{ + return vcpu-arch.cr3; +} + static void inject_page_fault(struct kvm_vcpu *vcpu, u64 addr, u32 err_code) @@ -2399,6 +2404,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-root_hpa = INVALID_PAGE; vcpu-arch.mmu.tdp_enabled = true; vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_tdp_cr3; + vcpu-arch.mmu.get_cr3 = get_cr3; if (!is_paging(vcpu)) { context-gva_to_gpa = nonpaging_gva_to_gpa; @@ -2439,6 +2445,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) vcpu-arch.mmu.base_role.glevels = vcpu-arch.mmu.root_level; vcpu-arch.mmu.tdp_enabled = false; 
vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; + vcpu-arch.mmu.get_cr3 = get_cr3; return r; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 92b6bb5..1149daa 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -126,7 +126,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, fetch_fault); walk: walker-level = vcpu-arch.mmu.root_level; - pte = vcpu-arch.cr3; + pte = vcpu-arch.mmu.get_cr3(vcpu); #if PTTYPE == 64 if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) { pte = kvm_pdptr_read(vcpu, (addr 30) 3); @@ -137,7 +137,7 @@ walk: } #endif ASSERT((!is_long_mode(vcpu) is_pae(vcpu)) || - (vcpu-arch.cr3 CR3_NONPAE_RESERVED_BITS) == 0); + (vcpu-arch.mmu.get_cr3(vcpu) CR3_NONPAE_RESERVED_BITS) == 0); pt_access = ACC_ALL; -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/18] KVM: X86: Introduce a tdp_set_cr3 function
This patch introduces a special set_tdp_cr3 function pointer in kvm_x86_ops which is only used for tdp enabled mmu contexts. This allows us to remove some hacks from svm code. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/mmu.c |2 +- arch/x86/kvm/svm.c | 23 ++- arch/x86/kvm/vmx.c |3 +++ 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 887a1f7..1bf8501 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -537,6 +537,7 @@ struct kvm_x86_ops { u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); + void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); const struct trace_print_flags *exit_reasons_str; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ec3da11..84e3209 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2398,7 +2398,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-shadow_root_level = kvm_x86_ops-get_tdp_level(); context-root_hpa = INVALID_PAGE; vcpu-arch.mmu.tdp_enabled = true; - vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; + vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_tdp_cr3; if (!is_paging(vcpu)) { context-gva_to_gpa = nonpaging_gva_to_gpa; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 15b8852..a7a76f5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3044,9 +3044,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) gs_selector = kvm_read_gs(); ldt_selector = kvm_read_ldt(); svm-vmcb-save.cr2 = vcpu-arch.cr2; - /* required for live migration with NPT */ - if (npt_enabled) - svm-vmcb-save.cr3 = vcpu-arch.cr3; clgi(); @@ -3155,16 +3152,22 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - if (npt_enabled) { - svm-vmcb-control.nested_cr3 = root; - force_new_asid(vcpu); - return; - } - 
svm-vmcb-save.cr3 = root; force_new_asid(vcpu); } +static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm-vmcb-control.nested_cr3 = root; + + /* Also sync guest cr3 here in case we live migrate */ + svm-vmcb-save.cr3 = vcpu-arch.cr3; + + force_new_asid(vcpu); +} + static int is_disabled(void) { u64 vm_cr; @@ -3361,6 +3364,8 @@ static struct kvm_x86_ops svm_x86_ops = { .cpuid_update = svm_cpuid_update, .rdtscp_supported = svm_rdtscp_supported, + + .set_tdp_cr3 = set_tdp_cr3, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ae3217d..530d14d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4212,11 +4212,14 @@ static struct kvm_x86_ops vmx_x86_ops = { .get_mt_mask = vmx_get_mt_mask, .exit_reasons_str = vmx_exit_reasons_str, + .get_lpage_level = vmx_get_lpage_level, .cpuid_update = vmx_cpuid_update, .rdtscp_supported = vmx_rdtscp_supported, + + .set_tdp_cr3 = vmx_set_cr3, }; static int __init vmx_init(void) -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/18] KVM: MMU: Make tdp_enabled a mmu-context parameter
This patch changes the tdp_enabled flag from its global meaning to the mmu-context. This is necessary for Nested SVM with emulation of Nested Paging where we need an extra MMU context to shadow the Nested Nested Page Table. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/mmu.c |8 +--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ec891a2..e7bef19 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -254,6 +254,7 @@ struct kvm_mmu { int root_level; int shadow_root_level; union kvm_mmu_page_role base_role; + bool tdp_enabled; u64 *pae_root; u64 rsvd_bits_mask[2][4]; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 741373e..5c66c99 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1812,7 +1812,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= shadow_user_mask; if (level PT_PAGE_TABLE_LEVEL) spte |= PT_PAGE_SIZE_MASK; - if (tdp_enabled) + if (vcpu-arch.mmu.tdp_enabled) spte |= kvm_x86_ops-get_mt_mask(vcpu, gfn, kvm_is_mmio_pfn(pfn)); @@ -2077,7 +2077,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) hpa_t root = vcpu-arch.mmu.root_hpa; ASSERT(!VALID_PAGE(root)); - if (tdp_enabled) + if (vcpu-arch.mmu.tdp_enabled) direct = 1; if (mmu_check_root(vcpu, root_gfn)) return 1; @@ -2090,7 +2090,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) return 0; } direct = !is_paging(vcpu); - if (tdp_enabled) + if (vcpu-arch.mmu.tdp_enabled) direct = 1; for (i = 0; i 4; ++i) { hpa_t root = vcpu-arch.mmu.pae_root[i]; @@ -2397,6 +2397,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-invlpg = nonpaging_invlpg; context-shadow_root_level = kvm_x86_ops-get_tdp_level(); context-root_hpa = INVALID_PAGE; + vcpu-arch.mmu.tdp_enabled = true; if (!is_paging(vcpu)) { context-gva_to_gpa = nonpaging_gva_to_gpa; @@ -2435,6 +2436,7 @@ static int 
init_kvm_softmmu(struct kvm_vcpu *vcpu) r = paging32_init_context(vcpu); vcpu-arch.mmu.base_role.glevels = vcpu-arch.mmu.root_level; + vcpu-arch.mmu.tdp_enabled = false; return r; } -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 16/18] KVM: X86: Add callback to let modules decide over some supported cpuid bits
This patch adds the get_supported_cpuid callback to kvm_x86_ops. It will be used in do_cpuid_ent to delegate the decision about some supported cpuid bits to the architecture modules. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/include/asm/kvm_host.h |2 ++ arch/x86/kvm/svm.c |6 ++ arch/x86/kvm/vmx.c |6 ++ arch/x86/kvm/x86.c |3 +++ 4 files changed, 17 insertions(+), 0 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 66a698e..7d649f9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -555,6 +555,8 @@ struct kvm_x86_ops { bool (*rdtscp_supported)(void); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); + const struct trace_print_flags *exit_reasons_str; }; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bce10fe..fe1398e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3287,6 +3287,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) { } +static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) +{ +} + static const struct trace_print_flags svm_exit_reasons_str[] = { { SVM_EXIT_READ_CR0,read_cr0 }, { SVM_EXIT_READ_CR3,read_cr3 }, @@ -3435,6 +3439,8 @@ static struct kvm_x86_ops svm_x86_ops = { .rdtscp_supported = svm_rdtscp_supported, .set_tdp_cr3 = set_tdp_cr3, + + .set_supported_cpuid = svm_set_supported_cpuid, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 530d14d..9216867 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4146,6 +4146,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) } } +static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) +{ +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -4220,6 +4224,8 @@ static struct kvm_x86_ops vmx_x86_ops = { 
.rdtscp_supported = vmx_rdtscp_supported, .set_tdp_cr3 = vmx_set_cr3, + + .set_supported_cpuid = vmx_set_supported_cpuid, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f8b02d..53360de 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1921,6 +1921,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry-ecx = kvm_supported_word6_x86_features; break; } + + kvm_x86_ops-set_supported_cpuid(function, entry); + put_cpu(); } -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 18/18] KVM: X86: Add KVM_CAP_SVM_CPUID_FIXED
This capability shows userspace that it can trust the values of cpuid[0x800A] that it gets from the kernel. Old behavior was to just return the host cpuid values which is broken because all additional svm-features need support in the svm emulation code. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/x86.c |1 + include/linux/kvm.h |1 + 2 files changed, 2 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 53360de..51bad08 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1557,6 +1557,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: + case KVM_CAP_SVM_CPUID_FIXED: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ce28767..86caf32 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -507,6 +507,7 @@ struct kvm_ioeventfd { #define KVM_CAP_DEBUGREGS 50 #endif #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 +#define KVM_CAP_SVM_CPUID_FIXED 52 #ifdef KVM_CAP_IRQ_ROUTING -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/18] KVM: MMU: Change init_kvm_softmmu to take a context as parameter
Some logic of this function is required to build the Nested Nested Paging context. So factor the required logic into a seperate function and export it. Also make the whole init path suitable for more than one mmu context. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/mmu.c | 60 ++- arch/x86/kvm/mmu.h |1 + 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8f835f1..560ecb6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2225,10 +2225,9 @@ static void nonpaging_free(struct kvm_vcpu *vcpu) mmu_free_roots(vcpu); } -static int nonpaging_init_context(struct kvm_vcpu *vcpu) +static int nonpaging_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) { - struct kvm_mmu *context = vcpu-arch.mmu; - context-new_cr3 = nonpaging_new_cr3; context-page_fault = nonpaging_page_fault; context-gva_to_gpa = nonpaging_gva_to_gpa; @@ -2287,9 +2286,10 @@ static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) #include paging_tmpl.h #undef PTTYPE -static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) +static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, + int level) { - struct kvm_mmu *context = vcpu-arch.mmu; int maxphyaddr = cpuid_maxphyaddr(vcpu); u64 exb_bit_rsvd = 0; @@ -2342,9 +2342,11 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) } } -static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) +static int paging64_init_context_common(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, + int level) { - struct kvm_mmu *context = vcpu-arch.mmu; + reset_rsvds_bits_mask(vcpu, context, level); ASSERT(is_pae(vcpu)); context-new_cr3 = paging_new_cr3; @@ -2360,17 +2362,17 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) return 0; } -static int paging64_init_context(struct kvm_vcpu *vcpu) +static int paging64_init_context(struct kvm_vcpu *vcpu, +struct kvm_mmu 
*context) { - reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); - return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); + return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); } -static int paging32_init_context(struct kvm_vcpu *vcpu) +static int paging32_init_context(struct kvm_vcpu *vcpu, +struct kvm_mmu *context) { - struct kvm_mmu *context = vcpu-arch.mmu; + reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); - reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); context-new_cr3 = paging_new_cr3; context-page_fault = paging32_page_fault; context-gva_to_gpa = paging32_gva_to_gpa; @@ -2384,10 +2386,10 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) return 0; } -static int paging32E_init_context(struct kvm_vcpu *vcpu) +static int paging32E_init_context(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) { - reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); - return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); + return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); } static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) @@ -2410,15 +2412,15 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-gva_to_gpa = nonpaging_gva_to_gpa; context-root_level = 0; } else if (is_long_mode(vcpu)) { - reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); + reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL); context-gva_to_gpa = paging64_gva_to_gpa; context-root_level = PT64_ROOT_LEVEL; } else if (is_pae(vcpu)) { - reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); + reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL); context-gva_to_gpa = paging64_gva_to_gpa; context-root_level = PT32E_ROOT_LEVEL; } else { - reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); + reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); context-gva_to_gpa = paging32_gva_to_gpa; context-root_level = PT32_ROOT_LEVEL; } @@ -2426,24 +2428,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) return 0; } -static int init_kvm_softmmu(struct kvm_vcpu *vcpu) +int 
kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { int r; - ASSERT(vcpu); ASSERT(!VALID_PAGE(vcpu-arch.mmu.root_hpa)); if (!is_paging(vcpu)) -
[PATCH 01/18] KVM: MMU: Check for root_level instead of long mode
The walk_addr function checks for !is_long_mode in its 64 bit version. But what is meant here is a check for pae paging. Change the condition to really check for pae paging so that it also works with nested nested paging. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/paging_tmpl.h |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 81eab9a..92b6bb5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -128,7 +128,7 @@ walk: walker-level = vcpu-arch.mmu.root_level; pte = vcpu-arch.cr3; #if PTTYPE == 64 - if (!is_long_mode(vcpu)) { + if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) { pte = kvm_pdptr_read(vcpu, (addr 30) 3); trace_kvm_mmu_paging_element(pte, walker-level); if (!is_present_gpte(pte)) @@ -194,7 +194,7 @@ walk: (PTTYPE == 64 || is_pse(vcpu))) || ((walker-level == PT_PDPE_LEVEL) (pte PT_PAGE_SIZE_MASK) - is_long_mode(vcpu))) { + vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL)) { int lvl = walker-level; walker-gfn = gpte_to_gfn_lvl(pte, lvl); -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/18] KVM: MMU: Let is_rsvd_bits_set take mmu context instead of vcpu
This patch changes is_rsvd_bits_set() function prototype to take only a kvm_mmu context instead of a full vcpu. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/mmu.c |4 ++-- arch/x86/kvm/paging_tmpl.h |3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 560ecb6..647353d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2270,12 +2270,12 @@ static void paging_free(struct kvm_vcpu *vcpu) nonpaging_free(vcpu); } -static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) +static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) { int bit7; bit7 = (gpte 7) 1; - return (gpte vcpu-arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0; + return (gpte mmu-rsvd_bits_mask[bit7][level-1]) != 0; } #define PTTYPE 64 diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 1149daa..8608439 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -158,7 +158,8 @@ walk: if (!is_present_gpte(pte)) goto not_present; - rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker-level); + rsvd_fault = is_rsvd_bits_set(vcpu-arch.mmu, pte, + walker-level); if (rsvd_fault) goto access_error; -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/2][RFC] Nested Paging support for Nested SVM (userspace part)
Hi, this is the small userspace part. These patches are necessary for the l1 guest to see the npt feature bit with cpuid. Please review and give feedback :-) Thanks, Joerg Diffstat: kvm/include/linux/kvm.h |2 ++ qemu-kvm-x86.c | 15 +-- target-i386/cpu.h |2 ++ target-i386/helper.c|3 ++- 4 files changed, 19 insertions(+), 3 deletions(-) Shortlog: Joerg Roedel (2): QEMU-KVM: Fix ext3_feature propagation QEMU-KVM: Ask kernel about supported svm features -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] QEMU-KVM: Ask kernel about supported svm features
This patch adds code to ask the kernel about the svm features it supports for its guests and propagates them to the guest. The new capability is necessary because the old behavior of the kernel was to just return the host svm features but every svm-feature needs emulation in the nested svm kernel code. The new capability indicates that the kernel is aware of that when returning svm cpuid information. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- kvm/include/linux/kvm.h |2 ++ qemu-kvm-x86.c | 15 +-- target-i386/cpu.h |2 ++ target-i386/helper.c|2 +- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h index 6485981..aeb2c9b 100644 --- a/kvm/include/linux/kvm.h +++ b/kvm/include/linux/kvm.h @@ -500,6 +500,8 @@ struct kvm_ioeventfd { #define KVM_CAP_PCI_SEGMENT 47 +#define KVM_CAP_SVM_CPUID_FIXED 52 + #ifdef KVM_CAP_IRQ_ROUTING struct kvm_irq_routing_irqchip { diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 7a5925a..60e6d26 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -1291,8 +1291,19 @@ int kvm_arch_init_vcpu(CPUState *cenv) qemu_kvm_cpuid_on_env(copy); limit = copy.regs[R_EAX]; -for (i = 0x8000; i = limit; ++i) - do_cpuid_ent(cpuid_ent[cpuid_nent++], i, 0, copy); +for (i = 0x8000; i = limit; ++i) { + do_cpuid_ent(cpuid_ent[cpuid_nent], i, 0, copy); + switch (i) { + case 0x800a: + if (!kvm_check_extension(kvm_state, KVM_CAP_SVM_CPUID_FIXED)) + break; + cpuid_ent[cpuid_nent].eax = kvm_arch_get_supported_cpuid(cenv, 0x800a, R_EAX); + cpuid_ent[cpuid_nent].ebx = kvm_arch_get_supported_cpuid(cenv, 0x800a, R_EBX); + cpuid_ent[cpuid_nent].edx = kvm_arch_get_supported_cpuid(cenv, 0x800a, R_EDX); + break; + } + cpuid_nent += 1; +} kvm_setup_cpuid2(cenv, cpuid_nent, cpuid_ent); diff --git a/target-i386/cpu.h b/target-i386/cpu.h index b64bd02..adcc19f 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -421,6 +421,8 @@ #define CPUID_EXT3_IBS (1 10) #define CPUID_EXT3_SKINIT (1 12) +#define 
CPUID_SVM_NPT (1 0) + #define CPUID_VENDOR_INTEL_1 0x756e6547 /* Genu */ #define CPUID_VENDOR_INTEL_2 0x49656e69 /* ineI */ #define CPUID_VENDOR_INTEL_3 0x6c65746e /* ntel */ diff --git a/target-i386/helper.c b/target-i386/helper.c index 73d8389..109f656 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -2220,7 +2220,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = 0x0001; /* SVM Revision */ *ebx = 0x0010; /* nr of ASIDs */ *ecx = 0; -*edx = 0; /* optional features */ +*edx = 0; break; default: /* reserved values: zero */ -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] QEMU-KVM: Fix ext3_feature propagation
This patch fixes the propagation of the ext3_features from the qemu cpu-model to kvm. This is required for the guest to see the svm flag. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- target-i386/helper.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/target-i386/helper.c b/target-i386/helper.c index e595a3e..73d8389 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -759,6 +759,7 @@ static int cpu_x86_register (CPUX86State *env, const char *cpu_model) env-pat = 0x0007040600070406ULL; env-cpuid_ext_features = def-ext_features; env-cpuid_ext2_features = def-ext2_features; +env-cpuid_ext3_features = def-ext3_features; env-cpuid_xlevel = def-xlevel; env-cpuid_kvm_features = def-kvm_features; { -- 1.7.0 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
segfault at start with latest qemu-kvm.git
With latest qemu-kvm.git I am getting a segfault at start: /tmp/qemu-kvm-test/bin/qemu-system-x86_64 -m 1024 -smp 2 \ -drive file=/images/f12-x86_64.img,if=virtio,cache=none,boot=on kvm_create_vcpu: Invalid argument Segmentation fault (core dumped) git bisect points to: Bisecting: 0 revisions left to test after this (roughly 0 steps) [52b03dd70261934688cb00768c4b1e404716a337] qemu-kvm: Move kvm_set_boot_cpu_id $ git show commit 7811d4e8ec057d25db68f900be1f09a142faca49 Author: Marcelo Tosatti mtosa...@redhat.com Date: Mon Mar 1 21:36:31 2010 -0300 If I manually back out the patch it will boot fine. -- David -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: IVSHMEM and limits on shared memory
On Wed, Mar 3, 2010 at 12:06 AM, Khaled Ibrahim kz...@hotmail.com wrote: Hi Cam, I used your patches successfully to support shared memory on KVM and used the test cases successfully, but qemu-kvm crashes when I increased the size of the shared memory. I applied the ivshmem patch to qemu-kvm-0.12.3 (some manual patching was needed). It worked flawlessly for up to 128MB of shared memory on my system. I am running on a machine with 64GB memory running opensuse (kernel 2.6.27) on AMD opteron. Qemu crashes with smp=4 and the shared memory requested is 256MB, (512MB with smp=1), even though the shared memory file is created. I debugged the problem and it seems that some memory corruption happens. Can you please provide the full command-line for the smp=1 instance? It crashes in the subpage_register for rtl8139 pci driver!, tracked back to rtl8139_mmio_map. The problem starts with corrupted value in the config field in the struct for the rtl8139 driver. At offset 20 of this field the address should indicate that the address is uninitialized at that time of crash, but surprisingly the value changes over the course of execution and gets the SIZE of the shared memory allocated (related to ivshmem). I failed to identify what changes/corrupts that field. I tried some padding for allocation but the field always gets updated with the size of the shared memory in a very consistent way. As far as you know does anything in the guest trigger the corruption? Does the corruption happen immediately or after running some of the test programs? Thanks, Cam -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: IVSHMEM and limits on shared memory
Date: Wed, 3 Mar 2010 15:09:17 -0700 Subject: Re: IVSHMEM and limits on shared memory From: c...@cs.ualberta.ca To: kz...@hotmail.com CC: kvm@vger.kernel.org On Wed, Mar 3, 2010 at 12:06 AM, Khaled Ibrahim wrote: Hi Cam, I used your patches successfully to support shared memory on KVM and used the test cases successfully, but qemu-kvm crashes when I increased the size of the shared memory. I applied the ivshmem patch to qemu-kvm-0.12.3 (some manual patching was needed). It worked flawlessly for up to 128MB of shared memory on my system. I am running on a machine with 64GB memory running opensuse (kernel 2.6.27) on AMD opteron. Qemu crashes with smp=4 and the shared memory requested in 256MB, (512MB with smp=1), even though the shared memory file is created. I debugged the problem and it seems that some memory corruptions happens. Can you please provide the full command-line for the smp=1 instance? qemu-system-x86_64 ./qemudisk0.raw \ -net nic,model=rtl8139,macaddr=52:54:00:12:34:50\ -net tap,ifname=tap0,script=no,downscript=no \ -m 4096 \ -ivshmem 512,kvmshmem\ -smp 1 \ -usb \ -usbdevice tablet \ -localtime It crashes in the subpage_register for rtl8139 pci driver!, tracked back to rtl8139_mmio_map. The problem starts with corrupted value in the config field in the struct for the rtl8139 driver. At offset 20 of this field the address should indicate that the address is uninitialized at that time of crash, but surprisingly the value changes over the course of execution and gets the SIZE of the shared memory allocated (related to ivshmem). I failed to identify what changes/corrupts that field. I tried some padding for allocation but the field always gets updated with the size of the shared memory in a very consistent way. As far as you know does anything in the guest trigger the corruption? Does the corruption happen immediately or after running some of the test programs? The boot process does not complete, and it fails before it reach grub. 
_ Hotmail: Trusted email with Microsoft's powerful SPAM protection. http://clk.atdmt.com/GBL/go/201469226/direct/01/-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] QEMU-KVM: Ask kernel about supported svm features
Am 03.03.2010 um 20:15 schrieb Joerg Roedel joerg.roe...@amd.com: This patch adds code to ask the kernel about the svm features it supports for its guests and propagates them to the guest. The new capability is necessary because the old behavior of the kernel was to just return the host svm features but every svm-feature needs emulation in the nested svm kernel code. The new capability indicates that the kernel is aware of that when returning svm cpuid information. Do we really need that complexity? By default the kernel masks out unsupported cpuid features anyway. So if we don't have npt guest support (enabled), the kernel module should just mask it out. IOW, always passing npt should work. No capability should make it get masked out. Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: segfault at start with latest qemu-kvm.git
David S. Ahern wrote: With latest qemu-kvm.git I am getting a segfault at start: /tmp/qemu-kvm-test/bin/qemu-system-x86_64 -m 1024 -smp 2 \ -drive file=/images/f12-x86_64.img,if=virtio,cache=none,boot=on kvm_create_vcpu: Invalid argument Segmentation fault (core dumped) git bisect points to: Bisecting: 0 revisions left to test after this (roughly 0 steps) [52b03dd70261934688cb00768c4b1e404716a337] qemu-kvm: Move kvm_set_boot_cpu_id $ git show commit 7811d4e8ec057d25db68f900be1f09a142faca49 Author: Marcelo Tosatti mtosa...@redhat.com Date: Mon Mar 1 21:36:31 2010 -0300 If I manually back out the patch it will boot fine. Problem persists after removing the build directory and doing a fresh configure make? I'm asking before taking the bug (which would be mine, likely) as I recently spent some hours debugging a volatile build system issue. Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH 0/18][RFC] Nested Paging support for Nested SVM (aka NPT-Virtualization)
Joerg Roedel wrote: Hi, here are the patches that implement nested paging support for nested svm. They are somewhat intrusive to the soft-mmu so I post them as RFC in the first round to get feedback about the general direction of the changes. Nevertheless I am proud to report that with these patches the famous kernel-compile benchmark runs only 4% slower in the l2 guest as in the l1 guest when l2 is single-processor. With SMP guests the Wow! Jan situation is very different. The more vcpus the guest has the more is the performance drop from l1 to l2. Anyway, this post is to get feedback about the overall concept of these patches. Please review and give feedback :-) Thanks, Joerg Diffstat: arch/x86/include/asm/kvm_host.h | 21 ++ arch/x86/kvm/mmu.c | 152 ++- arch/x86/kvm/mmu.h |2 + arch/x86/kvm/paging_tmpl.h | 81 ++--- arch/x86/kvm/svm.c | 126 +++- arch/x86/kvm/vmx.c |9 +++ arch/x86/kvm/x86.c | 19 +- include/linux/kvm.h |1 + include/linux/kvm_host.h|5 ++ 9 files changed, 354 insertions(+), 62 deletions(-) Shortlog: Joerg Roedel (18): KVM: MMU: Check for root_level instead of long mode KVM: MMU: Make tdp_enabled a mmu-context parameter KVM: MMU: Make set_cr3 a function pointer in kvm_mmu KVM: X86: Introduce a tdp_set_cr3 function KVM: MMU: Introduce get_cr3 function pointer KVM: MMU: Introduce inject_page_fault function pointer KVM: SVM: Implement MMU helper functions for Nested Nested Paging KVM: MMU: Change init_kvm_softmmu to take a context as parameter KVM: MMU: Let is_rsvd_bits_set take mmu context instead of vcpu KVM: MMU: Introduce generic walk_addr function KVM: MMU: Add infrastructure for two-level page walker KVM: MMU: Implement nested gva_to_gpa functions KVM: MMU: Introduce Nested MMU context KVM: SVM: Initialize Nested Nested MMU context on VMRUN KVM: MMU: Propagate the right fault back to the guest after gva_to_gpa KVM: X86: Add callback to let modules decide over some supported cpuid bits KVM: SVM: Report Nested Paging support to userspace KVM: X86: Add 
KVM_CAP_SVM_CPUID_FIXED signature.asc Description: OpenPGP digital signature
Re: segfault at start with latest qemu-kvm.git
On 03/03/2010 04:08 PM, Jan Kiszka wrote: David S. Ahern wrote: With latest qemu-kvm.git I am getting a segfault at start: /tmp/qemu-kvm-test/bin/qemu-system-x86_64 -m 1024 -smp 2 \ -drive file=/images/f12-x86_64.img,if=virtio,cache=none,boot=on kvm_create_vcpu: Invalid argument Segmentation fault (core dumped) git bisect points to: Bisecting: 0 revisions left to test after this (roughly 0 steps) [52b03dd70261934688cb00768c4b1e404716a337] qemu-kvm: Move kvm_set_boot_cpu_id $ git show commit 7811d4e8ec057d25db68f900be1f09a142faca49 Author: Marcelo Tosatti mtosa...@redhat.com Date: Mon Mar 1 21:36:31 2010 -0300 If I manually back out the patch it will boot fine. Problem persists after removing the build directory and doing a fresh configure make? I'm asking before taking the bug (which would be mine, likely) as I recently spent some hours debugging a volatile build system issue. Jan Before sending the email I pulled a fresh clone in a completely different directory (/tmp) to determine if it was something I introduced. I then went back to my usual location, unapplied the patch and it worked fine. David -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH -v3] Add savevm/loadvm support for MCE
Huang Ying wrote: MCE registers are saved/load into/from CPUState in kvm_arch_save/load_regs. To simulate the MCG_STATUS clearing upon reset, MSR_MCG_STATUS is set to 0 for KVM_PUT_RESET_STATE. v3: - use msrs[] in kvm_arch_load/save_regs and get_msr_entry directly. Looks good! v2: - Rebased on new CPU registers save/load framework. Signed-off-by: Huang Ying ying.hu...@intel.com Acked-by: Jan Kiszka jan.kis...@siemens.com --- qemu-kvm-x86.c | 36 1 file changed, 36 insertions(+) --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -748,7 +748,22 @@ static int get_msr_entry(struct kvm_msr_ case MSR_KVM_WALL_CLOCK: env-wall_clock_msr = entry-data; break; +#ifdef KVM_CAP_MCE +case MSR_MCG_STATUS: +env-mcg_status = entry-data; +break; +case MSR_MCG_CTL: +env-mcg_ctl = entry-data; +break; +#endif default: +#ifdef KVM_CAP_MCE +if (entry-index = MSR_MC0_CTL \ +entry-index MSR_MC0_CTL + (env-mcg_cap 0xff) * 4) { +env-mce_banks[entry-index - MSR_MC0_CTL] = entry-data; +break; +} +#endif printf(Warning unknown msr index 0x%x\n, entry-index); return 1; } @@ -979,6 +994,18 @@ void kvm_arch_load_regs(CPUState *env, i set_msr_entry(msrs[n++], MSR_KVM_SYSTEM_TIME, env-system_time_msr); set_msr_entry(msrs[n++], MSR_KVM_WALL_CLOCK, env-wall_clock_msr); } +#ifdef KVM_CAP_MCE +if (env-mcg_cap) { +if (level == KVM_PUT_RESET_STATE) +set_msr_entry(msrs[n++], MSR_MCG_STATUS, env-mcg_status); +else if (level == KVM_PUT_FULL_STATE) { +set_msr_entry(msrs[n++], MSR_MCG_STATUS, env-mcg_status); +set_msr_entry(msrs[n++], MSR_MCG_CTL, env-mcg_ctl); +for (i = 0; i (env-mcg_cap 0xff); i++) +set_msr_entry(msrs[n++], MSR_MC0_CTL + i, env-mce_banks[i]); +} +} +#endif rc = kvm_set_msrs(env, msrs, n); if (rc == -1) @@ -1144,6 +1171,15 @@ void kvm_arch_save_regs(CPUState *env) msrs[n++].index = MSR_KVM_SYSTEM_TIME; msrs[n++].index = MSR_KVM_WALL_CLOCK; +#ifdef KVM_CAP_MCE +if (env-mcg_cap) { +msrs[n++].index = MSR_MCG_STATUS; +msrs[n++].index = MSR_MCG_CTL; +for (i = 0; i (env-mcg_cap 0xff) * 4; i++) 
+msrs[n++].index = MSR_MC0_CTL + i; +} +#endif + rc = kvm_get_msrs(env, msrs, n); if (rc == -1) { perror(kvm_get_msrs FAILED); signature.asc Description: OpenPGP digital signature
Re: segfault at start with latest qemu-kvm.git
David S. Ahern wrote: On 03/03/2010 04:08 PM, Jan Kiszka wrote: David S. Ahern wrote: With latest qemu-kvm.git I am getting a segfault at start: /tmp/qemu-kvm-test/bin/qemu-system-x86_64 -m 1024 -smp 2 \ -drive file=/images/f12-x86_64.img,if=virtio,cache=none,boot=on kvm_create_vcpu: Invalid argument Segmentation fault (core dumped) git bisect points to: Bisecting: 0 revisions left to test after this (roughly 0 steps) [52b03dd70261934688cb00768c4b1e404716a337] qemu-kvm: Move kvm_set_boot_cpu_id $ git show commit 7811d4e8ec057d25db68f900be1f09a142faca49 Author: Marcelo Tosatti mtosa...@redhat.com Date: Mon Mar 1 21:36:31 2010 -0300 If I manually back out the patch it will boot fine. Problem persists after removing the build directory and doing a fresh configure make? I'm asking before taking the bug (which would be mine, likely) as I recently spent some hours debugging a volatile build system issue. Jan Before sending the email I pulled a fresh clone in a completely different directory (/tmp) to determine if it was something I introduced. I then went back to my usual location, unapplied the patch and it worked fine. OK, that reason can be excluded. What's your host kernel kvm version? (Of course, the issue does not show up here. But virtio currently does not boot for me - independent of my patch.) Jan signature.asc Description: OpenPGP digital signature
Re: segfault at start with latest qemu-kvm.git
On 03/03/2010 04:20 PM, Jan Kiszka wrote: David S. Ahern wrote: On 03/03/2010 04:08 PM, Jan Kiszka wrote: David S. Ahern wrote: With latest qemu-kvm.git I am getting a segfault at start: /tmp/qemu-kvm-test/bin/qemu-system-x86_64 -m 1024 -smp 2 \ -drive file=/images/f12-x86_64.img,if=virtio,cache=none,boot=on kvm_create_vcpu: Invalid argument Segmentation fault (core dumped) git bisect points to: Bisecting: 0 revisions left to test after this (roughly 0 steps) [52b03dd70261934688cb00768c4b1e404716a337] qemu-kvm: Move kvm_set_boot_cpu_id $ git show commit 7811d4e8ec057d25db68f900be1f09a142faca49 Author: Marcelo Tosatti mtosa...@redhat.com Date: Mon Mar 1 21:36:31 2010 -0300 If I manually back out the patch it will boot fine. Problem persists after removing the build directory and doing a fresh configure make? I'm asking before taking the bug (which would be mine, likely) as I recently spent some hours debugging a volatile build system issue. Jan Before sending the email I pulled a fresh clone in a completely different directory (/tmp) to determine if it was something I introduced. I then went back to my usual location, unapplied the patch and it worked fine. OK, that reason can be excluded. What's your host kernel kvm version? (Of course, the issue does not show up here. But virtio currently does not boot for me - independent of my patch.) Jan Fedora Core 12, Linux daahern-lx 2.6.31.12-174.2.22.fc12.x86_64 #1 SMP Fri Feb 19 18:55:03 UTC 2010 x86_64 x86_64 x86_64 GNU/Linux David -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 03/10] x86: Extend validity of cpu_is_bsp
Gleb Natapov wrote: On Mon, Mar 01, 2010 at 06:17:22PM +0100, Jan Kiszka wrote: As we hard-wire the BSP to CPU 0 anyway and cpuid_apic_id equals cpu_index, cpu_is_bsp can also be based on the latter directly. This will help an early user of it: KVM while initializing mp_state. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/pc.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/hw/pc.c b/hw/pc.c index b90a79e..58c32ea 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -767,7 +767,8 @@ static void pc_init_ne2k_isa(NICInfo *nd) int cpu_is_bsp(CPUState *env) { -return env-cpuid_apic_id == 0; +/* We hard-wire the BSP to the first CPU. */ +return env-cpu_index == 0; } We should not assume that. The function was written like that specifically so the code around it will not rely on this assumption. Now you change that specifically to write code that will do incorrect assumptions. I don't see the logic here. The logic is that we do not support any other mapping yet - with or without this change. Without it, we complicate the APIC initialization for (so far) no good reason. Once we want to support different BSP assignments, we need to go through the code and rework some parts anyway. Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH 17/18] KVM: SVM: Report Nested Paging support to userspace
On 03.03.2010, at 20:12, Joerg Roedel wrote: This patch implements the reporting of the nested paging feature support to userspace. Signed-off-by: Joerg Roedel joerg.roe...@amd.com --- arch/x86/kvm/svm.c | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fe1398e..ce71023 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3289,6 +3289,16 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { + switch (func) { + case 0x800A: + if (!npt_enabled) + break; if (!nested) break; Alex-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/18][RFC] Nested Paging support for Nested SVM (aka NPT-Virtualization)
On 03.03.2010, at 20:12, Joerg Roedel wrote: Hi, here are the patches that implement nested paging support for nested svm. They are somewhat intrusive to the soft-mmu so I post them as RFC in the first round to get feedback about the general direction of the changes. Nevertheless I am proud to report that with these patches the famous kernel-compile benchmark runs only 4% slower in the l2 guest as in the l1 guest when l2 is single-processor. With SMP guests the situation is very different. The more vcpus the guest has the more is the performance drop from l1 to l2. Anyway, this post is to get feedback about the overall concept of these patches. Please review and give feedback :-) Nice job! It's great to see you finally got around to it :-). Have you tracked what slows down SMP l2 guests yet? So far I've been assuming that IPIs just completely kill the performance, but I guess it shouldn't be that bad, especially now where you have sped up the #VMEXIT path that much. Alex-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: segfault at start with latest qemu-kvm.git
David S. Ahern wrote: On 03/03/2010 04:20 PM, Jan Kiszka wrote: David S. Ahern wrote: On 03/03/2010 04:08 PM, Jan Kiszka wrote: David S. Ahern wrote: With latest qemu-kvm.git I am getting a segfault at start: /tmp/qemu-kvm-test/bin/qemu-system-x86_64 -m 1024 -smp 2 \ -drive file=/images/f12-x86_64.img,if=virtio,cache=none,boot=on kvm_create_vcpu: Invalid argument Segmentation fault (core dumped) git bisect points to: Bisecting: 0 revisions left to test after this (roughly 0 steps) [52b03dd70261934688cb00768c4b1e404716a337] qemu-kvm: Move kvm_set_boot_cpu_id $ git show commit 7811d4e8ec057d25db68f900be1f09a142faca49 Author: Marcelo Tosatti mtosa...@redhat.com Date: Mon Mar 1 21:36:31 2010 -0300 If I manually back out the patch it will boot fine. Problem persists after removing the build directory and doing a fresh configure make? I'm asking before taking the bug (which would be mine, likely) as I recently spent some hours debugging a volatile build system issue. Jan Before sending the email I pulled a fresh clone in a completely different directory (/tmp) to determine if it was something I introduced. I then went back to my usual location, unapplied the patch and it worked fine. OK, that reason can be excluded. What's your host kernel kvm version? (Of course, the issue does not show up here. But virtio currently does not boot for me - independent of my patch.) Jan Fedora Core 12, Linux daahern-lx 2.6.31.12-174.2.22.fc12.x86_64 #1 SMP Fri Feb 19 18:55:03 UTC 2010 x86_64 x86_64 x86_64 GNU/Linux Reproduced after switching back to kvm-kmod-2.6.31, will debug. Thanks, Jan signature.asc Description: OpenPGP digital signature
Re: Windows guest freezes with black screen
On Tue, Mar 02, 2010 at 10:55:45AM -0600, Brian Jackson wrote: On Tuesday 02 March 2010 10:33:19 am Harald Braumann wrote: quite often my Windows guest freezes. The window is just black and it uses 100% CPU. Have you tried to reproduce without vmware vga? That support was developed against the linux drivers (and possibly some loose specifications) and has had known issues in the past. It would at least be a data point of where to look. So far no freezes with `-vga std'. So there is a very high probability that vmware is the culprit. Anyway, std works much better. Vmware had a lot of graphic glitches, like sometimes the screen wouldn't be updated and I had to move windows so they would be redrawn. Cheers, harry -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/18][RFC] Nested Paging support for Nested SVM (aka NPT-Virtualization)
On 03/03/2010 01:12 PM, Joerg Roedel wrote: Hi, here are the patches that implement nested paging support for nested svm. They are somewhat intrusive to the soft-mmu so I post them as RFC in the first round to get feedback about the general direction of the changes. Nevertheless I am proud to report that with these patches the famous kernel-compile benchmark runs only 4% slower in the l2 guest as in the l1 guest when l2 is single-processor. That's an awesome result. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM PMU virtualization
On Wed, 2010-03-03 at 11:13 +0100, Peter Zijlstra wrote: On Wed, 2010-03-03 at 17:27 +0800, Zhang, Yanmin wrote: +static inline u64 perf_instruction_pointer(struct pt_regs *regs) +{ + u64 ip; + ip = percpu_read(perf_virt_ip.ip); + if (!ip) + ip = instruction_pointer(regs); + else + perf_reset_virt_ip(); + return ip; +} + +static inline unsigned int perf_misc_flags(struct pt_regs *regs) +{ + if (percpu_read(perf_virt_ip.ip)) { + return percpu_read(perf_virt_ip.user_mode) ? + PERF_RECORD_MISC_GUEST_USER : + PERF_RECORD_MISC_GUEST_KERNEL; + } else + return user_mode(regs) ? PERF_RECORD_MISC_USER : +PERF_RECORD_MISC_KERNEL; +} This codes in the assumption that perf_misc_flags() must only be called before perf_instruction_pointer(), which is currently true, but you might want to put a comment near to remind us of this. I will change the logic with a clear reset operation in caller. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM PMU virtualization
On Wed, 2010-03-03 at 11:15 +0100, Peter Zijlstra wrote: On Wed, 2010-03-03 at 17:27 +0800, Zhang, Yanmin wrote: -#ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ -PERF_RECORD_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) -#endif Ah, that #ifndef is for powerpc, which I think you just broke. Thanks for the reminder. I deleted powerpc codes when building cscope lib. It seems perf_save_virt_ip/perf_reset_virt_ip interfaces are ugly. I plan to change them to a callback function struct and kvm registers its version to perf. Such like: struct perf_guest_info_callbacks { int (*is_in_guest)(); u64 (*get_guest_ip)(); int (*copy_guest_stack)(); int (*reset_in_guest)(); ... }; int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *); int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *); It's more scalable and neater. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/4] KVM: Rework VCPU state writeback API
On Tue, Mar 02, 2010 at 11:29:10PM -0300, Marcelo Tosatti wrote: On Tue, Mar 02, 2010 at 05:31:09PM +0100, Jan Kiszka wrote: Marcelo Tosatti wrote: On Tue, Mar 02, 2010 at 09:00:04AM +0100, Jan Kiszka wrote: Marcelo Tosatti wrote: On Mon, Mar 01, 2010 at 07:10:30PM +0100, Jan Kiszka wrote: This grand cleanup drops all reset and vmsave/load related synchronization points in favor of four(!) generic hooks: - cpu_synchronize_all_states in qemu_savevm_state_complete (initial sync from kernel before vmsave) - cpu_synchronize_all_post_init in qemu_loadvm_state (writeback after vmload) - cpu_synchronize_all_post_init in main after machine init - cpu_synchronize_all_post_reset in qemu_system_reset (writeback after system reset) These writeback points + the existing one of VCPU exec after cpu_synchronize_state map on three levels of writeback: - KVM_PUT_RUNTIME_STATE (during runtime, other VCPUs continue to run) - KVM_PUT_RESET_STATE (on synchronous system reset, all VCPUs stopped) - KVM_PUT_FULL_STATE(on init or vmload, all VCPUs stopped as well) This level is passed to the arch-specific VCPU state writing function that will decide which concrete substates need to be written. That way, no writer of load, save or reset functions that interact with in-kernel KVM states will ever have to worry about synchronization again. That also means that a lot of reasons for races, segfaults and deadlocks are eliminated. cpu_synchronize_state remains untouched, just as Anthony suggested. We continue to need it before reading or writing of VCPU states that are also tracked by in-kernel KVM subsystems. Consequently, this patch removes many cpu_synchronize_state calls that are now redundant, just like remaining explicit register syncs. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Jan, This patch breaks system reset of WinXP.32 install (more easily reproducible without iothread enabled). Screenshot attached. Strange - no issues with qemu-kvm? Any special command line switch? 
/me goes scrounging for some installation XP32 CD in the meantime... No issues with qemu-kvm. Could not spot anything obvious. And, of course, my WinXP installation did not trigger any reset issue, even in non-iothreaded mode. :( The regression seems to be caused by seabios commit d7e998f. Kevin, the failure can be seen on the attached screenshot, which happens on the first reboot of WinXP 32 installation (after copying files etc). attachment: uqmaster-failure.png
Re: IVSHMEM and limits on shared memory
On Wed, Mar 3, 2010 at 3:38 PM, Khaled Ibrahim kz...@hotmail.com wrote: Date: Wed, 3 Mar 2010 15:09:17 -0700 Subject: Re: IVSHMEM and limits on shared memory From: c...@cs.ualberta.ca To: kz...@hotmail.com CC: kvm@vger.kernel.org On Wed, Mar 3, 2010 at 12:06 AM, Khaled Ibrahim wrote: Hi Cam, I used your patches successfully to support shared memory on KVM and used the test cases successfully, but qemu-kvm crashes when I increased the size of the shared memory. I applied the ivshmem patch to qemu-kvm-0.12.3 (some manual patching was needed). It worked flawlessly for up to 128MB of shared memory on my system. I am running on a machine with 64GB memory running opensuse (kernel 2.6.27) on AMD opteron. Qemu crashes with smp=4 and the shared memory requested in 256MB, (512MB with smp=1), even though the shared memory file is created. I debugged the problem and it seems that some memory corruptions happens. Can you please provide the full command-line for the smp=1 instance? qemu-system-x86_64 ./qemudisk0.raw \ -net nic,model=rtl8139,macaddr=52:54:00:12:34:50\ -net tap,ifname=tap0,script=no,downscript=no \ -m 4096 \ -ivshmem 512,kvmshmem\ -smp 1 \ -usb \ -usbdevice tablet \ -localtime It crashes in the subpage_register for rtl8139 pci driver!, tracked back to rtl8139_mmio_map. The problem starts with corrupted value in the config field in the struct for the rtl8139 driver. At offset 20 of this field the address should indicate that the address is uninitialized at that time of crash, but surprisingly the value changes over the course of execution and gets the SIZE of the shared memory allocated (related to ivshmem). I failed to identify what changes/corrupts that field. I tried some padding for allocation but the field always gets updated with the size of the shared memory in a very consistent way. Good debugging. I've been able to reproduce your error when applying my patch to qemu-kvm-0.12.3 and can trace the error to the subpage_register. 
Curiously, this bug does not occur with the latest version from the git repo. I've tested up to 1 GB without problem. So I'm not sure if it's an error in my patch or elsewhere in the memory management that has since been fixed. As a test, I removed anywhere my patch stored the size of the shared memory region and hard coded the size of 512 MB into qemu_ram_alloc and pci_register_bar, so that my patch never writes the size of the memory region anywhere. And I discovered that the value of 512MB still shows up at the offset you mention, so it seems something else is storing that value in the wrong location and corrupting memory. Can you try using the version from the git repo and see if the error recurs? Cam As far as you know does anything in the guest trigger the corruption? Does the corruption happen immediately or after running some of the test programs? The boot process does not complete, and it fails before it reach grub. _ Hotmail: Trusted email with Microsoft’s powerful SPAM protection. http://clk.atdmt.com/GBL/go/201469226/direct/01/ -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/4] KVM: Rework VCPU state writeback API
On Thu, Mar 04, 2010 at 01:21:12AM -0300, Marcelo Tosatti wrote: The regression seems to be caused by seabios commit d7e998f. Kevin, the failure can be seen on the attached screenshot, which happens on the first reboot of WinXP 32 installation (after copying files etc). Sorry - I also noticed a bug in that commit recently. I pushed the fix I had in my local tree. -Kevin -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v4 03/10] x86: Extend validity of cpu_is_bsp
On Thu, Mar 04, 2010 at 12:34:22AM +0100, Jan Kiszka wrote: Gleb Natapov wrote: On Mon, Mar 01, 2010 at 06:17:22PM +0100, Jan Kiszka wrote: As we hard-wire the BSP to CPU 0 anyway and cpuid_apic_id equals cpu_index, cpu_is_bsp can also be based on the latter directly. This will help an early user of it: KVM while initializing mp_state. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- hw/pc.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/hw/pc.c b/hw/pc.c index b90a79e..58c32ea 100644 --- a/hw/pc.c +++ b/hw/pc.c @@ -767,7 +767,8 @@ static void pc_init_ne2k_isa(NICInfo *nd) int cpu_is_bsp(CPUState *env) { -return env-cpuid_apic_id == 0; +/* We hard-wire the BSP to the first CPU. */ +return env-cpu_index == 0; } We should not assume that. The function was written like that specifically so the code around it will not rely on this assumption. Now you change that specifically to write code that will do incorrect assumptions. I don't see the logic here. The logic is that we do not support any other mapping yet - with or without this change. Without it, we complicate the APIC initialization for (so far) no good reason. Once we want to support different BSP assignments, we need to go through the code and rework some parts anyway. As far as I remember the only part that was missing was a command line to specify apic IDs for each CPU and what CPU is BSP. The code was ready otherwise. I's very sad if this was broken by other modifications. But changes like that actually pushes us back from our goal. Why not rework code so it will work with correct cpu_is_bsp() function instead of introducing this hack? -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM-Test: Add a kvm subtest format_disk
Add a kvm subtest format_disk. This test will simply create a file system on disk, mount it and write a file with some content to the disk. Check whether the write succeeded. Signed-off-by: sshang ssh...@redhat.com --- client/tests/kvm/tests/format_disk.py | 63 client/tests/kvm/tests_base.cfg.sample | 27 ++ 2 files changed, 90 insertions(+), 0 deletions(-) create mode 100644 client/tests/kvm/tests/format_disk.py diff --git a/client/tests/kvm/tests/format_disk.py b/client/tests/kvm/tests/format_disk.py new file mode 100644 index 000..7e340ad --- /dev/null +++ b/client/tests/kvm/tests/format_disk.py @@ -0,0 +1,63 @@ +import logging +from autotest_lib.client.common_lib import error +import kvm_test_utils, kvm_utils + +def run_format_disk(test, params, env): + +Format guest disk: +1) Boot guest with second disk +2) Log into guest +3) Send a sequence of commands which format disk1 and mount it to guest +4) Write some random str into one file within guest disk1 and read it, make sure all right. + +@param test: kvm test object +@param params: Dictionary with the test parameters +@param env: Dictionary with test environment.
+ +vm = kvm_test_utils.get_living_vm(env, params.get(main_vm)) +session = kvm_test_utils.wait_for_login(vm, + timeout=int(params.get(login_timeout, 360))) + +# Create a partition on disk +create_partition_cmd = params.get(create_partition_cmd) +if create_partition_cmd: + s, o = session.get_command_status_output(create_partition_cmd) + if s != 0: + raise error.TestFail, Failed to create partition with error: %s % o + logging.info(Output of command of create partition on disk: %s % o) + +# Format the disk +format_cmd = params.get(format_cmd) +if format_cmd: + s, o = session.get_command_status_output(format_cmd, timeout=1200) + if s != 0: + raise error.TestFail, Failed to format with error: %s % o + logging.info(Output of format disk command: %s % o) + +# Mount the disk +mount_cmd = params.get(mount_cmd) +if mount_cmd: + s, o = session.get_command_status_output(mount_cmd) + if s != 0: + raise error.TestFail, Failed to mount with error: %s % o + logging.info(Output of mount disk command: %s % o) + +# Write some random string to test file +testfile_name = params.get(testfile_name) +ranstr = kvm_utils.generate_random_string(100) + +writefile_cmd = params.get(writefile_cmd) +wfilecmd = writefile_cmd + + ranstr + + testfile_name +s, o = session.get_command_status_output(wfilecmd) +if s != 0: + raise error.TestFail(Write to file error: %s % o) + +# Read in the file to see whether content is changed +readfile_cmd = params.get(readfile_cmd) +rfilecmd = readfile_cmd + + testfile_name +s, o = session.get_command_status_output(rfilecmd) +if s != 0: + raise error.TestFail(Read file error: %s % o) +if o.strip() != ranstr: + raise error.TestFail(The content writen to file is changed) +session.close() diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 040d0c3..20897f9 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -300,6 +300,17 @@ variants: shutdown_method = shell kill_vm = yes 
kill_vm_gracefully = no + +- format_disk: +type = format_disk +images += disk1 +boot_drive_disk1 = yes +image_boot_disk1 = no +image_name_disk1 = storage +image_size_disk1 = 10G +force_create_image_disk1 = yes +writefile_cmd = echo +kill_vm = yes # Do not define test variants below shutdown @@ -329,6 +340,11 @@ variants: file_transfer_port = 22 mem_chk_cmd = dmidecode -t 17 | awk -F: '/Size/ {print $2}' cpu_chk_cmd = grep -c processor /proc/cpuinfo +format_disk: +format_cmd = cd /dev ls | egrep [shv]db | xargs mkfs.ext3 +mount_cmd = cd /dev ls | egrep [shv]db | xargs -I dev mount -t ext3 dev /media +testfile_name = /media/txt.txt +readfile_cmd = cat variants: - Fedora: @@ -531,6 +547,9 @@ variants: steps=RHEL-3.9-i386.steps unattended_install: unattended_file = unattended/RHEL-3-series.ks +format_disk: +format_cmd = cd /dev echo hdb | xargs mkfs.ext3 +mount_cmd = test -d /media || mkdir /media cd /dev mount -t ext3 hdb /media - 3.9.x86_64: no setup autotest linux_s3 @@ -543,6 +562,9 @@ variants: steps=RHEL-3.9-x86_64.steps
[PATCH] KVM test: Add a subtest cpuflags
This test mainly tests whether all guest cpu flags are supported by host machine. Signed-off-by: sshang ssh...@redhat.com --- client/tests/kvm/tests/cpuflags.py | 80 client/tests/kvm/tests_base.cfg.sample |9 +++- 2 files changed, 88 insertions(+), 1 deletions(-) create mode 100644 client/tests/kvm/tests/cpuflags.py diff --git a/client/tests/kvm/tests/cpuflags.py b/client/tests/kvm/tests/cpuflags.py new file mode 100644 index 000..5f51d65 --- /dev/null +++ b/client/tests/kvm/tests/cpuflags.py @@ -0,0 +1,80 @@ +import logging,os,commands +from autotest_lib.client.common_lib import error +import kvm_test_utils + +def run_cpuflags(test,params,env): + +Check guest cpu extension flags supported by host +1) Log into guest +2) Get guest cpu information and host cpu information +3) Compare with each other make sure host cpu extension flags + bits contain guest + +@param test: kvm test object +@param params: Dictionary with the test parameters +@param env: Dictionary with test environment. + +vm = kvm_test_utils.get_living_vm(env, params.get(main_vm)) +session = kvm_test_utils.wait_for_login(vm, + timeout=int(params.get(login_timeout, 360))) + +get_cpuflags_cmd = params.get(getcpuflags) +s, o = session.get_command_status_output(get_cpuflags_cmd) +if s != 0: +raise error.TestFail, Could not read guest cpu flags +guest_cpuflags_list = o.splitlines()[0].split(':')[1].split() +host_cpuflags_list = commands.getoutput(get_cpuflags_cmd).\ + splitlines()[0].split(':')[1].split() + +logging.debug(Host flags %s % host_cpuflags_list) +logging.debug(Guest flags %s % guest_cpuflags_list) + +# There are some special flags, for example 'hypervisor', 'sep', +# present in guests but not in the hosts, exclude these flags from +# comparison. 
+ban_flags_list = params.get(ban_flags).split() + +guest_cpuflags_set = set(guest_cpuflags_list) +host_cpuflags_set = set(host_cpuflags_list) + +# If the excluded flags provided by the config file that exist in the +# host, remove them from the ban_flags_list, because we require kvm +# virtualize/simulate the host. +if params.get(strict_check) == yes: +for flag in ban_flags_list: +if flag in host_cpuflags_list: +ban_flags_list.remove(flag) + +# exclude the banned flags from guest flags set. +for flag in ban_flags_list: +if flag in guest_cpuflags_set: +guest_cpuflags_set.remove(flag) + +if guest_cpuflags_set.issubset(host_cpuflags_set): +logging.info(Guest cpu flags all supported by host) +else: +invalidflags_set = guest_cpuflags_set - host_cpuflags_set +host_cpuflags_str = str(host_cpuflags_set)[4:-1] +invalidflags_str = '' +for i in invalidflags_set: +if host_cpuflags_str.find(i.strip()) == -1: +invalidflags_str = invalidflags_str + i + ',' + +if invalidflags_str.strip() != '': +raise error.TestFail(Unsupported cpu flags by host: %s % \ +invalidflags_str[0:-1]) + +# check the extra cpuflags in guest. +extra_flags_set = set(params.get(extra_flags).split()) +if extra_flags_set.issubset(guest_cpuflags_set): +logging.info(All extra flags are found in guest.) 
+else: +invalidflags_set = extra_flags_set - guest_cpuflags_set +invalidflags_str = '' +for i in invalidflags_set: +invalidflags_str = invalidflags_str + i + ',' +raise error.TestFail(Unsupported extra flags by guest: %s % \ +invalidflags_str[0:-1]) + +session.close() + diff --git a/client/tests/kvm/tests_base.cfg.sample b/client/tests/kvm/tests_base.cfg.sample index 040d0c3..f7dcbb6 100644 --- a/client/tests/kvm/tests_base.cfg.sample +++ b/client/tests/kvm/tests_base.cfg.sample @@ -300,6 +300,13 @@ variants: shutdown_method = shell kill_vm = yes kill_vm_gracefully = no + +- cpuflags: +type = cpuflags +getcpuflags = grep 'flags' /proc/cpuinfo +ban_flags = up sep hypervisor sep_good +extra_flags = +strict_check = yes # Do not define test variants below shutdown @@ -1001,7 +1008,7 @@ variants: md5sum = 9fae22f2666369968a76ef59e9a81ced -linux_s3: +linux_s3|cpuflags: only Linux -- 1.5.5.6 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html