[COMMIT master] make-release: don't use --tmpdir mktemp option
From: Eduardo Habkost ehabk...@redhat.com This allows the script to work on older systems, where 'mktemp --tmpdir' is not available. Signed-off-by: Eduardo Habkost ehabk...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm/scripts/make-release b/kvm/scripts/make-release index 64e77f9..c5f8c92 100755 --- a/kvm/scripts/make-release +++ b/kvm/scripts/make-release @@ -12,7 +12,7 @@ formal= releasedir=~/sf-release [[ -z $TMP ]] TMP=/tmp -tmpdir=`mktemp -d --tmpdir=$TMP qemu-kvm-make-release.XX` +tmpdir=`mktemp -d $TMP/qemu-kvm-make-release.XX` while [[ $1 = -* ]]; do opt=$1 shift -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] p2v monitor command: translate guest physical address to host virtual address
From: Max Asbock masb...@linux.vnet.ibm.com Add command p2v to translate guest physical address to host virtual address. The p2v command provides one step in a chain of translations from guest virtual to guest physical to host virtual to host physical. Host physical is then used to inject a machine check error. As a consequence the HWPOISON code on the host and the MCE injection code in qemu-kvm are exercised. Signed-off-by: Max Asbock masb...@linux.vnet.ibm.com Signed-off-by: Jiajia Zheng jiajia.zh...@intel.com Signed-off-by: Huang Ying ying.hu...@intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/monitor.c b/monitor.c index 9072c06..df0cb33 100644 --- a/monitor.c +++ b/monitor.c @@ -2301,6 +2301,18 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict) } #endif +static void do_p2v(Monitor *mon, const QDict *qdict) +{ +target_phys_addr_t size = TARGET_PAGE_SIZE; +target_phys_addr_t addr = qdict_get_int(qdict, addr); +void *vaddr; + +vaddr = cpu_physical_memory_map(addr, size, 0); +monitor_printf(mon, Guest physical address %p is mapped at + host virtual address %p\n, (void *)addr, vaddr); +cpu_physical_memory_unmap(vaddr, size, 0, 0); +} + static int do_getfd(Monitor *mon, const QDict *qdict, QObject **ret_data) { const char *fdname = qdict_get_str(qdict, fdname); diff --git a/qemu-monitor.hx b/qemu-monitor.hx index 620f937..2698a42 100644 --- a/qemu-monitor.hx +++ b/qemu-monitor.hx @@ -459,6 +459,19 @@ Start gdbserver session (default @var{port}=1234) ETEXI { +.name = p2v, +.args_type = fmt:/,addr:l, +.params = /fmt addr, +.help = translate guest physical 'addr' to host virtual address, +.mhandler.cmd = do_p2v, +}, +STEXI +...@item p2v @var{addr} +...@findex mce +Translate guest physical @var{addr} to host virtual address. 
+ETEXI + +{ .name = x, .args_type = fmt:/,addr:l, .params = /fmt addr, -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] make-release: don't use --mtime and --transform tar options
From: Eduardo Habkost ehabk...@redhat.com Those options are not available on older systems. Instead of --transform, just create the file inside the expected directory. Instead of --mtime, use 'touch' to set file mtime before running tar. Signed-off-by: Eduardo Habkost ehabk...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm/scripts/make-release b/kvm/scripts/make-release index c5f8c92..56302c3 100755 --- a/kvm/scripts/make-release +++ b/kvm/scripts/make-release @@ -52,20 +52,22 @@ mkdir -p $(dirname $tarball) git archive --prefix=$name/ --format=tar $commit $tarball mtime=`git show --format=%ct $commit^{commit} --` -tarargs=--owner=root --group=root --mti...@$mtime +tarargs=--owner=root --group=root -mkdir -p $tmpdir +mkdir -p $tmpdir/$name git cat-file -p ${commit}:roms | awk ' { print $4, $3 } ' \ - $tmpdir/EXTERNAL_DEPENDENCIES -tar -rf $tarball --transform s,^,$name/, -C $tmpdir \ + $tmpdir/$name/EXTERNAL_DEPENDENCIES +touch -d @$mtime $tmpdir/$name/EXTERNAL_DEPENDENCIES +tar -rf $tarball -C $tmpdir \ $tarargs \ -EXTERNAL_DEPENDENCIES +$name/EXTERNAL_DEPENDENCIES rm -rf $tmpdir if [[ -n $formal ]]; then -mkdir -p $tmpdir -echo $name $tmpdir/KVM_VERSION -tar -rf $tarball --transform s,^,$name/, -C $tmpdir KVM_VERSION \ +mkdir -p $tmpdir/$name +echo $name $tmpdir/$name/KVM_VERSION +touch -d @$mtime $tmpdir/$name/KVM_VERSION +tar -rf $tarball -C $tmpdir $name/KVM_VERSION \ $tarargs rm -rf $tmpdir fi -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] kvm: allow tpr patching to write to write-protected vapic option rom
From: Avi Kivity a...@redhat.com Now that we allow the bios to write protect option roms, we need to allow the tpr patching code to write to this write protected memory. This means using cpu_physical_memory_write_rom() instead of the usual APIs. Fixes Windows XP without flexpriority. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/kvm-tpr-opt.c b/kvm-tpr-opt.c index 46890e2..c929fc8 100644 --- a/kvm-tpr-opt.c +++ b/kvm-tpr-opt.c @@ -73,7 +73,7 @@ static uint8_t read_byte_virt(CPUState *env, target_ulong virt) static void write_byte_virt(CPUState *env, target_ulong virt, uint8_t b) { -stb_phys(map_addr(env, virt, NULL), b); +cpu_physical_memory_write_rom(map_addr(env, virt, NULL), b, 1); } struct vapic_bios { @@ -107,7 +107,7 @@ static void update_vbios_real_tpr(void) cpu_physical_memory_rw(vbios_desc_phys, (void *)vapic_bios, sizeof vapic_bios, 0); vapic_bios.real_tpr = real_tpr; vapic_bios.vcpu_shift = 7; -cpu_physical_memory_rw(vbios_desc_phys, (void *)vapic_bios, sizeof vapic_bios, 1); +cpu_physical_memory_write_rom(vbios_desc_phys, (void *)vapic_bios, sizeof vapic_bios); } static unsigned modrm_reg(uint8_t modrm) @@ -174,6 +174,7 @@ static int bios_is_mapped(CPUState *env, uint64_t rip) unsigned perms; uint32_t i; uint32_t offset, fixup, start = vapic_bios_addr ? 
: 0xe; +uint32_t patch; if (bios_enabled) return 1; @@ -198,7 +199,8 @@ static int bios_is_mapped(CPUState *env, uint64_t rip) for (i = vapic_bios.fixup_start; i vapic_bios.fixup_end; i += 4) { offset = ldl_phys(phys + i - vapic_bios.virt_base); fixup = phys + offset; - stl_phys(fixup, ldl_phys(fixup) + bios_addr - vapic_bios.virt_base); +patch = ldl_phys(fixup) + bios_addr - vapic_bios.virt_base; +cpu_physical_memory_write_rom(fixup, (uint8_t *)patch, 4); } vapic_phys = vapic_bios.vapic - vapic_bios.virt_base + phys; return 1; @@ -225,7 +227,7 @@ int kvm_tpr_enable_vapic(CPUState *env) return 0; kvm_enable_vapic(env, vapic_phys + (pcr_cpu 7)); -cpu_physical_memory_rw(vapic_phys + (pcr_cpu 7) + 4, one, 1, 1); +cpu_physical_memory_write_rom(vapic_phys + (pcr_cpu 7) + 4, one, 1); env-kvm_vcpu_update_vapic = 0; bios_enabled = 1; return 1; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Revert p2v monitor command: translate guest physical address to host virtual address
From: Avi Kivity a...@redhat.com This reverts commit c3fe515c911b9828d6e3e659ac794cb7dc76cd66; needs to go through qemu.git. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/monitor.c b/monitor.c index df0cb33..9072c06 100644 --- a/monitor.c +++ b/monitor.c @@ -2301,18 +2301,6 @@ static void do_inject_mce(Monitor *mon, const QDict *qdict) } #endif -static void do_p2v(Monitor *mon, const QDict *qdict) -{ -target_phys_addr_t size = TARGET_PAGE_SIZE; -target_phys_addr_t addr = qdict_get_int(qdict, addr); -void *vaddr; - -vaddr = cpu_physical_memory_map(addr, size, 0); -monitor_printf(mon, Guest physical address %p is mapped at - host virtual address %p\n, (void *)addr, vaddr); -cpu_physical_memory_unmap(vaddr, size, 0, 0); -} - static int do_getfd(Monitor *mon, const QDict *qdict, QObject **ret_data) { const char *fdname = qdict_get_str(qdict, fdname); diff --git a/qemu-monitor.hx b/qemu-monitor.hx index 2698a42..620f937 100644 --- a/qemu-monitor.hx +++ b/qemu-monitor.hx @@ -459,19 +459,6 @@ Start gdbserver session (default @var{port}=1234) ETEXI { -.name = p2v, -.args_type = fmt:/,addr:l, -.params = /fmt addr, -.help = translate guest physical 'addr' to host virtual address, -.mhandler.cmd = do_p2v, -}, -STEXI -...@item p2v @var{addr} -...@findex mce -Translate guest physical @var{addr} to host virtual address. -ETEXI - -{ .name = x, .args_type = fmt:/,addr:l, .params = /fmt addr, -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86 emulator: Eliminate compilation warning in x86_decode_insn()
From: Sheng Yang sh...@linux.intel.com Eliminate: arch/x86/kvm/emulate.c:801: warning: ‘sv’ may be used uninitialized in this function on gcc 4.1.2 Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index aead72e..d0df25d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -798,7 +798,7 @@ done: static void fetch_bit_operand(struct decode_cache *c) { - long sv, mask; + long sv = 0, mask; if (c-dst.type == OP_MEM c-src.type == OP_REG) { mask = ~(c-dst.bytes * 8 - 1); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: update 'root_hpa' out of loop in PAE shadow path
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com The value of 'vcpu-arch.mmu.pae_root' is not modified, so we can update 'root_hpa' out of the loop. Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c94c432..3630046 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2393,8 +2393,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) ++sp-root_count; spin_unlock(vcpu-kvm-mmu_lock); vcpu-arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; - vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root); } + vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root); } else BUG(); @@ -2466,8 +2466,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_unlock(vcpu-kvm-mmu_lock); vcpu-arch.mmu.pae_root[i] = root | pm_mask; - vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root); } + vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root); /* * If we shadow a 32 bit page table with a long mode page -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: set access bit for direct mapping
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Set access bit while setting up the direct page table if it's nonpaging or npt enabled; it's good for the CPU's speculative access Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3630046..88203fa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2240,7 +2240,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, __set_spte(iterator.sptep, __pa(sp-spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK - | shadow_user_mask | shadow_x_mask); + | shadow_user_mask | shadow_x_mask + | shadow_accessed_mask); } } return pt_write; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: cleanup for error mask set while walk guest page table
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Small cleanup for set page fault error code Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2bdd843..a83ff37 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -224,9 +224,7 @@ walk: is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); - access |= write_fault ? PFERR_WRITE_MASK : 0; - access |= fetch_fault ? PFERR_FETCH_MASK : 0; - access |= user_fault ? PFERR_USER_MASK : 0; + access |= write_fault | fetch_fault | user_fault; real_gpa = mmu-translate_gpa(vcpu, gfn_to_gpa(gfn), access); @@ -268,10 +266,9 @@ error: walker-error_code = 0; if (present) walker-error_code |= PFERR_PRESENT_MASK; - if (write_fault) - walker-error_code |= PFERR_WRITE_MASK; - if (user_fault) - walker-error_code |= PFERR_USER_MASK; + + walker-error_code |= write_fault | user_fault; + if (fetch_fault mmu-nx) walker-error_code |= PFERR_FETCH_MASK; if (rsvd_fault) @@ -673,9 +670,9 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, int r; r = FNAME(walk_addr)(walker, vcpu, vaddr, -!!(access PFERR_WRITE_MASK), -!!(access PFERR_USER_MASK), -!!(access PFERR_FETCH_MASK)); +access PFERR_WRITE_MASK, +access PFERR_USER_MASK, +access PFERR_FETCH_MASK); if (r) { gpa = gfn_to_gpa(walker.gfn); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: audit: fix vcpu's spte walking
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com After nested nested paging, it may using long mode to shadow 32/PAE paging guest, so this patch fix it Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index bd2b1be..dcca3e7 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -51,7 +51,7 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) if (!VALID_PAGE(vcpu-arch.mmu.root_hpa)) return; - if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu-arch.mmu.root_hpa; sp = page_header(root); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: audit: unregister audit tracepoints before module unloaded
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com fix: Call Trace: [a01e46ba] ? kvm_mmu_pte_write+0x229/0x911 [kvm] [a01c6ba9] ? gfn_to_memslot+0x39/0xa0 [kvm] [a01c6c26] ? mark_page_dirty+0x16/0x2e [kvm] [a01c6d6f] ? kvm_write_guest_page+0x67/0x7f [kvm] [81066fbd] ? local_clock+0x2a/0x3b [a01d52ce] emulator_write_phys+0x46/0x54 [kvm] .. Code: Bad RIP value. RIP [a0172056] 0xa0172056 RSP 880134f69a70 CR2: a0172056 Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 88203fa..afde64b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3355,15 +3355,6 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) return init_kvm_mmu(vcpu); } -void kvm_mmu_destroy(struct kvm_vcpu *vcpu) -{ - ASSERT(vcpu); - - destroy_kvm_mmu(vcpu); - free_mmu_pages(vcpu); - mmu_free_memory_caches(vcpu); -} - void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) { struct kvm_mmu_page *sp; @@ -3662,4 +3653,16 @@ EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); #ifdef CONFIG_KVM_MMU_AUDIT #include mmu_audit.c +#else +static void mmu_audit_disable(void) { } #endif + +void kvm_mmu_destroy(struct kvm_vcpu *vcpu) +{ + ASSERT(vcpu); + + destroy_kvm_mmu(vcpu); + free_mmu_pages(vcpu); + mmu_free_memory_caches(vcpu); + mmu_audit_disable(); +} -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: audit: introduce audit_printk to cleanup audit code
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Introduce audit_printk, and record audit point instead audit name Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index dcca3e7..66219af 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -19,7 +19,11 @@ #include linux/ratelimit.h -static const char *audit_msg; +static int audit_point; + +#define audit_printk(fmt, args...) \ + printk(KERN_ERR audit: (%s) error:\ + fmt, audit_point_name[audit_point], ##args) typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level); @@ -93,21 +97,18 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) if (sp-unsync) { if (level != PT_PAGE_TABLE_LEVEL) { - printk(KERN_ERR audit: (%s) error: unsync sp: %p level = %d\n, - audit_msg, sp, level); + audit_printk(unsync sp: %p level = %d\n, sp, level); return; } if (*sptep == shadow_notrap_nonpresent_pte) { - printk(KERN_ERR audit: (%s) error: notrap spte in unsync sp: %p\n, - audit_msg, sp); + audit_printk(notrap spte in unsync sp: %p\n, sp); return; } } if (sp-role.direct *sptep == shadow_notrap_nonpresent_pte) { - printk(KERN_ERR audit: (%s) error: notrap spte in direct sp: %p\n, - audit_msg, sp); + audit_printk(notrap spte in direct sp: %p\n, sp); return; } @@ -124,10 +125,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) hpa = pfn PAGE_SHIFT; if ((*sptep PT64_BASE_ADDR_MASK) != hpa) - printk(KERN_ERR xx audit error: (%s) levels %d - pfn %llx hpa %llx ent %llxn, - audit_msg, vcpu-arch.mmu.root_level, - pfn, hpa, *sptep); + audit_printk(levels %d pfn %llx hpa %llx ent %llxn, + vcpu-arch.mmu.root_level, pfn, hpa, *sptep); } static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) @@ -143,11 +142,9 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) if (!gfn_to_memslot(kvm, gfn)) { if (!printk_ratelimit()) 
return; - printk(KERN_ERR %s: no memslot for gfn %llx\n, -audit_msg, gfn); - printk(KERN_ERR %s: index %ld of sp (gfn=%llx)\n, - audit_msg, (long int)(sptep - rev_sp-spt), - rev_sp-gfn); + audit_printk(no memslot for gfn %llx\n, gfn); + audit_printk(index %ld of sp (gfn=%llx)\n, + (long int)(sptep - rev_sp-spt), rev_sp-gfn); dump_stack(); return; } @@ -156,8 +153,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) if (!*rmapp) { if (!printk_ratelimit()) return; - printk(KERN_ERR %s: no rmap for writable spte %llx\n, -audit_msg, *sptep); + audit_printk(no rmap for writable spte %llx\n, *sptep); dump_stack(); } } @@ -198,10 +194,8 @@ void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) spte = rmap_next(kvm, rmapp, NULL); while (spte) { if (is_writable_pte(*spte)) - printk(KERN_ERR %s: (%s) shadow page has - writable mappings: gfn %llx role %x\n, - __func__, audit_msg, sp-gfn, - sp-role.word); + audit_printk(shadow page has writable mappings: gfn +%llx role %x\n, sp-gfn, sp-role.word); spte = rmap_next(kvm, rmapp, spte); } } @@ -228,14 +222,14 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) mmu_spte_walk(vcpu, audit_spte); } -static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int audit_point) +static void kvm_mmu_audit(void *ignore, struct kvm_vcpu *vcpu, int point) { static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); if (!__ratelimit(ratelimit_state)) return; - audit_msg = audit_point_name[audit_point]; + audit_point = point; audit_all_active_sps(vcpu-kvm); audit_vcpu_spte(vcpu); } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info
[COMMIT master] KVM: MMU: Avoid sign extension in mmu_alloc_direct_roots() pae root address
From: Avi Kivity a...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ba7e764..dc1b4fb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2374,7 +2374,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu_page *sp; - int i; + unsigned i; if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(vcpu-kvm-mmu_lock); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: audit: check whether have unsync sps after root sync
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com After root synced, all unsync sps are synced, this patch add a check to make sure it's no unsync sps in VCPU's page table Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index afde64b..ba7e764 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -53,14 +53,18 @@ enum { AUDIT_PRE_PAGE_FAULT, AUDIT_POST_PAGE_FAULT, AUDIT_PRE_PTE_WRITE, - AUDIT_POST_PTE_WRITE + AUDIT_POST_PTE_WRITE, + AUDIT_PRE_SYNC, + AUDIT_POST_SYNC }; char *audit_point_name[] = { pre page fault, post page fault, pre pte write, - post pte write + post pte write, + pre sync, + post sync }; #undef MMU_DEBUG @@ -2516,6 +2520,8 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu-arch.mmu.root_hpa)) return; + + trace_kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); if (vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu-arch.mmu.root_hpa; sp = page_header(root); @@ -2531,6 +2537,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) mmu_sync_children(vcpu, sp); } } + trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); } void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 66219af..4aee32c 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -164,6 +164,14 @@ static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level) inspect_spte_has_rmap(vcpu-kvm, sptep); } +static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level) +{ + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + + if (audit_point == AUDIT_POST_SYNC sp-unsync) + audit_printk(meet unsync sp(%p) after sync root.\n, sp); +} + static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) { int i; @@ -179,7 +187,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) } } -void audit_write_protection(struct kvm 
*kvm, struct kvm_mmu_page *sp) +static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memory_slot *slot; unsigned long *rmapp; @@ -215,6 +223,7 @@ static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level) { audit_sptes_have_rmaps(vcpu, sptep, level); audit_mappings(vcpu, sptep, level); + audit_spte_after_sync(vcpu, sptep, level); } static void audit_vcpu_spte(struct kvm_vcpu *vcpu) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: move access code parsing to FNAME(walk_addr) function
From: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Move access code parsing from caller site to FNAME(walk_addr) function Signed-off-by: Xiao Guangrong xiaoguangr...@cn.fujitsu.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a83ff37..9a5f7bb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -116,16 +116,18 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gva_t addr, int write_fault, - int user_fault, int fetch_fault) + gva_t addr, u32 access) { pt_element_t pte; gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; bool eperm, present, rsvd_fault; - int offset; - u32 access = 0; + int offset, write_fault, user_fault, fetch_fault; + + write_fault = access PFERR_WRITE_MASK; + user_fault = access PFERR_USER_MASK; + fetch_fault = access PFERR_FETCH_MASK; trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, fetch_fault); @@ -215,6 +217,7 @@ walk: int lvl = walker-level; gpa_t real_gpa; gfn_t gfn; + u32 ac; gfn = gpte_to_gfn_lvl(pte, lvl); gfn += (addr PT_LVL_OFFSET_MASK(lvl)) PAGE_SHIFT; @@ -224,10 +227,10 @@ walk: is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); - access |= write_fault | fetch_fault | user_fault; + ac = write_fault | fetch_fault | user_fault; real_gpa = mmu-translate_gpa(vcpu, gfn_to_gpa(gfn), - access); + ac); if (real_gpa == UNMAPPED_GVA) return 0; @@ -282,21 +285,18 @@ error: } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, - int write_fault, int user_fault, int fetch_fault) + struct kvm_vcpu *vcpu, gva_t addr, u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu-arch.mmu, addr, - write_fault, user_fault, fetch_fault); + access); } static int FNAME(walk_addr_nested)(struct guest_walker *walker, struct kvm_vcpu 
*vcpu, gva_t addr, - int write_fault, int user_fault, - int fetch_fault) + u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu-arch.nested_mmu, - addr, write_fault, user_fault, - fetch_fault); + addr, access); } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, @@ -532,7 +532,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, { int write_fault = error_code PFERR_WRITE_MASK; int user_fault = error_code PFERR_USER_MASK; - int fetch_fault = error_code PFERR_FETCH_MASK; struct guest_walker walker; u64 *sptep; int write_pt = 0; @@ -550,8 +549,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, /* * Look up the guest pte for the faulting address. */ - r = FNAME(walk_addr)(walker, vcpu, addr, write_fault, user_fault, -fetch_fault); + r = FNAME(walk_addr)(walker, vcpu, addr, error_code); /* * The page is not mapped by the guest. Let the guest handle it. @@ -669,10 +667,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr)(walker, vcpu, vaddr, -access PFERR_WRITE_MASK, -access PFERR_USER_MASK, -access PFERR_FETCH_MASK); + r = FNAME(walk_addr)(walker, vcpu, vaddr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); @@ -690,10 +685,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr_nested)(walker, vcpu, vaddr, - access PFERR_WRITE_MASK, -
[COMMIT master] KVM: Disable interrupts around get_kernel_ns()
From: Avi Kivity a...@redhat.com get_kernel_ns() wants preemption disabled. It doesn't make a lot of sense during the get/set ioctls (no way to make them non-racy) but the callee wants it. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3729bcb..899acbb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3469,8 +3469,10 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; r = 0; + local_irq_disable(); now_ns = get_kernel_ns(); delta = user_ns.clock - now_ns; + local_irq_enable(); kvm-arch.kvmclock_offset = delta; break; } @@ -3478,8 +3480,10 @@ long kvm_arch_vm_ioctl(struct file *filp, struct kvm_clock_data user_ns; u64 now_ns; + local_irq_disable(); now_ns = get_kernel_ns(); user_ns.clock = kvm-arch.kvmclock_offset + now_ns; + local_irq_enable(); user_ns.flags = 0; r = -EFAULT; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: USB tablet CPU usage pattern. Possible bug?
05.10.2010 09:36, Thomas Løcke wrote: Hey all, The other day I upgraded the kernel on one of my KVM hosts. I went from 2.6.34.1 to 2.6.35.7, and immediately I noticed that my Windows XP guests was now using significantly more CPU while idle, compared to the 2.6.34.1 kernel. All the Windows XP guests are running with -usbdevice tablet. Using the 2.6.34.1 kernel idle CPU usage for the Windows XP guests was sitting at ~5%, with spikes going as high as 10%. Using 2.6.35.7 these numbers were ~20%, with spikes going as high as 35%. Everything appeared to work as usual, except for this higher idle load. I'm using qemu-kvm 0.12.50. All images are raw. No SMP for the Windows XP guests. I downgraded to 2.6.34.7, and the CPU load pattern is now back to normal. The server is a Sun Fire X4270 (dual quad core Xeon 5520, 24GB RAM) running Slackware 13.1 x86_64. I've no idea whether this is intentional, or if it is a bug in the kernel, the KVM modules or in qemu-kvm. It's not only usb tablet, it's any usb device. There were a long thread started by me a while back, initially titled high load with win7 usb tablet and later renamed to high load with usb device here on k...@vger. But there's nothing conclusive in there. There are also several threads on LKML (linux kernel mailing list) about high power consumption and high idle load with recent (2.6.35+ and even some stable) kernels, also without anything conclusive. The two (high cpu utilisation in kvm while guest is only checking usb devices and high idle load in recent kernels) may be related or may be not. It'd be nice to find out what the problem(s) is(are), but so far it weren't done. /mjt -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unit tests and get_user_pages_ptes_fast()
On 10/05/2010 01:59 AM, Marcelo Tosatti wrote: Yep, the drawback is the unnecessary write fault. What i have here is: --- kvm.orig/virt/kvm/kvm_main.c +++ kvm/virt/kvm/kvm_main.c @@ -827,7 +827,7 @@ unsigned long gfn_to_hva(struct kvm *kvm } EXPORT_SYMBOL_GPL(gfn_to_hva); -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *writable) { struct page *page[1]; unsigned long addr; @@ -842,8 +842,16 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t return page_to_pfn(bad_page); } + *writable = 1; npages = get_user_pages_fast(addr, 1, 1, page); + /* attempt to map read-only */ + if (unlikely(npages != 1)) { + npages = get_user_pages_fast(addr, 1, 0, page); + if (npages == 1) + *writable = 0; + } + if (unlikely(npages != 1)) { struct vm_area_struct *vma; Can rebase and resend, if you'd like. That will work for me but not for ksm. I guess it's good to get things going, so please to post it. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 14/18] KVM test: Add a netperf subtest
In the Todo list I find TCP_CRR UDP_RR test case failures. 2) netperf 17:35:11 DEBUG| Execute netperf client test: /root/autotest/client/tests/netperf2/netperf-2.4.5/src/netperf -t TCP_CRR -H 10.16.74.142 -l 60 -- -m 1 17:35:45 ERROR| Fail to execute netperf test, protocol:TCP_CRR 17:35:45 DEBUG| Execute netperf client test: /root/autotest/client/tests/netperf2/netperf-2.4.5/src/netperf -t UDP_RR -H 10.16.74.142 -l 60 -- -m 1 17:36:06 ERROR| Fail to execute netperf test, protocol:UDP_RR I haven't noticed any issues with UDP_RR. But with a RHEL 5.5 guest TCP_CRR fails. With other recent RHEL distros it works fine. Need to figure out if it's a test issue or a RHEL 5.5 issue. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [v2 RFC PATCH 0/4] Implement multiqueue virtio-net
Michael S. Tsirkin m...@redhat.com wrote on 09/19/2010 06:14:43 PM: Could you document how exactly do you measure multistream bandwidth: netperf flags, etc? All results were without any netperf flags or system tuning: for i in $list do netperf -c -C -l 60 -H 192.168.122.1 /tmp/netperf.$$.$i done wait Another script processes the result files. It also displays the start time/end time of each iteration to make sure skew due to parallel netperfs is minimal. I changed the vhost functionality once more to try to get the best model, the new model being: 1. #numtxqs=1 - #vhosts=1, this thread handles both RX/TX. 2. #numtxqs1 - vhost[0] handles RX and vhost[1-MAX] handles TX[0-n], where MAX is 4. Beyond numtxqs=4, the remaining TX queues are handled by vhost threads in round-robin fashion. Results from here on are with these changes, and only tuning is to set each vhost's affinity to CPUs[0-3] (taskset -p f vhost-pids). Any idea where does this come from? Do you see more TX interrupts? RX interrupts? Exits? Do interrupts bounce more between guest CPUs? 4. Identify reasons for single netperf BW regression. After testing various combinations of #txqs, #vhosts, #netperf sessions, I think the drop for 1 stream is due to TX and RX for a flow being processed on different cpus. I did two more tests: 1. Pin vhosts to same CPU: - BW drop is much lower for 1 stream case (- 5 to -8% range) - But performance is not so high for more sessions. 2. Changed vhost to be single threaded: - No degradation for 1 session, and improvement for upto 8, sometimes 16 streams (5-12%). - BW degrades after that, all the way till 128 netperf sessions. - But overall CPU utilization improves. Summary of the entire run (for 1-128 sessions): txq=4: BW: (-2.3) CPU: (-16.5)RCPU: (-5.3) txq=16: BW: (-1.9) CPU: (-24.9)RCPU: (-9.6) I don't see any reasons mentioned above. 
However, for higher number of netperf sessions, I see a big increase in retransmissions: ___ #netperf ORG NEW BW (#retr)BW (#retr) ___ 1 70244 (0) 64102 (0) 4 21421 (0) 36570 (416) 8 21746 (0) 38604 (148) 16 21783 (0) 40632 (464) 32 22677 (0) 37163 (1053) 64 23648 (4) 36449 (2197) 12823251 (2) 31676 (3185) ___ Single netperf case didn't have any retransmissions so that is not the cause for drop. I tested ixgbe (MQ): ___ #netperf ixgbe ixgbe (pin intrs to cpu#0 on both server/client) BW (#retr) BW (#retr) ___ 1 3567 (117) 6000 (251) 2 4406 (477) 6298 (725) 4 6119 (1085) 7208 (3387) 8 6595 (4276) 7381 (15296) 16 6651 (11651)6856 (30394) ___ 5. Test perf in more scenarious: small packets 512 byte packets - BW drop for upto 8 (sometimes 16) netperf sessions, but increases with #sessions: ___ # BW1 BW2 (%) CPU1CPU2 (%)RCPU1 RCPU2 (%) ___ 1 40433800 (-6.0) 50 50 (0) 86 98 (13.9) 2 83587485 (-10.4)153 178 (16.3) 230 264 (14.7) 4 20664 13567 (-34.3) 448 490 (9.3) 530 624 (17.7) 8 25198 17590 (-30.1) 967 1021 (5.5) 10851257 (15.8) 16 23791 24057 (1.1) 19042220 (16.5) 21562578 (19.5) 24 23055 26378 (14.4)28073378 (20.3) 32253901 (20.9) 32 22873 27116 (18.5)37484525 (20.7) 43075239 (21.6) 40 22876 29106 (27.2)47055717 (21.5) 53886591 (22.3) 48 23099 31352 (35.7)56426986 (23.8) 64758085 (24.8) 64 22645 30563 (34.9)75279027 (19.9) 861910656 (23.6) 80 22497 31922 (41.8)937511390 (21.4)10736 13485 (25.6) 96 22509 32718 (45.3)11271 13710 (21.6)12927 16269 (25.8) 128 22255 32397 (45.5)15036 18093 (20.3)17144 21608 (26.0) ___ SUM:BW: (16.7) CPU: (20.6) RCPU: (24.3) ___ host - guest ___ # BW1 BW2 (%) CPU1CPU2 (%)RCPU1
Re: [PATCH] virtio: Use ioeventfd for virtqueue notify
Hi, W.r.t: Note that this is a tradeoff. If an idle core is available and the scheduler places the iothread on that core, then the heavyweight exit is replaced by a lightweight exit + IPI. If the iothread is co-located with the vcpu, then we'll take a heavyweight exit in any case. Q: Does the kvm kernel code check for such a condition and take a heavyweight exit? The first case is very likely if the host cpu is undercommitted and there is heavy I/O activity. This is a typical subsystem benchmark scenario (as opposed to a system benchmark like specvirt). My feeling is that total system throughput will be decreased unless the scheduler is clever enough to place the iothread and vcpu on the same host cpu when the system is overcommitted. Q: Sorry if the answer is obvious here. If the heavyweight exit is taken when both threads are assigned to the same core, how will the system throughput increase? Thanks Rukhsana -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] virtio: Use ioeventfd for virtqueue notify
On 10/05/2010 01:00 PM, rukhsana ansari wrote: Hi, W.r.t: Note that this is a tradeoff. If an idle core is available and the scheduler places the iothread on that core, then the heavyweight exit is replaced by a lightweight exit + IPI. If the iothread is co-located with the vcpu, then we'll take a heavyweight exit in any case. Q: Does the kvm kernel code check for such a condition and take a heavyweight exit? No. The heavyweight exit is caused by a context switch (partial) or return to userspace (full). The first case is very likely if the host cpu is undercommitted and there is heavy I/O activity. This is a typical subsystem benchmark scenario (as opposed to a system benchmark like specvirt). My feeling is that total system throughput will be decreased unless the scheduler is clever enough to place the iothread and vcpu on the same host cpu when the system is overcommitted. Q: Sorry if the answer is obvious here. If the heavyweight exit is taken when both threads are assigned to the same core, how will the system throughput increase? Co-locating threads on the same core reduces cross-core traffic. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 0/3] allow read-only memory mappings
This should probably exit as MMIO instead of a custom exit code, for IO_MEM_ROM slots. But then, it is unclear whether IO_MEM_ROM areas should be mprotected (meaning QEMU has to handle SIGBUS for its own accesses), or whether the attribute should instead be set in a slot's flags. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 1/3] KVM: VMX: remove setting of shadow_base_ptes for EPT
The EPT present/writable bits use the same position as normal pagetable bits. Since direct_map passes ACC_ALL to mmu_set_spte, thus always setting the writable bit on sptes, use the generic PT_PRESENT shadow_base_pte. Also pass present/writable error code information from EPT violation to generic pagefault handler. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Index: kvm/arch/x86/kvm/vmx.c === --- kvm.orig/arch/x86/kvm/vmx.c +++ kvm/arch/x86/kvm/vmx.c @@ -3483,7 +3483,7 @@ static int handle_ept_violation(struct k gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); - return kvm_mmu_page_fault(vcpu, gpa PAGE_MASK, 0); + return kvm_mmu_page_fault(vcpu, gpa, exit_qualification 0x3); } static u64 ept_rsvd_mask(u64 spte, int level) @@ -4408,8 +4408,6 @@ static int __init vmx_init(void) if (enable_ept) { bypass_guest_pf = 0; - kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | - VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch 3/3] KVM: handle read-only host ptes
Instantiate read-only spte if host pte is read-only, and exit to userspace if guest attempts to write. With this in place userspace can mprotect(PROT_READ) guest memory and handle write attempts. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Index: kvm/arch/x86/kvm/mmu.c === --- kvm.orig/arch/x86/kvm/mmu.c +++ kvm/arch/x86/kvm/mmu.c @@ -2205,7 +2205,14 @@ static void direct_pte_prefetch(struct k __direct_pte_prefetch(vcpu, sp, sptep); } -static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, +static int kvm_report_unallowed_write(struct kvm_vcpu *vcpu, gpa_t gpa) +{ + vcpu-run-exit_reason = KVM_EXIT_UNALLOWED_WRITE; + vcpu-run-unallowed_write.gpa = gpa; + return -EPERM; +} + +static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int dirty, int level, gfn_t gfn, pfn_t pfn) { struct kvm_shadow_walk_iterator iterator; @@ -2216,7 +2223,7 @@ static int __direct_map(struct kvm_vcpu for_each_shadow_entry(vcpu, (u64)gfn PAGE_SHIFT, iterator) { if (iterator.level == level) { mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, -0, write, 1, pt_write, +0, write, dirty, pt_write, level, gfn, pfn, false, true); direct_pte_prefetch(vcpu, iterator.sptep); ++vcpu-stat.pf_fixed; @@ -2269,13 +2276,15 @@ static int kvm_handle_bad_page(struct kv return 1; } -static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) +static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write) { int r; int level; int writable; + int dirty = 1; pfn_t pfn; unsigned long mmu_seq; + gfn_t gfn = v PAGE_SHIFT; level = mapping_level(vcpu, gfn); @@ -2293,14 +2302,22 @@ static int nonpaging_map(struct kvm_vcpu pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); /* mmio */ - if (is_error_pfn(pfn) || !writable) + if (is_error_pfn(pfn)) return kvm_handle_bad_page(vcpu-kvm, gfn, pfn); + if (!writable) { + if (write) { + kvm_release_pfn_clean(pfn); + return kvm_report_unallowed_write(vcpu, v); + } + /* instantiate read-only spte */ + dirty = 0; + } 
spin_lock(vcpu-kvm-mmu_lock); if (mmu_notifier_retry(vcpu, mmu_seq)) goto out_unlock; kvm_mmu_free_some_pages(vcpu); - r = __direct_map(vcpu, v, write, level, gfn, pfn); + r = __direct_map(vcpu, v, write, dirty, level, gfn, pfn); spin_unlock(vcpu-kvm-mmu_lock); @@ -2559,7 +2576,6 @@ static gpa_t nonpaging_gva_to_gpa_nested static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code) { - gfn_t gfn; int r; pgprintk(%s: gva %lx error %x\n, __func__, gva, error_code); @@ -2570,10 +2586,7 @@ static int nonpaging_page_fault(struct k ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu-arch.mmu.root_hpa)); - gfn = gva PAGE_SHIFT; - - return nonpaging_map(vcpu, gva PAGE_MASK, -error_code PFERR_WRITE_MASK, gfn); + return nonpaging_map(vcpu, gva, error_code PFERR_WRITE_MASK); } static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, @@ -2584,6 +2597,7 @@ static int tdp_page_fault(struct kvm_vcp int level; int writable; int write = error_code PFERR_WRITE_MASK; + int dirty = 1; gfn_t gfn = gpa PAGE_SHIFT; unsigned long mmu_seq; @@ -2601,13 +2615,22 @@ static int tdp_page_fault(struct kvm_vcp mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); - if (is_error_pfn(pfn) || !writable) + if (is_error_pfn(pfn)) return kvm_handle_bad_page(vcpu-kvm, gfn, pfn); + if (!writable) { + if (write) { + kvm_release_pfn_clean(pfn); + return kvm_report_unallowed_write(vcpu, gpa); + } + /* instantiate read-only spte */ + dirty = 0; + } + spin_lock(vcpu-kvm-mmu_lock); if (mmu_notifier_retry(vcpu, mmu_seq)) goto out_unlock; kvm_mmu_free_some_pages(vcpu); - r = __direct_map(vcpu, gpa, write, level, gfn, pfn); + r = __direct_map(vcpu, gpa, write, dirty, level, gfn, pfn); spin_unlock(vcpu-kvm-mmu_lock); return r; @@ -3261,8 +3284,11 @@ int kvm_mmu_page_fault(struct kvm_vcpu * enum emulation_result er; r = vcpu-arch.mmu.page_fault(vcpu, cr2, error_code); - if (r 0) + if (r 0) { +
[patch 2/3] KVM: dont require read-only host ptes
gfn_to_pfn requires a writable host pte, failing otherwise. Change it to fallback to read-only acquision', informing the callers. Hopefully the ptes are cache-hot so the overhead is minimal. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Index: kvm/arch/ia64/kvm/kvm-ia64.c === --- kvm.orig/arch/ia64/kvm/kvm-ia64.c +++ kvm/arch/ia64/kvm/kvm-ia64.c @@ -1589,7 +1589,7 @@ int kvm_arch_prepare_memory_region(struc return -ENOMEM; for (i = 0; i npages; i++) { - pfn = gfn_to_pfn(kvm, base_gfn + i); + pfn = gfn_to_pfn(kvm, base_gfn + i, NULL); if (!kvm_is_mmio_pfn(pfn)) { kvm_set_pmt_entry(kvm, base_gfn + i, pfn PAGE_SHIFT, Index: kvm/arch/x86/kvm/mmu.c === --- kvm.orig/arch/x86/kvm/mmu.c +++ kvm/arch/x86/kvm/mmu.c @@ -2273,6 +2273,7 @@ static int nonpaging_map(struct kvm_vcpu { int r; int level; + int writable; pfn_t pfn; unsigned long mmu_seq; @@ -2289,10 +2290,10 @@ static int nonpaging_map(struct kvm_vcpu mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - pfn = gfn_to_pfn(vcpu-kvm, gfn); + pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); /* mmio */ - if (is_error_pfn(pfn)) + if (is_error_pfn(pfn) || !writable) return kvm_handle_bad_page(vcpu-kvm, gfn, pfn); spin_lock(vcpu-kvm-mmu_lock); @@ -2581,6 +2582,8 @@ static int tdp_page_fault(struct kvm_vcp pfn_t pfn; int r; int level; + int writable; + int write = error_code PFERR_WRITE_MASK; gfn_t gfn = gpa PAGE_SHIFT; unsigned long mmu_seq; @@ -2597,15 +2600,14 @@ static int tdp_page_fault(struct kvm_vcp mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - pfn = gfn_to_pfn(vcpu-kvm, gfn); - if (is_error_pfn(pfn)) + pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); + if (is_error_pfn(pfn) || !writable) return kvm_handle_bad_page(vcpu-kvm, gfn, pfn); spin_lock(vcpu-kvm-mmu_lock); if (mmu_notifier_retry(vcpu, mmu_seq)) goto out_unlock; kvm_mmu_free_some_pages(vcpu); - r = __direct_map(vcpu, gpa, error_code PFERR_WRITE_MASK, -level, gfn, pfn); + r = __direct_map(vcpu, gpa, write, level, gfn, pfn); spin_unlock(vcpu-kvm-mmu_lock); return r; @@ 
-3043,6 +3045,7 @@ static void mmu_guess_page_from_pte_writ { gfn_t gfn; pfn_t pfn; + int writable; if (!is_present_gpte(gpte)) return; @@ -3050,9 +3053,9 @@ static void mmu_guess_page_from_pte_writ vcpu-arch.update_pte.mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - pfn = gfn_to_pfn(vcpu-kvm, gfn); + pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); - if (is_error_pfn(pfn)) { + if (is_error_pfn(pfn) || !writable) { kvm_release_pfn_clean(pfn); return; } Index: kvm/arch/x86/kvm/paging_tmpl.h === --- kvm.orig/arch/x86/kvm/paging_tmpl.h +++ kvm/arch/x86/kvm/paging_tmpl.h @@ -536,6 +536,7 @@ static int FNAME(page_fault)(struct kvm_ int write_fault = error_code PFERR_WRITE_MASK; int user_fault = error_code PFERR_USER_MASK; int fetch_fault = error_code PFERR_FETCH_MASK; + int writable; struct guest_walker walker; u64 *sptep; int write_pt = 0; @@ -573,10 +574,10 @@ static int FNAME(page_fault)(struct kvm_ mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - pfn = gfn_to_pfn(vcpu-kvm, walker.gfn); + pfn = gfn_to_pfn(vcpu-kvm, walker.gfn, writable); /* mmio */ - if (is_error_pfn(pfn)) + if (is_error_pfn(pfn) || !writable) return kvm_handle_bad_page(vcpu-kvm, walker.gfn, pfn); spin_lock(vcpu-kvm-mmu_lock); Index: kvm/include/linux/kvm_host.h === --- kvm.orig/include/linux/kvm_host.h +++ kvm/include/linux/kvm_host.h @@ -302,7 +302,7 @@ void kvm_set_page_accessed(struct page * pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *writable); pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); int memslot_id(struct kvm *kvm, gfn_t gfn); Index: kvm/virt/kvm/kvm_main.c === --- kvm.orig/virt/kvm/kvm_main.c +++ kvm/virt/kvm/kvm_main.c @@ -948,17 +948,28 @@ unsigned long gfn_to_hva(struct kvm *kvm }
Re: unit tests and get_user_pages_ptes_fast()
On Tue, Oct 05, 2010 at 09:36:59AM +0200, Avi Kivity wrote: On 10/05/2010 01:59 AM, Marcelo Tosatti wrote: Yep, the drawback is the unnecessary write fault. What i have here is: --- kvm.orig/virt/kvm/kvm_main.c +++ kvm/virt/kvm/kvm_main.c @@ -827,7 +827,7 @@ unsigned long gfn_to_hva(struct kvm *kvm } EXPORT_SYMBOL_GPL(gfn_to_hva); -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *writable) { struct page *page[1]; unsigned long addr; @@ -842,8 +842,16 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t return page_to_pfn(bad_page); } + *writable = 1; npages = get_user_pages_fast(addr, 1, 1, page); + /* attempt to map read-only */ + if (unlikely(npages != 1)) { + npages = get_user_pages_fast(addr, 1, 0, page); + if (npages == 1) + *writable = 0; + } + if (unlikely(npages != 1)) { struct vm_area_struct *vma; Can rebase and resend, if you'd like. That will work for me but not for ksm. I guess it's good to get things going, so please to post it. It'll not be so advantageous for ksm because there should be read-faults very rarely on that case. Will post. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 11/18] KVM test: Add a subtest of multicast
On Mon, Sep 27, 2010 at 06:43:57PM -0400, Lucas Meneghel Rodrigues wrote: From: Amos Kong ak...@redhat.com Use 'ping' to test send/recive multicat packets. Flood ping test is also added. Limit guest network as 'bridge' mode, because multicast packets could not be transmitted to guest when using 'user' network. Add join_mcast.py for joining machine into multicast groups. Changes from v1: - Just flush the firewall rules with iptables -F Signed-off-by: Amos Kong ak...@redhat.com --- client/tests/kvm/scripts/join_mcast.py | 37 + client/tests/kvm/tests/multicast.py| 91 client/tests/kvm/tests_base.cfg.sample |9 +++- 3 files changed, 136 insertions(+), 1 deletions(-) create mode 100755 client/tests/kvm/scripts/join_mcast.py create mode 100644 client/tests/kvm/tests/multicast.py diff --git a/client/tests/kvm/scripts/join_mcast.py b/client/tests/kvm/scripts/join_mcast.py new file mode 100755 index 000..350cd5f --- /dev/null +++ b/client/tests/kvm/scripts/join_mcast.py @@ -0,0 +1,37 @@ +#!/usr/bin/python +import socket, struct, os, signal, sys +# -*- coding: utf-8 -*- + + +Script used to join machine into multicast groups. + +...@author Amos Kong ak...@redhat.com + + +if __name__ == __main__: +if len(sys.argv) 4: +print %s [mgroup_count] [prefix] [suffix] +mgroup_count: count of multicast addresses +prefix: multicast address prefix +suffix: multicast address suffix % sys.argv[0] +sys.exit() + +mgroup_count = int(sys.argv[1]) +prefix = sys.argv[2] +suffix = int(sys.argv[3]) + +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +for i in range(mgroup_count): +mcast = prefix + . 
+ str(suffix + i) +try: +mreq = struct.pack(4sl, socket.inet_aton(mcast), + socket.INADDR_ANY) +s.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) +except: +s.close() +print Could not join multicast: %s % mcast +raise + +print join_mcast_pid:%s % os.getpid() +os.kill(os.getpid(), signal.SIGSTOP) +s.close() diff --git a/client/tests/kvm/tests/multicast.py b/client/tests/kvm/tests/multicast.py new file mode 100644 index 000..d1674a6 --- /dev/null +++ b/client/tests/kvm/tests/multicast.py @@ -0,0 +1,91 @@ +import logging, os, re +from autotest_lib.client.common_lib import error +from autotest_lib.client.bin import utils +import kvm_test_utils + + +def run_multicast(test, params, env): + +Test multicast function of nic (rtl8139/e1000/virtio) + +1) Create a VM. +2) Join guest into multicast groups. +3) Ping multicast addresses on host. +4) Flood ping test with different size of packets. +5) Final ping test and check if lose packet. + +@param test: KVM test object. +@param params: Dictionary with the test parameters. +@param env: Dictionary with test environment. 
+ +vm = kvm_test_utils.get_living_vm(env, params.get(main_vm)) +session = kvm_test_utils.wait_for_login(vm, + timeout=int(params.get(login_timeout, 360))) + +def run_guest(cmd): +s, o = session.get_command_status_output(cmd) +if s: +logging.warning('Command %s executed in guest returned exit code ' +'%s, output: %s', cmd, s, o.strip()) + +def run_host_guest(cmd): +run_guest(cmd) +utils.system(cmd, ignore_status=True) + +# flush the firewall rules +cmd_flush = iptables -F +cmd_selinux = (if [ -e /selinux/enforce ]; then setenforce 0; + else echo 'no /selinux/enforce file present'; fi) +run_host_guest(cmd_flush) +run_host_guest(cmd_selinux) +# make sure guest replies to broadcasts +cmd_broadcast = echo 0 /proc/sys/net/ipv4/icmp_echo_ignore Hello pradeep, thanks for your test, it's caused by this error cmd_broadcast = echo 0 /proc/sys/net/ipv4/icmp_echo_ignore_broadcasts +cmd_broadcast_2 = echo 0 /proc/sys/net/ipv4/icmp_echo_ignore_all +run_guest(cmd_broadcast) +run_guest(cmd_broadcast_2) + +# base multicast address +mcast = params.get(mcast, 225.0.0.1) +# count of multicast addresses, less than 20 +mgroup_count = int(params.get(mgroup_count, 5)) +flood_minutes = float(params.get(flood_minutes, 10)) +ifname = vm.get_ifname() +prefix = re.findall(\d+.\d+.\d+, mcast)[0] +suffix = int(re.findall(\d+, mcast)[-1]) +# copy python script to guest for joining guest to multicast groups +mcast_path = os.path.join(test.bindir, scripts/join_mcast.py) +if not vm.copy_files_to(mcast_path, /tmp): +raise error.TestError(Fail to copy %s to guest % mcast_path) +output = session.get_command_output(python
Re: [PATCH 11/18] KVM test: Add a subtest of multicast
On Tue, 2010-10-05 at 20:21 +0800, Amos Kong wrote: On Mon, Sep 27, 2010 at 06:43:57PM -0400, Lucas Meneghel Rodrigues wrote: From: Amos Kong ak...@redhat.com +# make sure guest replies to broadcasts +cmd_broadcast = echo 0 /proc/sys/net/ipv4/icmp_echo_ignore Hello pradeep, thanks for your test, it's caused by this error cmd_broadcast = echo 0 /proc/sys/net/ipv4/icmp_echo_ignore_broadcasts ^ Ouch, I am dumb :( Amos, I've just fixed it, will publish an updated version of this patch, and remove it from the todo list. Thank you very much! -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvm: add oom notifier for virtio balloon
Balloon could cause guest memory oom killing and panic. Add oom notify to leak some memory and retry fill balloon after 5 minutes. At the same time add a mutex to protect balloon operations because we need leak balloon in oom notifier and give back freed value. Thanks Anthony Liguori for his sugestion about inflate retrying. Sometimes it will cause endless inflate/oom/delay loop, so I think next step is to add an option to do noretry-when-oom balloon. Signed-off-by: Dave Young hidave.darks...@gmail.com --- drivers/virtio/virtio_balloon.c | 92 1 file changed, 75 insertions(+), 17 deletions(-) --- linux-2.6.orig/drivers/virtio/virtio_balloon.c 2010-10-02 10:35:44.72335 +0800 +++ linux-2.6/drivers/virtio/virtio_balloon.c 2010-10-05 10:40:24.740001466 +0800 @@ -2,6 +2,7 @@ * Tosatti's implementations. * * Copyright 2008 Rusty Russell IBM Corporation + * oom notify - Dave Young hidave.darks...@gmail.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,6 +26,14 @@ #include linux/freezer.h #include linux/delay.h #include linux/slab.h +#include linux/notifier.h +#include linux/param.h +#include linux/timer.h +#include linux/jiffies.h +#include linux/oom.h + +#define BALLOON_OOM_DELAY_MINUTES 5 +#define BALLOON_OOM_PAGES 256 struct virtio_balloon { @@ -54,6 +63,10 @@ struct virtio_balloon /* Memory statistics */ int need_stats_update; struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; + + struct mutex mutex; + struct timer_list timer; + struct notifier_block oom_nb; }; static struct virtio_device_id id_table[] = { @@ -97,34 +110,37 @@ static void tell_host(struct virtio_ball wait_for_completion(vb-acked); } +static void balloon_oom_timeout(unsigned long arg) +{ + struct virtio_balloon *v = (struct virtio_balloon *)arg; + + wake_up(v-config_change); +} + static void fill_balloon(struct virtio_balloon *vb, size_t num) { /* We can only do one array worth at a time. 
*/ num = min(num, ARRAY_SIZE(vb-pfns)); for (vb-num_pfns = 0; vb-num_pfns num; vb-num_pfns++) { - struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | + struct page *page; + + if (unlikely(timer_pending(vb-timer))) + break; + + page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (!page) { - if (printk_ratelimit()) - dev_printk(KERN_INFO, vb-vdev-dev, - Out of puff! Can't get %zu pages\n, - num); - /* Sleep for at least 1/5 of a second before retry. */ - msleep(200); + if (!page) break; - } + vb-pfns[vb-num_pfns] = page_to_balloon_pfn(page); totalram_pages--; vb-num_pages++; list_add(page-lru, vb-pages); } - /* Didn't get any? Oh well. */ - if (vb-num_pfns == 0) - return; - - tell_host(vb, vb-inflate_vq); + if (vb-num_pfns) + tell_host(vb, vb-inflate_vq); } static void release_pages_by_pfn(const u32 pfns[], unsigned int num) @@ -235,22 +251,53 @@ static void virtballoon_changed(struct v static inline s64 towards_target(struct virtio_balloon *vb) { - u32 v; + u32 v, ret; vb-vdev-config-get(vb-vdev, offsetof(struct virtio_balloon_config, num_pages), v, sizeof(v)); - return (s64)v - vb-num_pages; + ret = (s64)v - vb-num_pages; + + if (ret 0 (unlikely(timer_pending(vb-timer { + printk(KERN_INFO balloon will delay inflate due to oom ...\n); + return 0; + } + + return ret; } static void update_balloon_size(struct virtio_balloon *vb) { - __le32 actual = cpu_to_le32(vb-num_pages); + __le32 actual; + actual = cpu_to_le32(vb-num_pages); vb-vdev-config-set(vb-vdev, offsetof(struct virtio_balloon_config, actual), actual, sizeof(actual)); } +static int balloon_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + struct virtio_balloon *vb; + unsigned long *freed = (unsigned long *)parm; + unsigned int nr; + + vb = container_of(self, struct virtio_balloon, oom_nb); + + mutex_lock(vb-mutex); + nr = min_t(unsigned int, vb-num_pages, BALLOON_OOM_PAGES); + if (nr) { + printk(KERN_INFO balloon
Re: [PATCH 16/18] KVM test: Improve vlan subtest
On Thu, Sep 30, 2010 at 12:29:40PM +0530, pradeep wrote: On Mon, 27 Sep 2010 18:44:02 -0400 Lucas Meneghel Rodrigues l...@redhat.com wrote: From: Amos Kong ak...@redhat.com This is an enhancement of existed vlan test. Rename the vlan_tag.py to vlan.py, it is more reasonable. . Setup arp from /proc/sys/net/ipv4/conf/all/arp_ignore . Multiple vlans exist simultaneously . Test ping between same and different vlans . Test by TCP data transfer, floop ping between same vlan . Maximal plumb/unplumb vlans + +vm.append(kvm_test_utils.get_living_vm(env, params.get(main_vm))) +vm.append(kvm_test_utils.get_living_vm(env, vm2)) + +def add_vlan(session, id, iface=eth0): +if session.get_command_status(vconfig add %s %s % (iface, id)) != 0: +raise error.TestError(Fail to add %s.%s % (iface, id)) HI Lucas I got below error with my guests. With (2.6.32-71 kernel) guest 21:17:23 DEBUG| Sending command: vconfig add eth0 1 21:17:23 DEBUG| Command failed; status: 3, output: ERROR: trying to add VLAN #1 to IF -:eth0:- error: No such device it was caused by the hardcode interface name. lucas, attached the latest vlan.py. try to get right ifname by kvm_test_utils.get_linux_ifname() import logging, time, re from autotest_lib.client.common_lib import error import kvm_test_utils, kvm_utils def run_vlan(test, params, env): Test 802.1Q vlan of NIC, config it by vconfig command. 1) Create two VMs. 2) Setup guests in 10 different vlans by vconfig and using hard-coded ip address. 3) Test by ping between same and different vlans of two VMs. 4) Test by TCP data transfer, floop ping between same vlan of two VMs. 5) Test maximal plumb/unplumb vlans. 6) Recover the vlan config. @param test: KVM test object. @param params: Dictionary with the test parameters. @param env: Dictionary with test environment. 
vm = [] session = [] ifname = [] vm_ip = [] digest_origin = [] vlan_ip = ['', ''] ip_unit = ['1', '2'] subnet = params.get(subnet) vlan_num = int(params.get(vlan_num)) maximal = int(params.get(maximal)) file_size = params.get(file_size) vm.append(kvm_test_utils.get_living_vm(env, params.get(main_vm))) vm.append(kvm_test_utils.get_living_vm(env, vm2)) def add_vlan(session, id, iface=eth0): if session.get_command_status(vconfig add %s %s % (iface, id)) != 0: raise error.TestError(Fail to add %s.%s % (iface, id)) def set_ip_vlan(session, id, ip, iface=eth0): iface = %s.%s % (iface, id) if session.get_command_status(ifconfig %s %s % (iface, ip)) != 0: raise error.TestError(Fail to configure ip for %s % iface) def set_arp_ignore(session, iface=eth0): ignore_cmd = echo 1 /proc/sys/net/ipv4/conf/all/arp_ignore if session.get_command_status(ignore_cmd) != 0: raise error.TestError(Fail to set arp_ignore of %s % session) def rem_vlan(session, id, iface=eth0): rem_vlan_cmd = if [[ -e /proc/net/vlan/%s ]];then vconfig rem %s;fi iface = %s.%s % (iface, id) s = session.get_command_status(rem_vlan_cmd % (iface, iface)) return s def nc_transfer(src, dst): nc_port = kvm_utils.find_free_port(1025, 5334, vm_ip[dst]) listen_cmd = params.get(listen_cmd) send_cmd = params.get(send_cmd) #listen in dst listen_cmd = listen_cmd % (nc_port, receive) session[dst].sendline(listen_cmd) time.sleep(2) #send file from src to dst send_cmd = send_cmd % (vlan_ip[dst], str(nc_port), file) if session[src].get_command_status(send_cmd, timeout = 60) != 0: raise error.TestFail (Fail to send file from vm%s to vm%s % (src+1, dst+1)) s, o = session[dst].read_up_to_prompt(timeout=60) if s != True: raise error.TestFail (Fail to receive file from vm%s to vm%s % (src+1, dst+1)) #check MD5 message digest of receive file in dst output = session[dst].get_command_output(md5sum receive).strip() digest_receive = re.findall(r'(\w+)', output)[0] if digest_receive == digest_origin[src]: logging.info(file succeed received 
in vm %s % vlan_ip[dst]) else: logging.info(digest_origin is %s % digest_origin[src]) logging.info(digest_receive is %s % digest_receive) raise error.TestFail(File transfered differ from origin) session[dst].get_command_status(rm -f receive) for i in range(2): session.append(kvm_test_utils.wait_for_login(vm[i], timeout=int(params.get(login_timeout, 360 if not session[i] : raise error.TestError(Could not log into guest(vm%d) % i) logging.info(Logged in)
Re: [patch uq/master 5/8] Export qemu_ram_addr_from_host
On 10/04/2010 01:54 PM, Marcelo Tosatti wrote: To be used by next patches. Signed-off-by: Marcelo Tosattimtosa...@redhat.com Index: qemu/cpu-common.h === --- qemu.orig/cpu-common.h +++ qemu/cpu-common.h @@ -47,6 +47,7 @@ void qemu_ram_free(ram_addr_t addr); /* This should only be used for ram local to a device. */ void *qemu_get_ram_ptr(ram_addr_t addr); /* This should not be used by devices. */ +int do_qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); This is not a great name for a function. A better way to do this would be to make the existing qemu_ram_addr_from_host() - qemu_ram_addr_from_host_nofail(). Regards, Anthony Liguori ram_addr_t qemu_ram_addr_from_host(void *ptr); int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read, Index: qemu/exec.c === --- qemu.orig/exec.c +++ qemu/exec.c @@ -2938,23 +2938,31 @@ void *qemu_get_ram_ptr(ram_addr_t addr) return NULL; } -/* Some of the softmmu routines need to translate from a host pointer - (typically a TLB entry) back to a ram offset. */ -ram_addr_t qemu_ram_addr_from_host(void *ptr) +int do_qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) { RAMBlock *block; uint8_t *host = ptr; QLIST_FOREACH(block,ram_list.blocks, next) { if (host - block-host block-length) { -return block-offset + (host - block-host); +*ram_addr = block-offset + (host - block-host); +return 0; } } +return -1; +} -fprintf(stderr, Bad ram pointer %p\n, ptr); -abort(); +/* Some of the softmmu routines need to translate from a host pointer + (typically a TLB entry) back to a ram offset. 
*/ +ram_addr_t qemu_ram_addr_from_host(void *ptr) +{ +ram_addr_t ram_addr; -return 0; +if (do_qemu_ram_addr_from_host(ptr,ram_addr)) { +fprintf(stderr, Bad ram pointer %p\n, ptr); +abort(); +} +return ram_addr; } static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 14/18] KVM test: Add a netperf subtest
On Tue, Oct 05, 2010 at 02:30:18PM +0530, pradeep wrote: In Todo list i find TCP_CRR UDP_RR test case failures. 2) netperf 17:35:11 DEBUG| Execute netperf client test: /root/autotest/client/tests/netperf2/netperf-2.4.5/src/netperf -t TCP_CRR -H 10.16.74.142 -l 60 -- -m 1 17:35:45 ERROR| Fail to execute netperf test, protocol:TCP_CRR 17:35:45 DEBUG| Execute netperf client test: /root/autotest/client/tests/netperf2/netperf-2.4.5/src/netperf -t UDP_RR -H 10.16.74.142 -l 60 -- -m 1 17:36:06 ERROR| Fail to execute netperf test, protocol:UDP_RR I havent noticed any issues with UDP_RR But with RHEL 5.5 guest TCP_CRR fails. with other RHEL latest distro it works fine. Need to figure out if its test issue or RHEL 5.5 issue. This case can pass with rhel5.5 rhel6.0, not test with fedora. it would not be the problem of testcase. I did not touch this problem, can you provide more debug info ? eg, tcpdump, ... -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvm: allow tpr patching to write to write-protected vapic option rom
Now that we allow the bios to write protect option roms, we need to allow the tpr patching code to write to this write protected memory. This means using cpu_physical_memory_write_rom() instead of the usual APIs. Fixes Windows XP without flexpriority. Signed-off-by: Avi Kivity a...@redhat.com --- kvm-tpr-opt.c | 10 ++ 1 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kvm-tpr-opt.c b/kvm-tpr-opt.c index 46890e2..c929fc8 100644 --- a/kvm-tpr-opt.c +++ b/kvm-tpr-opt.c @@ -73,7 +73,7 @@ static uint8_t read_byte_virt(CPUState *env, target_ulong virt) static void write_byte_virt(CPUState *env, target_ulong virt, uint8_t b) { -stb_phys(map_addr(env, virt, NULL), b); +cpu_physical_memory_write_rom(map_addr(env, virt, NULL), b, 1); } struct vapic_bios { @@ -107,7 +107,7 @@ static void update_vbios_real_tpr(void) cpu_physical_memory_rw(vbios_desc_phys, (void *)vapic_bios, sizeof vapic_bios, 0); vapic_bios.real_tpr = real_tpr; vapic_bios.vcpu_shift = 7; -cpu_physical_memory_rw(vbios_desc_phys, (void *)vapic_bios, sizeof vapic_bios, 1); +cpu_physical_memory_write_rom(vbios_desc_phys, (void *)vapic_bios, sizeof vapic_bios); } static unsigned modrm_reg(uint8_t modrm) @@ -174,6 +174,7 @@ static int bios_is_mapped(CPUState *env, uint64_t rip) unsigned perms; uint32_t i; uint32_t offset, fixup, start = vapic_bios_addr ? 
: 0xe; +uint32_t patch; if (bios_enabled) return 1; @@ -198,7 +199,8 @@ static int bios_is_mapped(CPUState *env, uint64_t rip) for (i = vapic_bios.fixup_start; i vapic_bios.fixup_end; i += 4) { offset = ldl_phys(phys + i - vapic_bios.virt_base); fixup = phys + offset; - stl_phys(fixup, ldl_phys(fixup) + bios_addr - vapic_bios.virt_base); +patch = ldl_phys(fixup) + bios_addr - vapic_bios.virt_base; +cpu_physical_memory_write_rom(fixup, (uint8_t *)patch, 4); } vapic_phys = vapic_bios.vapic - vapic_bios.virt_base + phys; return 1; @@ -225,7 +227,7 @@ int kvm_tpr_enable_vapic(CPUState *env) return 0; kvm_enable_vapic(env, vapic_phys + (pcr_cpu 7)); -cpu_physical_memory_rw(vapic_phys + (pcr_cpu 7) + 4, one, 1, 1); +cpu_physical_memory_write_rom(vapic_phys + (pcr_cpu 7) + 4, one, 1); env-kvm_vcpu_update_vapic = 0; bios_enabled = 1; return 1; -- 1.7.2.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: Disable interrupts around get_kernel_ns()
get_kernel_ns() wants preemption disabled. It doesn't make a lot of sense during the get/set ioctls (no way to make them non-racy) but the callee wants it. Signed-off-by: Avi Kivity a...@redhat.com --- arch/x86/kvm/x86.c |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3729bcb..899acbb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3469,8 +3469,10 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; r = 0; + local_irq_disable(); now_ns = get_kernel_ns(); delta = user_ns.clock - now_ns; + local_irq_enable(); kvm-arch.kvmclock_offset = delta; break; } @@ -3478,8 +3480,10 @@ long kvm_arch_vm_ioctl(struct file *filp, struct kvm_clock_data user_ns; u64 now_ns; + local_irq_disable(); now_ns = get_kernel_ns(); user_ns.clock = kvm-arch.kvmclock_offset + now_ns; + local_irq_enable(); user_ns.flags = 0; r = -EFAULT; -- 1.7.2.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] trace exit to userspace event
Add tracepoint for userspace exit. Signed-off-by: Gleb Natapov g...@redhat.com diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 6dd3a51..fb44da0 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -6,6 +6,31 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm +#define ERSN(x) { KVM_EXIT_##x, KVM_EXIT_ #x } + +#define kvm_trace_exit_reason \ + ERSN(UNKNOWN), ERSN(EXCEPTION), ERSN(IO), ERSN(HYPERCALL), \ + ERSN(DEBUG), ERSN(HLT), ERSN(MMIO), ERSN(IRQ_WINDOW_OPEN), \ + ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR),\ + ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\ + ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI) + +TRACE_EVENT(kvm_userspace_exit, + TP_PROTO(__u32 reason), + TP_ARGS(reason), + + TP_STRUCT__entry( + __field(__u32, reason ) + ), + + TP_fast_assign( + __entry-reason = reason; + ), + + TP_printk(reason %s, __print_symbolic(__entry-reason, + kvm_trace_exit_reason)) +); + #if defined(__KVM_HAVE_IOAPIC) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b8499f5..8800713 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1458,6 +1458,8 @@ static long kvm_vcpu_ioctl(struct file *filp, if (arg) goto out; r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu-run); + if (r = 0) + trace_kvm_userspace_exit(vcpu-run-exit_reason); break; case KVM_GET_REGS: { struct kvm_regs *kvm_regs; -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/19] KVM test: Add a subtest of multicast
From: Amos Kong ak...@redhat.com Use 'ping' to test send/recive multicat packets. Flood ping test is also added. Limit guest network as 'bridge' mode, because multicast packets could not be transmitted to guest when using 'user' network. Add join_mcast.py for joining machine into multicast groups. Changes from v4: - Fixed a mistake made during one of the rebases Changes from v1: - Just flush the firewall rules with iptables -F Signed-off-by: Amos Kong ak...@redhat.com --- client/tests/kvm/scripts/join_mcast.py | 37 + client/tests/kvm/tests/multicast.py| 91 client/tests/kvm/tests_base.cfg.sample |9 +++- 3 files changed, 136 insertions(+), 1 deletions(-) create mode 100755 client/tests/kvm/scripts/join_mcast.py create mode 100644 client/tests/kvm/tests/multicast.py diff --git a/client/tests/kvm/scripts/join_mcast.py b/client/tests/kvm/scripts/join_mcast.py new file mode 100755 index 000..350cd5f --- /dev/null +++ b/client/tests/kvm/scripts/join_mcast.py @@ -0,0 +1,37 @@ +#!/usr/bin/python +import socket, struct, os, signal, sys +# -*- coding: utf-8 -*- + + +Script used to join machine into multicast groups. + +...@author Amos Kong ak...@redhat.com + + +if __name__ == __main__: +if len(sys.argv) 4: +print %s [mgroup_count] [prefix] [suffix] +mgroup_count: count of multicast addresses +prefix: multicast address prefix +suffix: multicast address suffix % sys.argv[0] +sys.exit() + +mgroup_count = int(sys.argv[1]) +prefix = sys.argv[2] +suffix = int(sys.argv[3]) + +s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +for i in range(mgroup_count): +mcast = prefix + . 
+ str(suffix + i) +try: +mreq = struct.pack(4sl, socket.inet_aton(mcast), + socket.INADDR_ANY) +s.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) +except: +s.close() +print Could not join multicast: %s % mcast +raise + +print join_mcast_pid:%s % os.getpid() +os.kill(os.getpid(), signal.SIGSTOP) +s.close() diff --git a/client/tests/kvm/tests/multicast.py b/client/tests/kvm/tests/multicast.py new file mode 100644 index 000..a47779a --- /dev/null +++ b/client/tests/kvm/tests/multicast.py @@ -0,0 +1,91 @@ +import logging, os, re +from autotest_lib.client.common_lib import error +from autotest_lib.client.bin import utils +import kvm_test_utils + + +def run_multicast(test, params, env): + +Test multicast function of nic (rtl8139/e1000/virtio) + +1) Create a VM. +2) Join guest into multicast groups. +3) Ping multicast addresses on host. +4) Flood ping test with different size of packets. +5) Final ping test and check if lose packet. + +@param test: KVM test object. +@param params: Dictionary with the test parameters. +@param env: Dictionary with test environment. 
+ +vm = kvm_test_utils.get_living_vm(env, params.get(main_vm)) +session = kvm_test_utils.wait_for_login(vm, + timeout=int(params.get(login_timeout, 360))) + +def run_guest(cmd): +s, o = session.get_command_status_output(cmd) +if s: +logging.warning('Command %s executed in guest returned exit code ' +'%s, output: %s', cmd, s, o.strip()) + +def run_host_guest(cmd): +run_guest(cmd) +utils.system(cmd, ignore_status=True) + +# flush the firewall rules +cmd_flush = iptables -F +cmd_selinux = (if [ -e /selinux/enforce ]; then setenforce 0; + else echo 'no /selinux/enforce file present'; fi) +run_host_guest(cmd_flush) +run_host_guest(cmd_selinux) +# make sure guest replies to broadcasts +cmd_broadcast = echo 0 /proc/sys/net/ipv4/icmp_echo_ignore_broadcasts +cmd_broadcast_2 = echo 0 /proc/sys/net/ipv4/icmp_echo_ignore_all +run_guest(cmd_broadcast) +run_guest(cmd_broadcast_2) + +# base multicast address +mcast = params.get(mcast, 225.0.0.1) +# count of multicast addresses, less than 20 +mgroup_count = int(params.get(mgroup_count, 5)) +flood_minutes = float(params.get(flood_minutes, 10)) +ifname = vm.get_ifname() +prefix = re.findall(\d+.\d+.\d+, mcast)[0] +suffix = int(re.findall(\d+, mcast)[-1]) +# copy python script to guest for joining guest to multicast groups +mcast_path = os.path.join(test.bindir, scripts/join_mcast.py) +if not vm.copy_files_to(mcast_path, /tmp): +raise error.TestError(Fail to copy %s to guest % mcast_path) +output = session.get_command_output(python /tmp/join_mcast.py %d %s %d % +(mgroup_count, prefix, suffix)) + +# if success to join multicast, the process will be paused, and return PID. +try: +pid = re.findall(join_mcast_pid:(\d+), output)[0] +except IndexError: +
Re: [patch 2/3] KVM: dont require read-only host ptes
On Tue, Oct 05, 2010 at 08:55:00AM -0300, Marcelo Tosatti wrote: gfn_to_pfn requires a writable host pte, failing otherwise. Change it to fallback to read-only acquision', informing the callers. Hopefully the ptes are cache-hot so the overhead is minimal. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Index: kvm/arch/ia64/kvm/kvm-ia64.c === --- kvm.orig/arch/ia64/kvm/kvm-ia64.c +++ kvm/arch/ia64/kvm/kvm-ia64.c @@ -1589,7 +1589,7 @@ int kvm_arch_prepare_memory_region(struc return -ENOMEM; for (i = 0; i npages; i++) { - pfn = gfn_to_pfn(kvm, base_gfn + i); + pfn = gfn_to_pfn(kvm, base_gfn + i, NULL); if (!kvm_is_mmio_pfn(pfn)) { kvm_set_pmt_entry(kvm, base_gfn + i, pfn PAGE_SHIFT, Index: kvm/arch/x86/kvm/mmu.c === --- kvm.orig/arch/x86/kvm/mmu.c +++ kvm/arch/x86/kvm/mmu.c @@ -2273,6 +2273,7 @@ static int nonpaging_map(struct kvm_vcpu { int r; int level; + int writable; pfn_t pfn; unsigned long mmu_seq; @@ -2289,10 +2290,10 @@ static int nonpaging_map(struct kvm_vcpu mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - pfn = gfn_to_pfn(vcpu-kvm, gfn); + pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); /* mmio */ - if (is_error_pfn(pfn)) + if (is_error_pfn(pfn) || !writable) return kvm_handle_bad_page(vcpu-kvm, gfn, pfn); spin_lock(vcpu-kvm-mmu_lock); @@ -2581,6 +2582,8 @@ static int tdp_page_fault(struct kvm_vcp pfn_t pfn; int r; int level; + int writable; + int write = error_code PFERR_WRITE_MASK; gfn_t gfn = gpa PAGE_SHIFT; unsigned long mmu_seq; @@ -2597,15 +2600,14 @@ static int tdp_page_fault(struct kvm_vcp mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - pfn = gfn_to_pfn(vcpu-kvm, gfn); - if (is_error_pfn(pfn)) + pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); + if (is_error_pfn(pfn) || !writable) Why would we fail read only access to read only memory? Shouldn't we check access type here? -- Gleb. 
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM call agenda for Oct 5
* Chris Wright (chr...@redhat.com) wrote: Please send in any agenda items you are interested in covering. No agenda, call cancelled. thanks, -chris -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unit tests and get_user_pages_ptes_fast()
On Tue, Oct 05, 2010 at 06:22:17AM -0300, Marcelo Tosatti wrote: It'll not be so advantageous for ksm because there should be read-faults very rarely on that case. It'll also make all clean swapcache dirty for no good. Will post. If we've to walk pagetables twice, why don't you do this: writable=1 get_user_pages_fast(write=write_fault) if (!write_fault) writable = __get_user_pages_fast(write=1) That will solve the debugging knob and it'll solve ksm and it'll be optimal for read swapins on exclusive clean swapcache too. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unit tests and get_user_pages_ptes_fast()
On 10/05/2010 04:15 PM, Andrea Arcangeli wrote: On Tue, Oct 05, 2010 at 06:22:17AM -0300, Marcelo Tosatti wrote: It'll not be so advantageous for ksm because there should be read-faults very rarely on that case. It'll also make all clean swapcache dirty for no good. Will post. If we've to walk pagetables twice, why don't you do this: writable=1 get_user_pages_fast(write=write_fault) if (!write_fault) writable = __get_user_pages_fast(write=1) That will solve the debugging knob and it'll solve ksm and it'll be optimal for read swapins on exclusive clean swapcache too. But it means an extra vmexit in the following case: - read fault - page is present and writeable in the Linux page table which is very common. For this you need get_user_pages_ptes_fast(). -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unit tests and get_user_pages_ptes_fast()
On Tue, Oct 05, 2010 at 04:25:05PM +0200, Avi Kivity wrote: On 10/05/2010 04:15 PM, Andrea Arcangeli wrote: On Tue, Oct 05, 2010 at 06:22:17AM -0300, Marcelo Tosatti wrote: It'll not be so advantageous for ksm because there should be read-faults very rarely on that case. It'll also make all clean swapcache dirty for no good. Will post. If we've to walk pagetables twice, why don't you do this: writable=1 get_user_pages_fast(write=write_fault) if (!write_fault) writable = __get_user_pages_fast(write=1) That will solve the debugging knob and it'll solve ksm and it'll be optimal for read swapins on exclusive clean swapcache too. But it means an extra vmexit in the following case: - read fault - page is present and writeable in the Linux page table which is very common. For this you need get_user_pages_ptes_fast(). With a read fault, the VM already sets the pte as writable if the VM permissions allows that and the page isn't shared (i.e. if it's an exclusive swap page). We've just to check if it did that or not. So when it's a read fault we've to run __get_user_pages_fast(write=1) before we can assume the page is mapped writable in the pte. So I don't see the problem... in terms of page faults is optimal. Only downside is having to walk the pagetables twice, the second time to verify if the first gup_fast has marked the host pte writable or not. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: unit tests and get_user_pages_ptes_fast()
On 10/05/2010 04:32 PM, Andrea Arcangeli wrote: On Tue, Oct 05, 2010 at 04:25:05PM +0200, Avi Kivity wrote: On 10/05/2010 04:15 PM, Andrea Arcangeli wrote: On Tue, Oct 05, 2010 at 06:22:17AM -0300, Marcelo Tosatti wrote: It'll not be so advantageous for ksm because there should be read-faults very rarely on that case. It'll also make all clean swapcache dirty for no good. Will post. If we've to walk pagetables twice, why don't you do this: writable=1 get_user_pages_fast(write=write_fault) if (!write_fault) writable = __get_user_pages_fast(write=1) That will solve the debugging knob and it'll solve ksm and it'll be optimal for read swapins on exclusive clean swapcache too. But it means an extra vmexit in the following case: - read fault - page is present and writeable in the Linux page table which is very common. For this you need get_user_pages_ptes_fast(). With a read fault, the VM already sets the pte as writable if the VM permissions allows that and the page isn't shared (i.e. if it's an exclusive swap page). We've just to check if it did that or not. So when it's a read fault we've to run __get_user_pages_fast(write=1) before we can assume the page is mapped writable in the pte. So I don't see the problem... in terms of page faults is optimal. Only downside is having to walk the pagetables twice, the second time to verify if the first gup_fast has marked the host pte writable or not. You're right, I misread your pseudocode. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
8 NIC limit
Hello list: I'm working on a project that calls for the creation of a firewall in KVM. While adding a 20-interface trunk of virtio adapters to bring in a dual 10GB bond, I've discovered an 8 NIC limit in QEMU. I found the following thread in the list archives detailing a similar problem: http://kerneltrap.org/mailarchive/linux-kvm/2009/1/29/4848304 It includes a patch for the file qemu/net.h to allow 24 NICs: https://bugs.launchpad.net/ubuntu/+source/qemu-kvm;qemu-kvm/+bug/595873/+attachment/1429544/+files/max_nics.patch In my case I want to attach 29, and have simply changed line 8 to 30 from 24. This will be the first patch I've ever had to do, and so far my internet search yields results that don't seem to apply. Would someone like to recommend a pertinent tutorial? Many thanks -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
kvm guest reseting incoming connection
Hi, i have trying to play with KVM on gentoo and stumbled upon a guest reseting incoming connection. (looks like qemu cheating the guest by sending fake tcp-reset). below i send typical ssh session: tcpdump: WARNING: eth0: no IPv4 address assigned tcpdump: verbose output suppressed, use -v or -vv for full protocol decode listening on eth0, link-type EN10MB (Ethernet), capture size 68 bytes 16:53:55.533949 IP Y.Y.79.34.59793 X.X.79.231.22: S 41315573:41315573(0) win 5840 mss 1460,[|tcp] 16:53:55.534211 IP X.X.79.231.22 Y.Y.79.34.59793: S 3726054219:3726054219(0) ack 41315574 win 5792 mss 1460,[|tcp] 16:53:55.536362 IP Y.Y.79.34.59793 X.X.79.231.22: . ack 1 win 92 nop,nop,timestamp[|tcp] !--- 16:53:55.536514 IP X.X.79.231.22 Y.Y.79.34.59793: R 3726054220:3726054220(0) win 0 ^C4 packets captured 4 packets received by filter 0 packets dropped by kernel tcpdump: verbose output suppressed, use -v or -vv for full protocol decode listening on br0, link-type EN10MB (Ethernet), capture size 68 bytes 16:53:55.533949 IP Y.Y.79.34.59793 X.X.79.231.22: S 41315573:41315573(0) win 5840 mss 1460,[|tcp] 16:53:55.534199 IP X.X.79.231.22 Y.Y.79.34.59793: S 3726054219:3726054219(0) ack 41315574 win 5792 mss 1460,[|tcp] !--- 16:53:55.534228 IP Y.Y.79.34.59793 X.X.79.231.22: R 41315574:41315574(0) win 0 16:53:55.536362 IP Y.Y.79.34.59793 X.X.79.231.22: . ack 1 win 92 nop,nop,timestamp[|tcp] 16:53:55.536504 IP X.X.79.231.22 Y.Y.79.34.59793: R 3726054220:3726054220(0) win 0 16:53:55.536530 IP Y.Y.79.34.59793 X.X.79.231.22: R 4253651723:4253651723(0) ack 1 win 0 eth0 - host's hardware interface br0 - interface bridging eth0 and guests tap ~ # brctl show br0 bridge name bridge id STP enabled interfaces br0 8000.003048def1bc no eth0 tap_dev_ext afaics the only reqirement to trigger this behaviour is a *default* route set for the guest kernel, i.e. somthing like. ip route add 0/0 gw X.X.79.1 inside guest leads to fake reset, but ip route add Y.Y.79.34/32 gw X.X.79.1 works as expected. 
on the other side seems like outgoing tcp sessions aren't affected. I'm stuck. No idea where it comes from and how to debug :( any ideas? -- Best regards. Alexander Y. Fomichev git.u...@gmail.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 8 NIC limit
On Tue, Oct 5, 2010 at 7:48 AM, linux_...@proinbox.com wrote: Hello list: I'm working on a project that calls for the creation of a firewall in KVM. While adding a 20-interface trunk of virtio adapters to bring in a dual 10GB bond, I've discovered an 8 NIC limit in QEMU. I found the following thread in the list archives detailing a similar problem: http://kerneltrap.org/mailarchive/linux-kvm/2009/1/29/4848304 It includes a patch for the file qemu/net.h to allow 24 NICs: https://bugs.launchpad.net/ubuntu/+source/qemu-kvm;qemu-kvm/+bug/595873/+attachment/1429544/+files/max_nics.patch In my case I want to attach 29, and have simply changed line 8 to 30 from 24. This will be the first patch I've ever had to do, and so far my internet search yields results that don't seem to apply. Would someone like to recommend a pertinent tutorial? Hi there, I commented on the original bug in Launchpad. We're willing and able to carry the patch against qemu-kvm in Ubuntu, I just asked that the reporter at least submit the patch upstream for discussion. I don't see where that has happened yet. It's a trivial patch to submit. Please note in that bug a pointer to the mailing list thread, if you start one. To your specific question, different communities have different requirements on patch submission, so you do need to consult each community. A good place to start might be the Documentation/SubmittingPatches how-to in the kernel tree: * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blob_plain;f=Documentation/SubmittingPatches;hb=HEAD In this case, I think you're going to want to send your patch to the qemu-devel (on CC) mailing list (perhaps in addition to sending it here, to the kvm list). :-Dustin -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 8 NIC limit
linux_...@proinbox.com writes: Hello list: I'm working on a project that calls for the creation of a firewall in KVM. While adding a 20-interface trunk of virtio adapters to bring in a dual 10GB bond, I've discovered an 8 NIC limit in QEMU. Have you tried creating NICs with -device? The limit shouldn't apply there. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] PCI: Export pci_map_option_rom()
* Alex Williamson (alex.william...@redhat.com) wrote: Allow it to be referenced outside of hw/pci.c so we can register option ROM BARs using the default mapping routine. Signed-off-by: Alex Williamson alex.william...@redhat.com Acked-by: Chris Wright chr...@redhat.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Fwd: Re: 8 NIC limit
Forgot to cc list, forwarding. In this case, I think you're going to want to send your patch to the qemu-devel (on CC) mailing list (perhaps in addition to sending it here, to the kvm list). Will do, thanks for the pointer. Before I do so, I'd like to bring up one thing that comes to mind. I don't know how to make the determination, but it makes sense to me for the limit defined here to be indicitive of an actual limitation, rather than what seems an arbitrary best-guess as to the most someone might need. If the change ends up being permanent, then I would hope it would be a large enough value to provide a degree of extensibility and prevent the necessity of bumping it up again later when someone else comes along with even greater bandwidth requirements. Perhaps someone could provide some guidance as to a sane, higher number, as opposed to an arbitrary '65000' which would surely prevent this from happening again (knock on wood). For the time being I still have to find something to help learn how to implement the change locally. I rarely have to compile let alone deal with patches, so to me at least this is a considerable obstacle. -Thanks On Tue, 05 Oct 2010 08:24 -0700, Dustin Kirkland kirkl...@canonical.com wrote: On Tue, Oct 5, 2010 at 7:48 AM, linux_...@proinbox.com wrote: Hello list: I'm working on a project that calls for the creation of a firewall in KVM. While adding a 20-interface trunk of virtio adapters to bring in a dual 10GB bond, I've discovered an 8 NIC limit in QEMU. I found the following thread in the list archives detailing a similar problem: http://kerneltrap.org/mailarchive/linux-kvm/2009/1/29/4848304 It includes a patch for the file qemu/net.h to allow 24 NICs: https://bugs.launchpad.net/ubuntu/+source/qemu-kvm;qemu-kvm/+bug/595873/+attachment/1429544/+files/max_nics.patch In my case I want to attach 29, and have simply changed line 8 to 30 from 24. 
This will be the first patch I've ever had to do, and so far my internet search yields results that don't seem to apply. Would someone like to recommend a pertinent tutorial? Hi there, I commented on the original bug in Launchpad. We're willing and able to carry the patch against qemu-kvm in Ubuntu, I just asked that the reporter at least submit the patch upstream for discussion. I don't see where that has happened yet. It's a trivial patch to submit. Please note in that bug a pointer to the mailing list thread, if you start one. To your specific question, different communities have different requirements on patch submission, so you do need to consult each community. A good place to start might be the Documentation/SubmittingPatches how-to in the kernel tree: * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blob_plain;f=Documentation/SubmittingPatches;hb=HEAD In this case, I think you're going to want to send your patch to the qemu-devel (on CC) mailing list (perhaps in addition to sending it here, to the kvm list). :-Dustin -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 8 NIC limit
Have you tried creating NICs with -device? I'm not sure what that is, will look into it, thanks. I'm using ProxmoxVE, and currently add them via a web interface. Someone happens to host a screenshot of that part here: http://c-nergy.be/blog/wp-content/uploads/Proxmox_Net2.png On Tue, 05 Oct 2010 17:57 +0200, Markus Armbruster arm...@redhat.com wrote: linux_...@proinbox.com writes: Hello list: I'm working on a project that calls for the creation of a firewall in KVM. While adding a 20-interface trunk of virtio adapters to bring in a dual 10GB bond, I've discovered an 8 NIC limit in QEMU. Have you tried creating NICs with -device? The limit shouldn't apply there. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [patch uq/master 0/8] port qemu-kvm's MCE support
Marcelo, Am 04.10.2010 um 20:54 schrieb Marcelo Tosatti: I assume something went wrong with your cover letter here. It would've been nice to see MCE spelled out or summarized for those of us that don't speak x86. Cheers, Andreas -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 8 NIC limit
On 10/5/2010 9:48 AM, linux_...@proinbox.com wrote: Hello list: I'm working on a project that calls for the creation of a firewall in KVM. While adding a 20-interface trunk of virtio adapters to bring in a dual 10GB bond, I've discovered an 8 NIC limit in QEMU. I found the following thread in the list archives detailing a similar problem: http://kerneltrap.org/mailarchive/linux-kvm/2009/1/29/4848304 It includes a patch for the file qemu/net.h to allow 24 NICs: https://bugs.launchpad.net/ubuntu/+source/qemu-kvm;qemu-kvm/+bug/595873/+attachment/1429544/+files/max_nics.patch In my case I want to attach 29, and have simply changed line 8 to 30 from 24. I'd guess you'll bump into a pci device number limit (I believe it is 32 at the moment). This will be the first patch I've ever had to do, and so far my internet search yields results that don't seem to apply. Would someone like to recommend a pertinent tutorial? Many thanks -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[no subject]
subscribe -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
booting a guest via USB
Can kvm boot a guest OS in an LVM on a USB hard disk? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
malloc implementation in kvm
Hi, can someone point me to the correct link? I want to go through the malloc implementation in kvm. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 2/3] KVM: dont require read-only host ptes
On Tue, Oct 05, 2010 at 03:43:01PM +0200, Gleb Natapov wrote: --- kvm.orig/arch/x86/kvm/mmu.c +++ kvm/arch/x86/kvm/mmu.c @@ -2273,6 +2273,7 @@ static int nonpaging_map(struct kvm_vcpu { int r; int level; + int writable; pfn_t pfn; unsigned long mmu_seq; @@ -2289,10 +2290,10 @@ static int nonpaging_map(struct kvm_vcpu mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - pfn = gfn_to_pfn(vcpu-kvm, gfn); + pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); /* mmio */ - if (is_error_pfn(pfn)) + if (is_error_pfn(pfn) || !writable) return kvm_handle_bad_page(vcpu-kvm, gfn, pfn); spin_lock(vcpu-kvm-mmu_lock); @@ -2581,6 +2582,8 @@ static int tdp_page_fault(struct kvm_vcp pfn_t pfn; int r; int level; + int writable; + int write = error_code PFERR_WRITE_MASK; gfn_t gfn = gpa PAGE_SHIFT; unsigned long mmu_seq; @@ -2597,15 +2600,14 @@ static int tdp_page_fault(struct kvm_vcp mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - pfn = gfn_to_pfn(vcpu-kvm, gfn); - if (is_error_pfn(pfn)) + pfn = gfn_to_pfn(vcpu-kvm, gfn, writable); + if (is_error_pfn(pfn) || !writable) Why would we fail read only access to read only memory? Shouldn't we check access type here? -- Gleb. Next patch does that. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 02/12] Halt vcpu if page it tries to access is swapped out.
On Mon, Oct 04, 2010 at 05:56:24PM +0200, Gleb Natapov wrote: If a guest accesses swapped out memory do not swap it in from vcpu thread context. Schedule work to do swapping and put vcpu into halted state instead. Interrupts will still be delivered to the guest and if interrupt will cause reschedule guest will continue to run another task. Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/include/asm/kvm_host.h | 17 +++ arch/x86/kvm/Kconfig|1 + arch/x86/kvm/Makefile |1 + arch/x86/kvm/mmu.c | 51 +- arch/x86/kvm/paging_tmpl.h |4 +- arch/x86/kvm/x86.c | 109 +++- include/linux/kvm_host.h| 31 ++ include/trace/events/kvm.h | 88 virt/kvm/Kconfig|3 + virt/kvm/async_pf.c | 220 +++ virt/kvm/async_pf.h | 36 +++ virt/kvm/kvm_main.c | 57 -- 12 files changed, 603 insertions(+), 15 deletions(-) create mode 100644 virt/kvm/async_pf.c create mode 100644 virt/kvm/async_pf.h + async_pf_cache = NULL; +} + +void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) +{ + INIT_LIST_HEAD(vcpu-async_pf.done); + INIT_LIST_HEAD(vcpu-async_pf.queue); + spin_lock_init(vcpu-async_pf.lock); +} + +static void async_pf_execute(struct work_struct *work) +{ + struct page *page; + struct kvm_async_pf *apf = + container_of(work, struct kvm_async_pf, work); + struct mm_struct *mm = apf-mm; + struct kvm_vcpu *vcpu = apf-vcpu; + unsigned long addr = apf-addr; + gva_t gva = apf-gva; + + might_sleep(); + + use_mm(mm); + down_read(mm-mmap_sem); + get_user_pages(current, mm, addr, 1, 1, 0, page, NULL); + up_read(mm-mmap_sem); + unuse_mm(mm); + + spin_lock(vcpu-async_pf.lock); + list_add_tail(apf-link, vcpu-async_pf.done); + apf-page = page; + spin_unlock(vcpu-async_pf.lock); This can fail, and apf-page become NULL. 
+ if (list_empty_careful(vcpu-async_pf.done)) + return; + + spin_lock(vcpu-async_pf.lock); + work = list_first_entry(vcpu-async_pf.done, typeof(*work), link); + list_del(work-link); + spin_unlock(vcpu-async_pf.lock); + + kvm_arch_async_page_present(vcpu, work); + +free: + list_del(work-queue); + vcpu-async_pf.queued--; + put_page(work-page); + kmem_cache_free(async_pf_cache, work); +} Better handle it here (and other sites). -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 03/12] Retry fault before vmentry
On Mon, Oct 04, 2010 at 05:56:25PM +0200, Gleb Natapov wrote: When page is swapped in it is mapped into guest memory only after guest tries to access it again and generate another fault. To save this fault we can map it immediately since we know that guest is going to access the page. Do it only when tdp is enabled for now. Shadow paging case is more complicated. CR[034] and EFER registers should be switched before doing mapping and then switched back. Acked-by: Rik van Riel r...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/include/asm/kvm_host.h |4 +++- arch/x86/kvm/mmu.c | 16 arch/x86/kvm/paging_tmpl.h |6 +++--- arch/x86/kvm/x86.c |7 +++ virt/kvm/async_pf.c |2 ++ 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5f154d3..b9f263e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -240,7 +240,7 @@ struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); + int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, bool no_apf); void (*inject_page_fault)(struct kvm_vcpu *vcpu); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, @@ -838,6 +838,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, +struct kvm_async_pf *work); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4d49b5e..d85fda8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2558,7 +2558,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct 
kvm_vcpu *vcpu, gva_t vaddr, } static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, - u32 error_code) + u32 error_code, bool no_apf) { gfn_t gfn; int r; @@ -2594,8 +2594,8 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu) return kvm_x86_ops-interrupt_allowed(vcpu); } -static bool try_async_pf(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva, - pfn_t *pfn) +static bool try_async_pf(struct kvm_vcpu *vcpu, bool no_apf, gfn_t gfn, + gva_t gva, pfn_t *pfn) { bool async; @@ -2606,7 +2606,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva, put_page(pfn_to_page(*pfn)); - if (can_do_async_pf(vcpu)) { + if (!no_apf can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(async, *pfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { vcpu-async_pf.work = kvm_double_apf; @@ -2620,8 +2620,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva, return false; } -static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, - u32 error_code) +static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, + bool no_apf) { pfn_t pfn; int r; @@ -2643,7 +2643,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, mmu_seq = vcpu-kvm-mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, gfn, gpa, pfn)) + if (try_async_pf(vcpu, no_apf, gfn, gpa, pfn)) return 0; /* mmio */ @@ -3306,7 +3306,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) int r; enum emulation_result er; - r = vcpu-arch.mmu.page_fault(vcpu, cr2, error_code); + r = vcpu-arch.mmu.page_fault(vcpu, cr2, error_code, false); if (r 0) goto out; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 8154353..9ad90f8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -530,8 +530,8 @@ out_gpte_changed: * Returns: 1 if we need to emulate the instruction, 0 otherwise, or * a negative value on error. 
*/ -static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, -u32 error_code) +static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, + bool no_apf) { int write_fault = error_code PFERR_WRITE_MASK; int user_fault = error_code PFERR_USER_MASK; @@ -574,7 +574,7
Re: [PATCH v6 04/12] Add memory slot versioning and use it to provide fast guest write interface
On Mon, Oct 04, 2010 at 05:56:26PM +0200, Gleb Natapov wrote: Keep track of memslots changes by keeping generation number in memslots structure. Provide kvm_write_guest_cached() function that skips gfn_to_hva() translation if memslots was not changed since previous invocation. Signed-off-by: Gleb Natapov g...@redhat.com --- include/linux/kvm_host.h |7 + include/linux/kvm_types.h |7 + virt/kvm/kvm_main.c | 57 +--- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a08614e..4dff9a1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -199,6 +199,7 @@ struct kvm_irq_routing_table {}; struct kvm_memslots { int nmemslots; + u32 generation; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS]; }; @@ -352,12 +353,18 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, +void *data, unsigned long len); +int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + gpa_t gpa); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); +void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, + gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 7ac0d4e..ee6eb71 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -67,4 +67,11 @@ struct kvm_lapic_irq { u32 dest_id; }; +struct 
gfn_to_hva_cache { + u32 generation; + gpa_t gpa; + unsigned long hva; + struct kvm_memory_slot *memslot; +}; + #endif /* __KVM_TYPES_H__ */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index db58a1b..45ef50c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -687,6 +687,7 @@ skip_lpage: memcpy(slots, kvm-memslots, sizeof(struct kvm_memslots)); if (mem-slot = slots-nmemslots) slots-nmemslots = mem-slot + 1; + slots-generation++; slots-memslots[mem-slot].flags |= KVM_MEMSLOT_INVALID; old_memslots = kvm-memslots; @@ -723,6 +724,7 @@ skip_lpage: memcpy(slots, kvm-memslots, sizeof(struct kvm_memslots)); if (mem-slot = slots-nmemslots) slots-nmemslots = mem-slot + 1; + slots-generation++; /* actual memory is freed via old in kvm_free_physmem_slot below */ if (!npages) { @@ -1247,6 +1249,47 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, return 0; } +int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + gpa_t gpa) +{ + struct kvm_memslots *slots = kvm_memslots(kvm); + int offset = offset_in_page(gpa); + gfn_t gfn = gpa PAGE_SHIFT; + + ghc-gpa = gpa; + ghc-generation = slots-generation; + ghc-memslot = gfn_to_memslot(kvm, gfn); + ghc-hva = gfn_to_hva(kvm, gfn); + if (!kvm_is_error_hva(ghc-hva)) + ghc-hva += offset; + else + return -EFAULT; + + return 0; +} Should use a unique kvm_memslots structure for the cache entry, since it can change in between (use gfn_to_hva_memslot, etc on slots pointer). Also should zap any cached entries on overflow, otherwise malicious userspace could make use of stale slots: +void mark_page_dirty(struct kvm *kvm, gfn_t gfn) +{ + struct kvm_memory_slot *memslot; + + memslot = gfn_to_memslot(kvm, gfn); + mark_page_dirty_in_slot(kvm, memslot, gfn); +} + /* * The vCPU has executed a HLT instruction with in-kernel mode enabled. 
*/ -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [v2 RFC PATCH 0/4] Implement multiqueue virtio-net
On Tue, Oct 05, 2010 at 04:10:00PM +0530, Krishna Kumar2 wrote: Michael S. Tsirkin m...@redhat.com wrote on 09/19/2010 06:14:43 PM: Could you document how exactly do you measure multistream bandwidth: netperf flags, etc? All results were without any netperf flags or system tuning: for i in $list do netperf -c -C -l 60 -H 192.168.122.1 /tmp/netperf.$$.$i done wait Another script processes the result files. It also displays the start time/end time of each iteration to make sure skew due to parallel netperfs is minimal. I changed the vhost functionality once more to try to get the best model, the new model being: 1. #numtxqs=1 - #vhosts=1, this thread handles both RX/TX. 2. #numtxqs1 - vhost[0] handles RX and vhost[1-MAX] handles TX[0-n], where MAX is 4. Beyond numtxqs=4, the remaining TX queues are handled by vhost threads in round-robin fashion. Results from here on are with these changes, and only tuning is to set each vhost's affinity to CPUs[0-3] (taskset -p f vhost-pids). Any idea where does this come from? Do you see more TX interrupts? RX interrupts? Exits? Do interrupts bounce more between guest CPUs? 4. Identify reasons for single netperf BW regression. After testing various combinations of #txqs, #vhosts, #netperf sessions, I think the drop for 1 stream is due to TX and RX for a flow being processed on different cpus. Right. Can we fix it? I did two more tests: 1. Pin vhosts to same CPU: - BW drop is much lower for 1 stream case (- 5 to -8% range) - But performance is not so high for more sessions. 2. Changed vhost to be single threaded: - No degradation for 1 session, and improvement for upto 8, sometimes 16 streams (5-12%). - BW degrades after that, all the way till 128 netperf sessions. - But overall CPU utilization improves. Summary of the entire run (for 1-128 sessions): txq=4: BW: (-2.3) CPU: (-16.5)RCPU: (-5.3) txq=16: BW: (-1.9) CPU: (-24.9)RCPU: (-9.6) I don't see any reasons mentioned above. 
However, for higher number of netperf sessions, I see a big increase in retransmissions: Hmm, ok, and do you see any errors? ___ #netperf ORG NEW BW (#retr)BW (#retr) ___ 1 70244 (0) 64102 (0) 4 21421 (0) 36570 (416) 8 21746 (0) 38604 (148) 16 21783 (0) 40632 (464) 32 22677 (0) 37163 (1053) 64 23648 (4) 36449 (2197) 12823251 (2) 31676 (3185) ___ Single netperf case didn't have any retransmissions so that is not the cause for drop. I tested ixgbe (MQ): ___ #netperf ixgbe ixgbe (pin intrs to cpu#0 on both server/client) BW (#retr) BW (#retr) ___ 1 3567 (117) 6000 (251) 2 4406 (477) 6298 (725) 4 6119 (1085) 7208 (3387) 8 6595 (4276) 7381 (15296) 16 6651 (11651)6856 (30394) Interesting. You are saying we get much more retransmissions with physical nic as well? ___ 5. Test perf in more scenarious: small packets 512 byte packets - BW drop for upto 8 (sometimes 16) netperf sessions, but increases with #sessions: ___ # BW1 BW2 (%) CPU1CPU2 (%)RCPU1 RCPU2 (%) ___ 1 40433800 (-6.0) 50 50 (0) 86 98 (13.9) 2 83587485 (-10.4)153 178 (16.3) 230 264 (14.7) 4 20664 13567 (-34.3) 448 490 (9.3) 530 624 (17.7) 8 25198 17590 (-30.1) 967 1021 (5.5) 10851257 (15.8) 16 23791 24057 (1.1) 19042220 (16.5) 21562578 (19.5) 24 23055 26378 (14.4)28073378 (20.3) 32253901 (20.9) 32 22873 27116 (18.5)37484525 (20.7) 43075239 (21.6) 40 22876 29106 (27.2)47055717 (21.5) 53886591 (22.3) 48 23099 31352 (35.7)56426986 (23.8) 64758085 (24.8) 64 22645 30563 (34.9)75279027 (19.9) 861910656 (23.6) 80 22497 31922 (41.8)937511390 (21.4)10736 13485 (25.6) 96 22509 32718 (45.3)11271 13710 (21.6)12927 16269 (25.8) 128 22255 32397 (45.5)15036 18093 (20.3)17144 21608 (26.0)
[GIT PULL net-next-2.6] vhost-net patchset for 2.6.37
It looks like it was a quiet cycle for vhost-net: probably because most of energy was spent on bugfixes that went in for 2.6.36. People are working on multiqueue, tracing but I'm not sure it'll get done in time for 2.6.37 - so here's a tree with a single patch that helps windows guests which we definitely want in the next kernel. Please merge for 2.6.37. Thanks! The following changes since commit a00eac0c459abecb539fb2a2abd3122dd7ca5d4a: ppp: Use a real SKB control block in fragmentation engine. (2010-10-05 01:36:52 -0700) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost-net-next Jason Wang (1): vhost: max s/g to match qemu drivers/vhost/net.c |2 +- drivers/vhost/vhost.c | 49 - drivers/vhost/vhost.h | 18 -- 3 files changed, 57 insertions(+), 12 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 8 NIC limit
* linux_...@proinbox.com (linux_...@proinbox.com) wrote: I don't know how to make the determination, but it makes sense to me for the limit defined here to be indicative of an actual limitation, rather than what seems an arbitrary best-guess as to the most someone might need. The actual limitation is that we have only a single PCI bus. The network devices are single function devices. While there are 32 slots on the PCI bus, a few are already taken (typically the first 3, leaving 29 free slots). But, as Markus mentioned, this limitation is only for the legacy command line option. The modern qdev way[1] should not hit this same limitation. thanks, -chris [1] http://www.linux-kvm.org/page/Qdev_status -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [patch uq/master 0/8] port qemu-kvm's MCE support
* Andreas Färber (andreas.faer...@web.de) wrote: Am 04.10.2010 um 20:54 schrieb Marcelo Tosatti: I assume something went wrong with your cover letter here. It would've been nice to see MCE spelled out or summarized for those of us that don't speak x86. It would help. The acronym is Machine Check Exception. The patchset should allow (on newer Intel x86 hw with a newer linux kernel) a class of memory errors delivered to the host OS as MCEs to be propagated to the guest OS. Without the patchset, the qemu process associated with the memory where the error took place would be killed. With the patchset, qemu can propagate the error into the guest and allow the guest to kill only the process within the guest that is associated with the memory error. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
virsh 0.8.2 fail to reboot a domain
Hi I get a failure when I try to reboot a DomU: # virsh reboot fc13_1 error: Failed to reboot domain fc13_1 error: this function is not supported by the connection driver: virDomainReboot I haven't tried this command before. Does it have any relation to https://bugzilla.redhat.com/show_bug.cgi?id=624195 I'm running - Fedora release 13 (Goddard) - Linux testbay 2.6.34.7-56.fc13.x86_64 #1 SMP Wed Sep 15 03:36:55 UTC 2010 x86_64 x86_64 x86_64 GNU/Linux # virsh version Compiled against library: libvir 0.8.2 Using library: libvir 0.8.2 Using API: QEMU 0.8.2 Running hypervisor: QEMU 0.12.5 Thanks Henry -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 07/12] Add async PF initialization to PV guest.
On Mon, Oct 04, 2010 at 05:56:29PM +0200, Gleb Natapov wrote: Enable async PF in a guest if async PF capability is discovered. Signed-off-by: Gleb Natapov g...@redhat.com --- Documentation/kernel-parameters.txt |3 + arch/x86/include/asm/kvm_para.h |5 ++ arch/x86/kernel/kvm.c | 92 +++ 3 files changed, 100 insertions(+), 0 deletions(-) +static int __cpuinit kvm_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (unsigned long)hcpu; + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + case CPU_ONLINE_FROZEN: + smp_call_function_single(cpu, kvm_guest_cpu_notify, NULL, 0); wait parameter should probably be 1. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 09/12] Inject asynchronous page fault into a PV guest if page is swapped out.
On Mon, Oct 04, 2010 at 05:56:31PM +0200, Gleb Natapov wrote: Send async page fault to a PV guest if it accesses swapped out memory. Guest will choose another task to run upon receiving the fault. Allow async page fault injection only when guest is in user mode since otherwise guest may be in non-sleepable context and will not be able to reschedule. Vcpu will be halted if guest will fault on the same page again or if vcpu executes kernel code. Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/include/asm/kvm_host.h |3 ++ arch/x86/kvm/mmu.c |1 + arch/x86/kvm/x86.c | 49 -- include/trace/events/kvm.h | 17 virt/kvm/async_pf.c |3 +- 5 files changed, 58 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index de31551..2f6fc87 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -419,6 +419,7 @@ struct kvm_vcpu_arch { gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; struct gfn_to_hva_cache data; u64 msr_val; + u32 id; } apf; }; @@ -594,6 +595,7 @@ struct kvm_x86_ops { }; struct kvm_arch_async_pf { + u32 token; gfn_t gfn; }; @@ -842,6 +844,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d85fda8..de53cab 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2580,6 +2580,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) { struct kvm_arch_async_pf arch; + arch.token = (vcpu-arch.apf.id++ 12) | vcpu-vcpu_id; arch.gfn = gfn; return kvm_setup_async_pf(vcpu, gva, gfn, arch); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
3e123ab..0e69d37 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6225,25 +6225,58 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) } } +static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) +{ + + return kvm_write_guest_cached(vcpu-kvm, vcpu-arch.apf.data, val, + sizeof(val)); +} + void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { - vcpu-arch.mp_state = KVM_MP_STATE_HALTED; - - if (work == kvm_double_apf) + if (work == kvm_double_apf) { trace_kvm_async_pf_doublefault(kvm_rip_read(vcpu)); - else { - trace_kvm_async_pf_not_present(work-gva); - + vcpu-arch.mp_state = KVM_MP_STATE_HALTED; + } else { + trace_kvm_async_pf_not_present(work-arch.token, work-gva); kvm_add_async_pf_gfn(vcpu, work-arch.gfn); + + if (!(vcpu-arch.apf.msr_val KVM_ASYNC_PF_ENABLED) || + kvm_x86_ops-get_cpl(vcpu) == 0) + vcpu-arch.mp_state = KVM_MP_STATE_HALTED; + else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { + vcpu-arch.fault.error_code = 0; + vcpu-arch.fault.address = work-arch.token; + kvm_inject_page_fault(vcpu); + } Missed !kvm_event_needs_reinjection(vcpu) ? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 10/12] Handle async PF in non preemptable context
On Mon, Oct 04, 2010 at 05:56:32PM +0200, Gleb Natapov wrote: If async page fault is received by idle task or when preemp_count is not zero guest cannot reschedule, so do sti; hlt and wait for page to be ready. vcpu can still process interrupts while it waits for the page to be ready. Acked-by: Rik van Riel r...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/kernel/kvm.c | 40 ++-- 1 files changed, 34 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 36fb3e4..f73946f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -37,6 +37,7 @@ #include asm/cpu.h #include asm/traps.h #include asm/desc.h +#include asm/tlbflush.h #define MMU_QUEUE_SIZE 1024 @@ -78,6 +79,8 @@ struct kvm_task_sleep_node { wait_queue_head_t wq; u32 token; int cpu; + bool halted; + struct mm_struct *mm; }; static struct kvm_task_sleep_head { @@ -106,6 +109,11 @@ void kvm_async_pf_task_wait(u32 token) struct kvm_task_sleep_head *b = async_pf_sleepers[key]; struct kvm_task_sleep_node n, *e; DEFINE_WAIT(wait); + int cpu, idle; + + cpu = get_cpu(); + idle = idle_cpu(cpu); + put_cpu(); spin_lock(b-lock); e = _find_apf_task(b, token); @@ -119,19 +127,33 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); + n.mm = current-active_mm; + n.halted = idle || preempt_count() 1; + atomic_inc(n.mm-mm_count); Can't see why this reference is needed. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch uq/master 5/8] Export qemu_ram_addr_from_host
On Tue, Oct 05, 2010 at 07:57:14AM -0500, Anthony Liguori wrote: On 10/04/2010 01:54 PM, Marcelo Tosatti wrote: To be used by next patches. Signed-off-by: Marcelo Tosattimtosa...@redhat.com Index: qemu/cpu-common.h === --- qemu.orig/cpu-common.h +++ qemu/cpu-common.h @@ -47,6 +47,7 @@ void qemu_ram_free(ram_addr_t addr); /* This should only be used for ram local to a device. */ void *qemu_get_ram_ptr(ram_addr_t addr); /* This should not be used by devices. */ +int do_qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); This is not a great name for a function. A better way to do this would be to make the existing qemu_ram_addr_from_host() - qemu_ram_addr_from_host_nofail(). It should fail for all callers in tree now, where address from qemu_get_ram_ptr() is saved somewhere. MCE handler is an exception to that. Are you OK with this: Index: qemu/cpu-common.h === --- qemu.orig/cpu-common.h +++ qemu/cpu-common.h @@ -47,6 +47,7 @@ void qemu_ram_free(ram_addr_t addr); /* This should only be used for ram local to a device. */ void *qemu_get_ram_ptr(ram_addr_t addr); /* This should not be used by devices. */ +int qemu_ram_addr_from_host_nofail(void *ptr, ram_addr_t *ram_addr); ram_addr_t qemu_ram_addr_from_host(void *ptr); int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read, Index: qemu/exec.c === --- qemu.orig/exec.c +++ qemu/exec.c @@ -2938,23 +2938,31 @@ void *qemu_get_ram_ptr(ram_addr_t addr) return NULL; } -/* Some of the softmmu routines need to translate from a host pointer - (typically a TLB entry) back to a ram offset. 
*/ -ram_addr_t qemu_ram_addr_from_host(void *ptr) +int qemu_ram_addr_from_host_nofail(void *ptr, ram_addr_t *ram_addr) { RAMBlock *block; uint8_t *host = ptr; QLIST_FOREACH(block, ram_list.blocks, next) { if (host - block-host block-length) { -return block-offset + (host - block-host); +*ram_addr = block-offset + (host - block-host); +return 0; } } +return -1; +} -fprintf(stderr, Bad ram pointer %p\n, ptr); -abort(); +/* Some of the softmmu routines need to translate from a host pointer + (typically a TLB entry) back to a ram offset. */ +ram_addr_t qemu_ram_addr_from_host(void *ptr) +{ +ram_addr_t ram_addr; -return 0; +if (qemu_ram_addr_from_host_nofail(ptr, ram_addr)) { +fprintf(stderr, Bad ram pointer %p\n, ptr); +abort(); +} +return ram_addr; } static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [patch uq/master 0/8] port qemu-kvm's MCE support
On Tue, Oct 05, 2010 at 11:58:13AM -0700, Chris Wright wrote: * Andreas Färber (andreas.faer...@web.de) wrote: Am 04.10.2010 um 20:54 schrieb Marcelo Tosatti: I assume something went wrong with your cover letter here. It would've been nice to see MCE spelled out or summarized for those of us that don't speak x86. Sorry about that. Will improve on next submission. It would help. The acronym is Machine Check Exception. The patchset should allow (on newer Intel x86 hw with a newer linux kernel) a class of memory errors delivered to the host OS as MCEs to be propagated to the guest OS. Without the patchset, the qemu process associated with the memory where the error took place would be killed. With the patchset, qemu can propagate the error into the guest and allow the guest to kill only the process within the guest that is associated with the memory error. -- Thanks Chris. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch uq/master 5/8] Export qemu_ram_addr_from_host
On 10/05/2010 03:13 PM, Marcelo Tosatti wrote: On Tue, Oct 05, 2010 at 07:57:14AM -0500, Anthony Liguori wrote: On 10/04/2010 01:54 PM, Marcelo Tosatti wrote: To be used by next patches. Signed-off-by: Marcelo Tosattimtosa...@redhat.com Index: qemu/cpu-common.h === --- qemu.orig/cpu-common.h +++ qemu/cpu-common.h @@ -47,6 +47,7 @@ void qemu_ram_free(ram_addr_t addr); /* This should only be used for ram local to a device. */ void *qemu_get_ram_ptr(ram_addr_t addr); /* This should not be used by devices. */ +int do_qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr); This is not a great name for a function. A better way to do this would be to make the existing qemu_ram_addr_from_host() - qemu_ram_addr_from_host_nofail(). It should fail for all callers in tree now, where address from qemu_get_ram_ptr() is saved somewhere. MCE handler is an exception to that. Are you OK with this: I meant the inverse of naming. nofail means something can never fail (because if it does, it aborts). That happens to be the way we currently use that naming convention. An example is qdev_init() vs. qdev_init_nofail(). Regards, Anthony Liguori Index: qemu/cpu-common.h === --- qemu.orig/cpu-common.h +++ qemu/cpu-common.h @@ -47,6 +47,7 @@ void qemu_ram_free(ram_addr_t addr); /* This should only be used for ram local to a device. */ void *qemu_get_ram_ptr(ram_addr_t addr); /* This should not be used by devices. */ +int qemu_ram_addr_from_host_nofail(void *ptr, ram_addr_t *ram_addr); ram_addr_t qemu_ram_addr_from_host(void *ptr); int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read, Index: qemu/exec.c === --- qemu.orig/exec.c +++ qemu/exec.c @@ -2938,23 +2938,31 @@ void *qemu_get_ram_ptr(ram_addr_t addr) return NULL; } -/* Some of the softmmu routines need to translate from a host pointer - (typically a TLB entry) back to a ram offset. 
*/ -ram_addr_t qemu_ram_addr_from_host(void *ptr) +int qemu_ram_addr_from_host_nofail(void *ptr, ram_addr_t *ram_addr) { RAMBlock *block; uint8_t *host = ptr; QLIST_FOREACH(block,ram_list.blocks, next) { if (host - block-host block-length) { -return block-offset + (host - block-host); +*ram_addr = block-offset + (host - block-host); +return 0; } } +return -1; +} -fprintf(stderr, Bad ram pointer %p\n, ptr); -abort(); +/* Some of the softmmu routines need to translate from a host pointer + (typically a TLB entry) back to a ram offset. */ +ram_addr_t qemu_ram_addr_from_host(void *ptr) +{ +ram_addr_t ram_addr; -return 0; +if (qemu_ram_addr_from_host_nofail(ptr,ram_addr)) { +fprintf(stderr, Bad ram pointer %p\n, ptr); +abort(); +} +return ram_addr; } static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] [RFC] KVM test: Change sample control file to allow host kernel install
Use the autotest step engine to conveniently install a host kernel, be it an rpm kernel or a git build. Even though this is standard autotest functionality for quite a while, we haven't integrated it properly so far. Now people testing KVM will have a clean, programatic way to test host kernels. Want to install the kernel from an rpm? Just put it on the control file something like: host_kernel_install = 'rpm' host_kernel_rpm_url = 'http://kojipkgs.fedoraproject.org/packages/kernel/2.6.36/0.32.rc6.git2.fc15/x86_64/kernel-2.6.36-0.32.rc6.git2.fc15.x86_64.rpm' Want to install the kernel from kvm.git, master branch? Just put it on the control file: host_kernel_git_repo = 'git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git' host_kernel_git_branch = 'master' host_kernel_config = 'http://your-server.com/config' You can also specify commits and a list of patches to apply in the kernel before it is built. This change moves the bulk of the KVM test code to a function step_test, and the host kernel install is the implementation of the function step_init(). The interesting variables for the user are defined right at the top of the control file, and, by default no attempt to build/install the host kernel will be made. This change was tested using a fedora box, both kvm.git and a recent fedora rpm kernel were used on a F14 host, but it could use more testing from interested parties, hence I'm putting this here and will wait for comments. 
Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/control | 88 + 1 files changed, 64 insertions(+), 24 deletions(-) diff --git a/client/tests/kvm/control b/client/tests/kvm/control index 63bbe5d..668de8b 100644 --- a/client/tests/kvm/control +++ b/client/tests/kvm/control @@ -21,43 +21,83 @@ For online docs, please refer to http://www.linux-kvm.org/page/KVM-Autotest import sys, os, logging +# set English environment (command output might be localized, need to be safe) +os.environ['LANG'] = 'en_US.UTF-8' # Add the KVM tests dir to the python path kvm_test_dir = os.path.join(os.environ['AUTODIR'],'tests/kvm') sys.path.append(kvm_test_dir) # Now we can import modules inside the KVM tests dir import kvm_utils, kvm_config -# set English environment (command output might be localized, need to be safe) -os.environ['LANG'] = 'en_US.UTF-8' +# Choose the host kernel install mode 'rpm' or 'git' +# If you don't want to install a kernel, keep the below 'default' +host_kernel_install = 'default' +# URL for the kernel package +host_kernel_rpm_url = 'http://kojipkgs.fedoraproject.org/packages/kernel/2.6.36/0.32.rc6.git2.fc15/x86_64/kernel-2.6.36-0.32.rc6.git2.fc15.x86_64.rpm' +# Git repo URL and other git repo relevant data +host_kernel_git_repo = 'git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git' +host_kernel_git_branch = 'master' +host_kernel_git_commit = '' +# If you want to apply patches to your tree, make sure you populate the list +# below with the urls of the patches. 
+host_kernel_patch_list = [] +# URL for the kernel config file (git build method) +host_kernel_config = 'http://your-server.com/config' + + +def step_init(): +job.next_step([step_test]) +if host_kernel_install == 'rpm': +logging.info('Chose to install host kernel through rpm, proceeding') +dst = os.path.join(/tmp, os.path.basename(host_kernel_rpm_url)) +k = utils.get_file(host_kernel_rpm_url, dst) +host_kernel = job.kernel(k) +host_kernel.install(install_vmlinux=False) +job.bootloader.boot_once('autotest') +elif host_kernel_install == 'git': +logging.info('Chose to install host kernel through git, proceeding') +repodir = os.path.join(/tmp, 'kernel_src') +r = kvm_utils.get_git_branch(host_kernel_git_repo, + host_kernel_git_branch, + repodir, + host_kernel_git_commit) +host_kernel = job.kernel(r) +if host_kernel_patch_list: +host_kernel.patch(host_kernel_patch_list) +host_kernel.config(host_kernel_config) +host_kernel.build() +host_kernel.install() +job.bootloader.boot_once('autotest') +else: +logging.info('Chose %s, using the current kernel for the host', + host_kernel_install) -str = + +def step_test(): +str = # This string will be parsed after build.cfg. Make any desired changes to the # build configuration here. For example: #release_tag = 84 - -build_cfg = kvm_config.config() -# As the base test config is quite large, in order to save memory, we use the -# fork_and_parse() method, that creates another parser process and destroys it -# at the end of the parsing, so the memory spent can be given back to the OS. -build_cfg_path = os.path.join(kvm_test_dir, build.cfg) -build_cfg.fork_and_parse(build_cfg_path, str) -if not
BCM5708 performance issues
I'm running two separate KVM hosts that are showing the same network performance issue - KVM-guest is 35% slower than non-KVM (or ESXi for that matter). I'm wondering if the Broadcom NetXtreme II BCM5708 interface is at fault. I've tried all sorts of tweaks and configurations and I've only succeeded in making the network slower. The first box is running ProxMox 1.6. The second is Ubuntu 10.04. The guests on both boxes are 64-bit Ubuntu 10.04 server installs with virtio. ProxMox guest: /usr/bin/kvm -monitor unix:/var/run/qemu-server/104.mon,server,nowait -vnc unix:/var/run/qemu-server/104.vnc,password -pidfile /var/run/qemu-server/104.pid -daemonize -usbdevice tablet -name UbuntuServer -smp sockets=2,cores=2 -nodefaults -boot menu=on -vga cirrus -tdf -k en-us -drive file=/var/lib/vz/images/104/vm-104-disk-2.raw,if=ide,index=3 -drive file=/var/lib/vz/images/104/vm-104-disk-1.raw,if=virtio,index=0,boot=on -m 1024 -net tap,vlan=0,ifname=vmtab104i0,script=/var/lib/qemu-server/bridge-vlan -net nic,vlan=0,model=virtio,macaddr=76:3F:1A:03:6D:6F Ubuntu guest: /usr/bin/kvm -S -M pc-0.12 -enable-kvm -m 1024 -smp 1 -name ubutest -uuid c0537369-fffa-9680-2f29-2e0cc0406561 -chardev socket,id=monitor,path=/var/lib/libvirt/qemu/ubutest.monitor,server,nowait -monitor chardev:monitor -boot c -drive file=/dev/vg/ubutest,if=virtio,index=0,boot=on -net nic,macaddr=52:54:00:35:11:f1,vlan=0,model=virtio,name=virtio.0 -net tap,fd=51,vlan=0,name=tap.0 -chardev pty,id=serial0 -serial chardev:serial0 -parallel none -usb -vnc 0.0.0.0 Netperf looks like this on the Ubuntu host: Recv SendSend Socket Socket Message Elapsed Size SizeSize Time Throughput bytes bytes bytessecs.10^6bits/sec 1048576 16384 1638410.01 941.62 This is what I get on the Ubuntu guest for either setup: Recv SendSend Socket Socket Message Elapsed Size SizeSize Time Throughput bytes bytes bytessecs.10^6bits/sec 1048576 16384 1638410.00 615.65 Thank you for any guidance you can lend. 
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch uq/master 7/8] MCE: Relay UCR MCE to guest
(2010/10/05 3:54), Marcelo Tosatti wrote: Port qemu-kvm's commit 4b62fff1101a7ad77553147717a8bd3bf79df7ef Author: Huang Ying ying.hu...@intel.com Date: Mon Sep 21 10:43:25 2009 +0800 MCE: Relay UCR MCE to guest UCR (uncorrected recovery) MCE is supported in recent Intel CPUs, where some hardware error such as some memory error can be reported without PCC (processor context corrupted). To recover from such MCE, the corresponding memory will be unmapped, and all processes accessing the memory will be killed via SIGBUS. For KVM, if QEMU/KVM is killed, all guest processes will be killed too. So we relay SIGBUS from host OS to guest system via a UCR MCE injection. Then guest OS can isolate corresponding memory and kill necessary guest processes only. SIGBUS sent to main thread (not VCPU threads) will be broadcast to all VCPU threads as UCR MCE. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com (snip) +static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, + void *ctx) +{ +#if defined(TARGET_I386) +if (kvm_on_sigbus_vcpu(siginfo-ssi_code, (void *)(intptr_t)siginfo-ssi_addr)) +#endif +sigbus_reraise(); +} + static void qemu_kvm_eat_signal(CPUState *env, int timeout) { struct timespec ts; int r, e; siginfo_t siginfo; sigset_t waitset; +sigset_t chkset; ts.tv_sec = timeout / 1000; ts.tv_nsec = (timeout % 1000) * 100; sigemptyset(waitset); sigaddset(waitset, SIG_IPI); +sigaddset(waitset, SIGBUS); -qemu_mutex_unlock(qemu_global_mutex); -r = sigtimedwait(waitset, siginfo, ts); -e = errno; -qemu_mutex_lock(qemu_global_mutex); +do { +qemu_mutex_unlock(qemu_global_mutex); -if (r == -1 !(e == EAGAIN || e == EINTR)) { -fprintf(stderr, sigtimedwait: %s\n, strerror(e)); -exit(1); -} +r = sigtimedwait(waitset, siginfo, ts); +e = errno; + +qemu_mutex_lock(qemu_global_mutex); + +if (r == -1 !(e == EAGAIN || e == EINTR)) { +fprintf(stderr, sigtimedwait: %s\n, strerror(e)); +exit(1); +} + +switch (r) { +case SIGBUS: +#ifdef TARGET_I386 +if (kvm_on_sigbus(env, 
siginfo.si_code, siginfo.si_addr)) +#endif +sigbus_reraise(); +break; +default: +break; +} + +r = sigpending(chkset); +if (r == -1) { +fprintf(stderr, sigpending: %s\n, strerror(e)); +exit(1); +} +} while (sigismember(chkset, SIG_IPI) || sigismember(chkset, SIGBUS)); } static void qemu_kvm_wait_io_event(CPUState *env) (snip) Index: qemu/kvm.h === --- qemu.orig/kvm.h +++ qemu/kvm.h @@ -110,6 +110,9 @@ int kvm_arch_init_vcpu(CPUState *env); void kvm_arch_reset_vcpu(CPUState *env); +int kvm_on_sigbus(CPUState *env, int code, void *addr); +int kvm_on_sigbus_vcpu(int code, void *addr); + struct kvm_guest_debug; struct kvm_debug_exit_arch; So kvm_on_sigbus() is called from qemu_kvm_eat_signal() that is called on vcpu thread, while kvm_on_sigbus_vcpu() is called via sigbus_handler that invoked on iothread using signalfd. ... Inverse naming? Thanks, H.Seto -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: BCM5708 performance issues
* Pete Ashdown (pashd...@xmission.com) wrote: ProxMox guest: /usr/bin/kvm -monitor unix:/var/run/qemu-server/104.mon,server,nowait -vnc unix:/var/run/qemu-server/104.vnc,password -pidfile /var/run/qemu-server/104.pid -daemonize -usbdevice tablet -name UbuntuServer -smp sockets=2,cores=2 -nodefaults -boot menu=on -vga cirrus -tdf -k en-us -drive file=/var/lib/vz/images/104/vm-104-disk-2.raw,if=ide,index=3 -drive file=/var/lib/vz/images/104/vm-104-disk-1.raw,if=virtio,index=0,boot=on -m 1024 -net tap,vlan=0,ifname=vmtab104i0,script=/var/lib/qemu-server/bridge-vlan -net nic,vlan=0,model=virtio,macaddr=76:3F:1A:03:6D:6F Ubuntu guest: /usr/bin/kvm -S -M pc-0.12 -enable-kvm -m 1024 -smp 1 -name ubutest -uuid c0537369-fffa-9680-2f29-2e0cc0406561 -chardev socket,id=monitor,path=/var/lib/libvirt/qemu/ubutest.monitor,server,nowait -monitor chardev:monitor -boot c -drive file=/dev/vg/ubutest,if=virtio,index=0,boot=on -net nic,macaddr=52:54:00:35:11:f1,vlan=0,model=virtio,name=virtio.0 -net tap,fd=51,vlan=0,name=tap.0 -chardev pty,id=serial0 -serial chardev:serial0 -parallel none -usb -vnc 0.0.0.0 Not sure what userspace you are using, but you are probably not getting any of the useful offload features set. Checking ethtool -k $ETH in the guest will verify that. Try changing this: -net nic,macaddr=52:54:00:35:11:f1,vlan=0,model=virtio,name=virtio.0 \ -net tap,fd=51,vlan=0,name=tap.0 to use newer syntax: -netdev type=tap,id=netdev0 -device virtio-net-pci,mac=52:54:00:35:11:f1,netdev=netdev0 With just a 1Gb link, you should see line rate from guest via virtio. thanks, -chris -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch uq/master 7/8] MCE: Relay UCR MCE to guest
I have some more questions: (2010/10/05 3:54), Marcelo Tosatti wrote: Index: qemu/target-i386/cpu.h === --- qemu.orig/target-i386/cpu.h +++ qemu/target-i386/cpu.h @@ -250,16 +250,32 @@ #define PG_ERROR_RSVD_MASK 0x08 #define PG_ERROR_I_D_MASK 0x10 -#define MCG_CTL_P (1UL << 8) /* MCG_CAP register available */ +#define MCG_CTL_P (1ULL << 8) /* MCG_CAP register available */ +#define MCG_SER_P (1ULL << 24) /* MCA recovery/new status bits */ -#define MCE_CAP_DEF MCG_CTL_P +#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P) #define MCE_BANKS_DEF 10 It seems that current kvm doesn't support SER_P, so injecting SRAO to guest will mean that guest receives VAL|UC|!PCC and RIPV event from virtual processor that doesn't have SER_P. I think most OSes don't expect that it can receives MCE with !PCC on traditional x86 processor without SER_P. Q1: Is it safe to expect that guests can handle such !PCC event? Q2: What is the expected behavior on the guest? Q3: What happen if guest reboots itself in response to the MCE? Thanks, H.Seto -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 8 NIC limit - patch - places limit at 32
Attached is a patch that allows qemu to have up to 32 NICs, without using the qdev -device method. max_nics.patch Description: Binary data
Re: [PATCH 09/18] KVM test: Add a subtest of load/unload nic driver
On Mon, Sep 27, 2010 at 06:43:55PM -0400, Lucas Meneghel Rodrigues wrote: Repeatedly load/unload nic driver, try to transfer file between guest and host by threads at the same time, and check the md5sum. Changes from v1: - Use a new method to get nic driver name - Use utils.hash_file() to get md5sum Signed-off-by: Amos Kong ak...@redhat.com Signed-off-by: Lucas Meneghel Rodrigues l...@redhat.com --- client/tests/kvm/tests/nicdriver_unload.py | 115 client/tests/kvm/tests_base.cfg.sample | 10 ++- 2 files changed, 124 insertions(+), 1 deletions(-) create mode 100644 client/tests/kvm/tests/nicdriver_unload.py diff --git a/client/tests/kvm/tests/nicdriver_unload.py b/client/tests/kvm/tests/nicdriver_unload.py new file mode 100644 index 000..0a39815 --- /dev/null +++ b/client/tests/kvm/tests/nicdriver_unload.py @@ -0,0 +1,115 @@ +import logging, threading, os +from autotest_lib.client.common_lib import error +from autotest_lib.client.bin import utils +import kvm_utils, kvm_test_utils + +def run_nicdriver_unload(test, params, env): + +Test nic driver. + +1) Boot a VM. +2) Get the NIC driver name. +3) Repeatedly unload/load NIC driver. +4) Multi-session TCP transfer on test interface. +5) Check whether the test interface should still work. + +@param test: KVM test object. +@param params: Dictionary with the test parameters. +@param env: Dictionary with test environment. 
+ +timeout = int(params.get(login_timeout, 360)) +vm = kvm_test_utils.get_living_vm(env, params.get(main_vm)) +session = kvm_test_utils.wait_for_login(vm, timeout=timeout) +logging.info(Trying to log into guest '%s' by serial, vm.name) +session2 = kvm_utils.wait_for(lambda: vm.serial_login(), + timeout, 0, step=2) +if not session2: +raise error.TestFail(Could not log into guest '%s' % vm.name) + +ethname = kvm_test_utils.get_linux_ifname(session, vm.get_mac_address(0)) +sys_path = /sys/class/net/%s/device/driver % (ethname) +s, o = session.get_command_status_output('readlink -e %s' % sys_path) +if s: +raise error.TestError(Could not find driver name) +driver = os.path.basename(o.strip()) +logging.info(driver is %s, driver) + +class ThreadScp(threading.Thread): +def run(self): +remote_file = '/tmp/' + self.getName() +file_list.append(remote_file) +ret = vm.copy_files_to(file_name, remote_file, timeout=scp_timeout) +if ret: +logging.debug(File %s was transfered successfuly, remote_file) +else: +logging.debug(Failed to transfer file %s, remote_file) + +def compare(origin_file, receive_file): +cmd = md5sum %s +check_sum1 = utils.hash_file(origin_file, method=md5) +s, output2 = session.get_command_status_output(cmd % receive_file) +if s != 0: +logging.error(Could not get md5sum of receive_file) +return False +check_sum2 = output2.strip().split()[0] +logging.debug(original file md5: %s, received file md5: %s, + check_sum1, check_sum2) +if check_sum1 != check_sum2: +logging.error(MD5 hash of origin and received files doesn't match) +return False +return True + +#produce sized file in host +file_size = params.get(file_size) +file_name = /tmp/nicdriver_unload_file +cmd = dd if=/dev/urandom of=%s bs=%sM count=1 +utils.system(cmd % (file_name, file_size)) + +file_list = [] +connect_time = params.get(connect_time) +scp_timeout = int(params.get(scp_timeout)) +thread_num = int(params.get(thread_num)) +unload_load_cmd = (sleep %s ifconfig %s down modprobe -r %s + sleep 1 modprobe 
%s ifconfig %s up % Need sleep some seconds (eg. 4s) between 'modprobe ..' and 'ifconfig .. up'. Because, the creation of interface maybe not complete when command('modprobe ..') returns. unload_load_cmd = (sleep %s ifconfig %s down modprobe -r %s sleep 1 modprobe %s sleep 4 ifconfig %s up % btw, lucas I added comments to nicdriver_unload, vlan, multicast, ethtool. I also tested other patchsets, they are all fine. + (connect_time, ethname, driver, driver, ethname)) +pid = os.fork() +if pid != 0: +logging.info(Unload/load NIC driver repeatedly in guest...) +while True: +logging.debug(Try to unload/load nic drive once) +if session2.get_command_status(unload_load_cmd, timeout=120) != 0: +session.get_command_output(rm -rf /tmp/Thread-*) +raise error.TestFail(Unload/load nic driver failed) +pid, s = os.waitpid(pid, os.WNOHANG) +
[PATCH] KVM: PPC: e500: Call kvm_vcpu_uninit() before kvmppc_e500_tlb_uninit().
The VCPU uninit calls some TLB functions, and the TLB uninit function frees the memory used by them. Signed-off-by: Scott Wood scottw...@freescale.com --- arch/powerpc/kvm/e500.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 71750f2..e3768ee 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -138,8 +138,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); free_page((unsigned long)vcpu->arch.shared); - kvmppc_e500_tlb_uninit(vcpu_e500); kvm_vcpu_uninit(vcpu); + kvmppc_e500_tlb_uninit(vcpu_e500); kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } -- 1.7.0.4 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: PPC: e500: Call kvm_vcpu_uninit() before kvmppc_e500_tlb_uninit().
On 05.10.2010, at 21:22, Scott Wood wrote: The VCPU uninit calls some TLB functions, and the TLB uninit function frees the memory used by them. Liu, this is your code. Please sign it off if you think the change is correct. Alex -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html