[COMMIT master] KVM: x86 emulator: Report unhandled instructions
From: Mohammed Gamal <m.gamal...@gmail.com> Report unhandled instructions in the syslog on emulation failure Signed-off-by: Mohammed Gamal <m.gamal...@gmail.com> Signed-off-by: Avi Kivity <a...@redhat.com> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1f0ff4a..3d6a562 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2194,6 +2194,7 @@ writeback: done: if (rc == X86EMUL_UNHANDLEABLE) { + kvm_report_emulation_failure(ctxt->vcpu, "unhandled instruction"); c->eip = saved_eip; return -1; } @@ -2467,7 +2468,7 @@ twobyte_insn: goto writeback; cannot_emulate: - DPRINTF("Cannot emulate %02x\n", c->b); + kvm_report_emulation_failure(ctxt->vcpu, "unhandled instruction"); c->eip = saved_eip; return -1; } -- To unsubscribe from this list: send the line "unsubscribe kvm-commits" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86 emulator: Introduce No64 decode option
From: Mohammed Gamal m.gamal...@gmail.com Introduces a new decode option No64, which is used for instructions that are invalid in long mode. Signed-off-by: Mohammed Gamal m.gamal...@gmail.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1cdfec5..1f0ff4a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -75,6 +75,8 @@ #define Group (114) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (115) /* Alternate decoding of mod == 3 */ #define GroupMask 0xff/* Group number stored in bits 0:7 */ +/* Misc flags */ +#define No64 (128) /* Source 2 operand type */ #define Src2None(029) #define Src2CL (129) @@ -93,21 +95,21 @@ static u32 opcode_table[256] = { ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, - ImplicitOps | Stack, ImplicitOps | Stack, + ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, /* 0x08 - 0x0F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - 0, 0, ImplicitOps | Stack, 0, + 0, 0, ImplicitOps | Stack | No64, 0, /* 0x10 - 0x17 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, - ImplicitOps | Stack, ImplicitOps | Stack, + ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, /* 0x18 - 0x1F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, - ImplicitOps | Stack, ImplicitOps | Stack, + ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, /* 0x20 - 0x27 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -161,7 +163,7 @@ static u32 opcode_table[256] = { /* 0x90 - 0x97 */ DstReg, 
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, /* 0x98 - 0x9F */ - 0, 0, SrcImm | Src2Imm16, 0, + 0, 0, SrcImm | Src2Imm16 | No64, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, /* 0xA0 - 0xA7 */ ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, @@ -188,7 +190,7 @@ static u32 opcode_table[256] = { ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, /* 0xC8 - 0xCF */ 0, 0, 0, ImplicitOps | Stack, - ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, + ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps, /* 0xD0 - 0xD7 */ ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, @@ -201,7 +203,7 @@ static u32 opcode_table[256] = { ByteOp | SrcImmUByte, SrcImmUByte, /* 0xE8 - 0xEF */ SrcImm | Stack, SrcImm | ImplicitOps, - SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps, + SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* 0xF0 - 0xF7 */ @@ -967,6 +969,11 @@ done_prefixes: } } + if (mode == X86EMUL_MODE_PROT64 (c-d No64)) { + kvm_report_emulation_failure(ctxt-vcpu, invalid x86/64 instruction);; + return -1; + } + if (c-d Group) { group = c-d GroupMask; c-modrm = insn_fetch(u8, 1, c-eip); @@ -1739,15 +1746,9 @@ special_insn: emulate_2op_SrcV(add, c-src, c-dst, ctxt-eflags); break; case 0x06: /* push es */ - if (ctxt-mode == X86EMUL_MODE_PROT64) - goto cannot_emulate; - emulate_push_sreg(ctxt, VCPU_SREG_ES); break; case 0x07: /* pop es */ -if (ctxt-mode == X86EMUL_MODE_PROT64) -goto cannot_emulate; - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); if (rc != 0) goto done; @@ -1757,9 +1758,6 @@ special_insn: emulate_2op_SrcV(or, c-src, c-dst, ctxt-eflags); break; case 0x0e: /* push cs */ -if (ctxt-mode == X86EMUL_MODE_PROT64) -goto cannot_emulate; - emulate_push_sreg(ctxt, VCPU_SREG_CS); break; case 0x10 ... 
0x15: @@ -1767,15 +1765,9 @@ special_insn: emulate_2op_SrcV(adc, c-src, c-dst, ctxt-eflags);
[COMMIT master] KVM: Don't pass kvm_run arguments
From: Avi Kivity a...@redhat.com They're just copies of vcpu-run, which is readily accessible. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 33901be..b080590 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -509,8 +509,8 @@ struct kvm_x86_ops { void (*tlb_flush)(struct kvm_vcpu *vcpu); - void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); - int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); + void (*run)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); @@ -571,7 +571,7 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 0) #define EMULTYPE_TRAP_UD (1 1) #define EMULTYPE_SKIP (1 2) -int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, +int emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, u16 error_code, int emulation_type); void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); @@ -588,9 +588,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; -int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port); -int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, int size, unsigned long count, int down, gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3d6a562..15593e8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1826,7 +1826,7 @@ special_insn: break; case 0x6c: /* insb 
*/ case 0x6d: /* insw/insd */ -if (kvm_emulate_pio_string(ctxt-vcpu, NULL, +if (kvm_emulate_pio_string(ctxt-vcpu, 1, (c-d ByteOp) ? 1 : c-op_bytes, c-rep_prefix ? @@ -1842,7 +1842,7 @@ special_insn: return 0; case 0x6e: /* outsb */ case 0x6f: /* outsw/outsd */ - if (kvm_emulate_pio_string(ctxt-vcpu, NULL, + if (kvm_emulate_pio_string(ctxt-vcpu, 0, (c-d ByteOp) ? 1 : c-op_bytes, c-rep_prefix ? @@ -2135,7 +2135,7 @@ special_insn: case 0xef: /* out (e/r)ax,dx */ port = c-regs[VCPU_REGS_RDX]; io_dir_in = 0; - do_io: if (kvm_emulate_pio(ctxt-vcpu, NULL, io_dir_in, + do_io: if (kvm_emulate_pio(ctxt-vcpu, io_dir_in, (c-d ByteOp) ? 1 : c-op_bytes, port) != 0) { c-eip = saved_eip; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6f38178..ffd3c97 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2734,7 +2734,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) if (r) goto out; - er = emulate_instruction(vcpu, vcpu-run, cr2, error_code, 0); + er = emulate_instruction(vcpu, cr2, error_code, 0); switch (er) { case EMULATE_DONE: diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7853dd3..2df9b45 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -286,7 +286,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (!svm-next_rip) { - if (emulate_instruction(vcpu, vcpu-run, 0, 0, EMULTYPE_SKIP) != + if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG %s: NOP\n, __func__); return; @@ -1178,7 +1178,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, } } -static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int pf_interception(struct vcpu_svm *svm) { u64 fault_address; u32 error_code; @@ -1192,8 +1192,10 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_mmu_page_fault(svm-vcpu, fault_address, error_code); } -static int db_interception(struct 
vcpu_svm *svm, struct kvm_run *kvm_run) +static int
Re: [PATCH][RESEND] x86 emulator: Add 'push/pop sreg' instructions
On 08/23/2009 02:24 PM, Mohammed Gamal wrote: Signed-off-by: Mohammed Gamal <m.gamal...@gmail.com> Applied, thanks. +static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, +struct x86_emulate_ops *ops, int seg) +{ + struct decode_cache *c = &ctxt->decode; + u16 selector; + int rc; + + rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); This overflows the stack. I changed 'selector' to be unsigned long to fix this. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RESEND] x86 emulator: Introduce No64 decode option
On 08/23/2009 02:24 PM, Mohammed Gamal wrote: Introduces a new decode option No64, which is used for instructions that are invalid in long mode. Applied, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RESEND] x86 emulator: Report unhandled instructions
On 08/23/2009 02:24 PM, Mohammed Gamal wrote: Report unhandled instructions in the syslog on emulation failure Applied, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RESEND] Add push/pop instructions test in test harness
On 08/23/2009 02:24 PM, Mohammed Gamal wrote: Signed-off-by: Mohammed Gamalm.gamal...@gmail.com Applied, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] VMX: Return to userspace on invalid state emulation failure
On 08/24/2009 07:07 AM, Mohammed Gamal wrote: Return to userspace instead of repeatedly trying to emulate instructions that have already failed Signed-off-by: Mohammed Gamal <m.gamal...@gmail.com> --- arch/x86/kvm/vmx.c |5 - 1 files changed, 4 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1ee811c..6030671 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3341,6 +3341,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, if (err != EMULATE_DONE) { kvm_report_emulation_failure(vcpu, "emulation failure"); + kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + kvm_run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; break; } @@ -3612,7 +3614,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vmx->entry_time = ktime_get(); /* Handle invalid guest state instead of entering VMX */ - if (vmx->emulation_required && emulate_invalid_guest_state) { + if (vmx->emulation_required && emulate_invalid_guest_state && + kvm_run->internal.suberror != KVM_INTERNAL_ERROR_EMULATION) { handle_invalid_guest_state(vcpu, kvm_run); return; } kvm_run->internal.suberror is an uninitialized variable and can contain any value. You need a different communication channel here. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2351676 ] Guests hang periodically on Ubuntu-8.10
Bugs item #2351676, was opened at 2008-11-26 19:59 Message generated for change (Comment added) made by z-image You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2351676group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: Chris Jones (c_jones) Assigned to: Nobody/Anonymous (nobody) Summary: Guests hang periodically on Ubuntu-8.10 Initial Comment: I'm seeing periodic hangs on my guests. I've been unable so far to find a trigger - they always boot fine, but after anywhere from 10 minutes to 24 hours they eventually hang completely. My setup: * AMD Athlon X2 4850e (2500 MHz dual core) * 4Gig memory * Ubuntu 8.10 server, 64-bit * KVMs tried: : kvm-72 (shipped with ubuntu) : kvm-79 (built myself, --patched-kernel option) * Kernels tried: : 2.6.27.7 (kernel.org, self built) : 2.6.27-7-server from Ubuntu 8.10 distribution In guests * Ubuntu 8.10 server, 64-bit (virtual machine install) * kernel 2.6.27-7-server from Ubuntu 8.10 I'm running the guests like: sudo /usr/local/bin/qemu-system-x86_64\ -daemonize \ -no-kvm-irqchip\ -hda Imgs/ndev_root.img\ -m 1024\ -cdrom ISOs/ubuntu-8.10-server-amd64.iso \ -vnc :4\ -net nic,macaddr=DE:AD:BE:EF:04:04,model=e1000 \ -net tap,ifname=tap4,script=/home/chris/kvm/qemu-ifup.sh The problem does not happen if I use -no-kvm. I've tried some other options that have no effect: -no-kvm-pit -no-acpi The disk images are raw format. When the guests hang, I cannot ping them, and the vnc console us hung. The qemu monitor is still accessible, and the guests recover if I issue a system_reset command from the monitor. However, often, the console will not take keyboard after doing so. 
When the guest is hung, kvm_stat shows all 0s for the counters: efer_relo exits fpu_reloa halt_exit halt_wake host_stat hypercall +insn_emul insn_emul invlpg io_exits irq_exits irq_windo largepage +mmio_exit mmu_cache mmu_flood mmu_pde_z mmu_pte_u mmu_pte_w mmu_recyc +mmu_shado nmi_windo pf_fixed pf_guest remote_tl request_i signal_ex +tlb_flush 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 +0 0 0 0 0 0 gdb shows two threads - both waiting: c(gdb) info threads 2 Thread 0x414f1950 (LWP 422) 0x7f36f07a03e1 in sigtimedwait () from /lib/libc.so.6 1 Thread 0x7f36f1f306e0 (LWP 414) 0x7f36f084b482 in select () from /lib/libc.so.6 (gdb) thread 1 [Switching to thread 1 (Thread 0x7f36f1f306e0 (LWP 414))]#0 0x7f36f084b482 +in select () from /lib/libc.so.6 (gdb) bt #0 0x7f36f084b482 in select () from /lib/libc.so.6 #1 0x004094cb in main_loop_wait (timeout=0) at /home/chris/pkgs/kvm/kvm-79/qemu/vl.c:4719 #2 0x0050a7ea in kvm_main_loop () at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:619 #3 0x0040fafc in main (argc=value optimized out, argv=0x79f41948) at /home/chris/pkgs/kvm/kvm-79/qemu/vl.c:4871 (gdb) thread 2 [Switching to thread 2 (Thread 0x414f1950 (LWP 422))]#0 0x7f36f07a03e1 in +sigtimedwait () from /lib/libc.so.6 (gdb) bt #0 0x7f36f07a03e1 in sigtimedwait () from /lib/libc.so.6 #1 0x0050a560 in kvm_main_loop_wait (env=0xc319e0, timeout=0) at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:284 #2 0x0050aaf7 in ap_main_loop (_env=value optimized out) at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:425 #3 0x7f36f11ba3ea in start_thread () from /lib/libpthread.so.0 #4 0x7f36f0852c6d in clone () from /lib/libc.so.6 #5 0x in ?? () Any clues to help me resolve this would be much appreciated. -- Comment By: Teodor Milkov (z-image) Date: 2009-08-24 10:45 Message: With 2.6.31-rc6 it is running fine for almost 72 hours. Looks like the problem is gone in 2.6.31. 
-- Comment By: Teodor Milkov (z-image) Date: 2009-08-21 11:53 Message: With -no-kvm-pit it is running fine for almost 20 hours. Didn't survive that long without -no-kvm-pit. -- Comment By: Daniel Poelzleithner (poelzi) Date: 2009-08-20 18:20 Message: I'm still in investigation but I got new informations so far. There seem to be different issues that
vhost net: performance with ping benchmark
At Rusty's suggestion, I tested vhost base performance with ping. Results below, and seem to be what you'd expect. I'm working on TSO support, expect results shortly. latency with ping (lower is better): native: [r...@virtlab17 ~]# ping -c 100 -f -q 21.1.50.4 PING 21.1.50.4 (21.1.50.4) 56(84) bytes of data. --- 21.1.50.4 ping statistics --- 100 packets transmitted, 100 received, 0% packet loss, time 73624ms rtt min/avg/max/mdev = 0.047/0.061/1.253/0.036 ms, ipg/ewma 0.073/0.097 ms vhost: [r...@virtlab17 ~]# ping -c 100 -f -q 20.1.50.4 PING 20.1.50.4 (20.1.50.4) 56(84) bytes of data. --- 20.1.50.4 ping statistics --- 100 packets transmitted, 100 received, 0% packet loss, time 92308ms rtt min/avg/max/mdev = 0.064/0.080/1.062/0.041 ms, ipg/ewma 0.092/0.083 ms userspace: [r...@virtlab17 ~]# ping -c 10 -f -q 20.1.50.4 PING 20.1.50.4 (20.1.50.4) 56(84) bytes of data. --- 20.1.50.4 ping statistics --- 10 packets transmitted, 10 received, 0% packet loss, time 54473ms rtt min/avg/max/mdev = 0.219/0.505/2.342/0.131 ms, ipg/ewma 0.544/0.485 ms Conclusion: for latency difference between native and vhost is about 20usec, userspace is way slower. This basically matches what was observed with venet. ## throughput with ping (lower time is better): native: [r...@virtlab17 ~]# ping -s 1024 -l 120 -c 10 -f -q 20.1.50.2 PING 20.1.50.2 (20.1.50.2) 1024(1052) bytes of data. --- 20.1.50.2 ping statistics --- 10 packets transmitted, 10 received, 0% packet loss, time 3582ms rtt min/avg/max/mdev = 0.105/4.155/5.471/0.471 ms, pipe 120, ipg/ewma 0.035/4.567 ms vhost: [r...@virtlab17 ~]# ping -s 1024 -l 120 -c 10 -f -q 20.1.50.4 PING 20.1.50.4 (20.1.50.4) 1024(1052) bytes of data. --- 20.1.50.4 ping statistics --- 10 packets transmitted, 10 received, 0% packet loss, time 3900ms rtt min/avg/max/mdev = 0.354/4.129/6.009/0.520 ms, pipe 120, ipg/ewma 0.039/3.109 ms userspace: [r...@virtlab17 ~]# ping -s 1024 -l 120 -c 100 -f -q 20.1.50.4 PING 20.1.50.4 (20.1.50.4) 1024(1052) bytes of data. 
--- 20.1.50.4 ping statistics --- 100 packets transmitted, 999731 received, 0% packet loss, time 45082ms rtt min/avg/max/mdev = 0.299/4.130/8.143/1.094 ms, pipe 120, ipg/ewma 0.045/1.117 ms Conclusion: for throughput vhost is half-way between native and userspace. Again, same thing as was observed with venet. -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 0/9] make interrupt injection lockless (almost)
kvm-irq_lock protects too much stuff, but still fail to protect everything it was design to protect (see ack notifiers call in pic). I want to make IRQ injection fast path as lockless as possible. This patch series removes kvm-irq_lock from irq injection path effectively making interrupt injection to lapic lockless (several kvm_irq_delivery_to_apic() may run in parallel), but access to lapic was never fully locked in the first place. VCPU could access lapic in parallel with interrupt injection. Patches 2-3 changes irq routing data structure to much more efficient one. v1-v2: Drop MSI injection interface (for now). Use irq_lock to protect irq routing and ack notifiers. Splitting irq routing table changes to two patches (+ comments addressed). Drop ioapic/pic lock before calling ack notifiers. v2-v3 Drop patch that changes irq_lock to spinlock. Use mutex for ioapic lock. Do not call ack notifier if there is no GSI mapping. Call pic_clear_isr() after PIC state completely changed. v3-v4 Add patch to move irq sharing information to irqchip level Do not remove call of ack notifiers on pic reset (yet). Call irq-set() function outside of RCU read section Gleb Natapov (9): Call pic_clear_isr() on pic reset to reuse logic there. Move irq sharing information to irqchip level. Change irq routing table to use gsi indexed array. Maintain back mapping from irqchip/pin to gsi. Move irq routing data structure to rcu locking Move irq ack notifier list to arch independent code. Convert irq notifiers lists to RCU locking. Move IO APIC to its own lock. Drop kvm-irq_lock lock from irq injection path. 
arch/ia64/include/asm/kvm.h |1 + arch/ia64/include/asm/kvm_host.h |1 - arch/ia64/kvm/kvm-ia64.c |9 +-- arch/x86/include/asm/kvm.h |1 + arch/x86/include/asm/kvm_host.h |2 - arch/x86/kvm/i8254.c |2 - arch/x86/kvm/i8259.c | 44 + arch/x86/kvm/irq.h |1 + arch/x86/kvm/lapic.c |7 +- arch/x86/kvm/x86.c | 12 +-- include/linux/kvm_host.h | 20 +++- virt/kvm/eventfd.c |2 - virt/kvm/ioapic.c| 80 +++ virt/kvm/ioapic.h|5 + virt/kvm/irq_comm.c | 212 ++ virt/kvm/kvm_main.c |4 +- 16 files changed, 239 insertions(+), 164 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 3/9] Change irq routing table to use gsi indexed array.
Use gsi indexed array instead of scanning all entries on each interrupt injection. Signed-off-by: Gleb Natapov g...@redhat.com --- include/linux/kvm_host.h | 16 +++-- virt/kvm/irq_comm.c | 88 +++-- virt/kvm/kvm_main.c |1 - 3 files changed, 66 insertions(+), 39 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index beab24b..802c080 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -129,7 +129,17 @@ struct kvm_kernel_irq_routing_entry { } irqchip; struct msi_msg msi; }; - struct list_head link; + struct hlist_node link; +}; + +struct kvm_irq_routing_table { + struct kvm_kernel_irq_routing_entry *rt_entries; + u32 nr_rt_entries; + /* +* Array indexed by gsi. Each entry contains list of irq chips +* the gsi is connected to. +*/ + struct hlist_head map[0]; }; struct kvm { @@ -167,7 +177,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */ + struct kvm_irq_routing_table *irq_routing; struct hlist_head mask_notifier_list; #endif @@ -396,7 +406,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, union kvm_ioapic_redirect_entry *entry, unsigned long *deliver_bitmask); #endif -int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 11aa702..c9cfa70 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -144,10 +144,12 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, * = 0 Interrupt was coalesced (previous irq is still pending) * 0 Number of CPUs interrupt was delivered to */ -int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) +int kvm_set_irq(struct kvm *kvm, int 
irq_source_id, u32 irq, int level) { struct kvm_kernel_irq_routing_entry *e; int ret = -1; + struct kvm_irq_routing_table *irq_rt; + struct hlist_node *n; trace_kvm_set_irq(irq, level, irq_source_id); @@ -157,8 +159,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - list_for_each_entry(e, kvm-irq_routing, link) - if (e-gsi == irq) { + irq_rt = kvm-irq_routing; + if (irq irq_rt-nr_rt_entries) + hlist_for_each_entry(e, n, irq_rt-map[irq], link) { int r = e-set(e, kvm, irq_source_id, level); if (r 0) continue; @@ -170,20 +173,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_ack_notifier *kian; struct hlist_node *n; unsigned gsi = pin; + int i; trace_kvm_ack_irq(irqchip, pin); - list_for_each_entry(e, kvm-irq_routing, link) + for (i = 0; i kvm-irq_routing-nr_rt_entries; i++) { + struct kvm_kernel_irq_routing_entry *e; + e = kvm-irq_routing-rt_entries[i]; if (e-type == KVM_IRQ_ROUTING_IRQCHIP e-irqchip.irqchip == irqchip e-irqchip.pin == pin) { gsi = e-gsi; break; } + } hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list, link) if (kian-gsi == gsi) @@ -278,26 +284,30 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) kimn-func(kimn, mask); } -static void __kvm_free_irq_routing(struct list_head *irq_routing) -{ - struct kvm_kernel_irq_routing_entry *e, *n; - - list_for_each_entry_safe(e, n, irq_routing, link) - kfree(e); -} - void kvm_free_irq_routing(struct kvm *kvm) { mutex_lock(kvm-irq_lock); - __kvm_free_irq_routing(kvm-irq_routing); + kfree(kvm-irq_routing); mutex_unlock(kvm-irq_lock); } -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, +static int setup_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, 
const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int
[PATCH v4 2/9] Move irq sharing information to irqchip level.
This removes assumptions that max GSIs is smaller than number of pins. Sharing is tracked on pin level not GSI level. Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/include/asm/kvm_host.h |1 - arch/x86/kvm/irq.h |1 + include/linux/kvm_host.h|2 +- virt/kvm/ioapic.h |1 + virt/kvm/irq_comm.c | 57 +++--- 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 33901be..6b02f86 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -413,7 +413,6 @@ struct kvm_arch{ gpa_t ept_identity_map_addr; unsigned long irq_sources_bitmap; - unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; u64 vm_init_tsc; }; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7d6058a..c025a23 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -71,6 +71,7 @@ struct kvm_pic { int output; /* intr from master PIC */ struct kvm_io_device dev; void (*ack_notifier)(void *opaque, int irq); + unsigned long irq_states[16]; }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f814512..beab24b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -121,7 +121,7 @@ struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level); + struct kvm *kvm, int irq_source_id, int level); union { struct { unsigned irqchip; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7080b71..6e461ad 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -41,6 +41,7 @@ struct kvm_ioapic { u32 irr; u32 pad; union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; + unsigned long irq_states[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 001663f..11aa702 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ 
-31,20 +31,39 @@ #include ioapic.h +static inline int kvm_irq_line_state(unsigned long *irq_state, +int irq_source_id, int level) +{ + /* Logical OR for level trig interrupt */ + if (level) + set_bit(irq_source_id, irq_state); + else + clear_bit(irq_source_id, irq_state); + + return !!(*irq_state); +} + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { #ifdef CONFIG_X86 - return kvm_pic_set_irq(pic_irqchip(kvm), e-irqchip.pin, level); + struct kvm_pic *pic = pic_irqchip(kvm); + level = kvm_irq_line_state(pic-irq_states[e-irqchip.pin], + irq_source_id, level); + return kvm_pic_set_irq(pic, e-irqchip.pin, level); #else return -1; #endif } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { - return kvm_ioapic_set_irq(kvm-arch.vioapic, e-irqchip.pin, level); + struct kvm_ioapic *ioapic = kvm-arch.vioapic; + level = kvm_irq_line_state(ioapic-irq_states[e-irqchip.pin], + irq_source_id, level); + + return kvm_ioapic_set_irq(ioapic, e-irqchip.pin, level); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) @@ -96,10 +115,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { struct kvm_lapic_irq irq; + if (!level) + return -1; + trace_kvm_msi_set_irq(e-msi.address_lo, e-msi.data); irq.dest_id = (e-msi.address_lo @@ -125,34 +147,19 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { struct kvm_kernel_irq_routing_entry *e; - unsigned long *irq_state, sig_level; int ret = -1; trace_kvm_set_irq(irq, level, irq_source_id); WARN_ON(!mutex_is_locked(kvm-irq_lock)); - if (irq KVM_IOAPIC_NUM_PINS) { - irq_state = 
(unsigned long *)kvm-arch.irq_states[irq]; - - /* Logical OR for level trig interrupt */ - if (level) -
[PATCH v4 1/9] Call pic_clear_isr() on pic reset to reuse logic there.
Also move call of ack notifiers after pic state change. Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/kvm/i8259.c | 22 +- 1 files changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 01f1516..ccc941a 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -225,22 +225,11 @@ int kvm_pic_read_irq(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s) { - int irq, irqbase, n; + int irq; struct kvm *kvm = s-pics_state-irq_request_opaque; struct kvm_vcpu *vcpu0 = kvm-bsp_vcpu; + u8 irr = s-irr, isr = s-imr; - if (s == s-pics_state-pics[0]) - irqbase = 0; - else - irqbase = 8; - - for (irq = 0; irq PIC_NUM_PINS/2; irq++) { - if (vcpu0 kvm_apic_accept_pic_intr(vcpu0)) - if (s-irr (1 irq) || s-isr (1 irq)) { - n = irq + irqbase; - kvm_notify_acked_irq(kvm, SELECT_PIC(n), n); - } - } s-last_irr = 0; s-irr = 0; s-imr = 0; @@ -256,6 +245,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s) s-rotate_on_auto_eoi = 0; s-special_fully_nested_mode = 0; s-init4 = 0; + + for (irq = 0; irq PIC_NUM_PINS/2; irq++) { + if (vcpu0 kvm_apic_accept_pic_intr(vcpu0)) + if (irr (1 irq) || isr (1 irq)) { + pic_clear_isr(s, irq); + } + } } static void pic_ioport_write(void *opaque, u32 addr, u32 val) -- 1.6.3.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 4/9] Maintain back mapping from irqchip/pin to gsi.
Maintain back mapping from irqchip/pin to gsi to speedup interrupt acknowledgment notifications. Signed-off-by: Gleb Natapov g...@redhat.com --- arch/ia64/include/asm/kvm.h |1 + arch/x86/include/asm/kvm.h |1 + include/linux/kvm_host.h|1 + virt/kvm/irq_comm.c | 31 ++- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index 18a7e49..bc90c75 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -60,6 +60,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 #define KVM_CONTEXT_SIZE 8*1024 diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4a5fe91..f02e87a 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -79,6 +79,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 802c080..00e4762 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -133,6 +133,7 @@ struct kvm_kernel_irq_routing_entry { }; struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; struct kvm_kernel_irq_routing_entry *rt_entries; u32 nr_rt_entries; /* diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index c9cfa70..6a8434d 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -175,25 +175,16 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; struct hlist_node *n; - unsigned gsi = pin; - int i; + int gsi; trace_kvm_ack_irq(irqchip, pin); - for (i = 0; i kvm-irq_routing-nr_rt_entries; i++) { - struct kvm_kernel_irq_routing_entry *e; - e = kvm-irq_routing-rt_entries[i]; - if (e-type == KVM_IRQ_ROUTING_IRQCHIP - 
e-irqchip.irqchip == irqchip - e-irqchip.pin == pin) { - gsi = e-gsi; - break; - } - } - - hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list, link) - if (kian-gsi == gsi) - kian-irq_acked(kian); + gsi = kvm-irq_routing-chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list, +link) + if (kian-gsi == gsi) + kian-irq_acked(kian); } void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -330,6 +321,9 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, } e-irqchip.irqchip = ue-u.irqchip.irqchip; e-irqchip.pin = ue-u.irqchip.pin + delta; + if (e-irqchip.pin = KVM_IOAPIC_NUM_PINS) + goto out; + rt-chip[ue-u.irqchip.irqchip][e-irqchip.pin] = ue-gsi; break; case KVM_IRQ_ROUTING_MSI: e-set = kvm_set_msi; @@ -354,7 +348,7 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned flags) { struct kvm_irq_routing_table *new, *old; - u32 i, nr_rt_entries = 0; + u32 i, j, nr_rt_entries = 0; int r; for (i = 0; i nr; ++i) { @@ -375,6 +369,9 @@ int kvm_set_irq_routing(struct kvm *kvm, new-rt_entries = (void *)new-map[nr_rt_entries]; new-nr_rt_entries = nr_rt_entries; + for (i = 0; i 3; i++) + for (j = 0; j KVM_IOAPIC_NUM_PINS; j++) + new-chip[i][j] = -1; for (i = 0; i nr; ++i) { r = -EINVAL; -- 1.6.3.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 5/9] Move irq routing data structure to rcu locking
Signed-off-by: Gleb Natapov g...@redhat.com --- virt/kvm/irq_comm.c | 16 +++- 1 files changed, 11 insertions(+), 5 deletions(-) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 6a8434d..8350050 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -159,7 +159,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - irq_rt = kvm-irq_routing; + rcu_read_lock(); + irq_rt = rcu_dereference(kvm-irq_routing); if (irq irq_rt-nr_rt_entries) hlist_for_each_entry(e, n, irq_rt-map[irq], link) { int r = e-set(e, kvm, irq_source_id, level); @@ -168,6 +169,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) ret = r + ((ret 0) ? 0 : ret); } + rcu_read_unlock(); return ret; } @@ -179,7 +181,10 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) trace_kvm_ack_irq(irqchip, pin); - gsi = kvm-irq_routing-chip[irqchip][pin]; + rcu_read_lock(); + gsi = rcu_dereference(kvm-irq_routing)-chip[irqchip][pin]; + rcu_read_unlock(); + if (gsi != -1) hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list, link) @@ -277,9 +282,9 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) void kvm_free_irq_routing(struct kvm *kvm) { - mutex_lock(kvm-irq_lock); + /* Called only during vm destruction. Nobody can use the pointer + at this stage */ kfree(kvm-irq_routing); - mutex_unlock(kvm-irq_lock); } static int setup_routing_entry(struct kvm_irq_routing_table *rt, @@ -385,8 +390,9 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(kvm-irq_lock); old = kvm-irq_routing; - kvm-irq_routing = new; + rcu_assign_pointer(kvm-irq_routing, new); mutex_unlock(kvm-irq_lock); + synchronize_rcu(); new = old; r = 0; -- 1.6.3.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 7/9] Convert irq notifiers lists to RCU locking.
Use RCU locking for mask/ack notifiers lists. Signed-off-by: Gleb Natapov g...@redhat.com --- virt/kvm/irq_comm.c | 22 -- 1 files changed, 12 insertions(+), 10 deletions(-) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index d7393d6..71a5a43 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -183,19 +183,19 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_lock(); gsi = rcu_dereference(kvm-irq_routing)-chip[irqchip][pin]; - rcu_read_unlock(); - if (gsi != -1) - hlist_for_each_entry(kian, n, kvm-irq_ack_notifier_list, link) + hlist_for_each_entry_rcu(kian, n, kvm-irq_ack_notifier_list, +link) if (kian-gsi == gsi) kian-irq_acked(kian); + rcu_read_unlock(); } void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian) { mutex_lock(kvm-irq_lock); - hlist_add_head(kian-link, kvm-irq_ack_notifier_list); + hlist_add_head_rcu(kian-link, kvm-irq_ack_notifier_list); mutex_unlock(kvm-irq_lock); } @@ -203,8 +203,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian) { mutex_lock(kvm-irq_lock); - hlist_del_init(kian-link); + hlist_del_init_rcu(kian-link); mutex_unlock(kvm-irq_lock); + synchronize_rcu(); } int kvm_request_irq_source_id(struct kvm *kvm) @@ -255,7 +256,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, { mutex_lock(kvm-irq_lock); kimn-irq = irq; - hlist_add_head(kimn-link, kvm-mask_notifier_list); + hlist_add_head_rcu(kimn-link, kvm-mask_notifier_list); mutex_unlock(kvm-irq_lock); } @@ -263,8 +264,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn) { mutex_lock(kvm-irq_lock); - hlist_del(kimn-link); + hlist_del_rcu(kimn-link); mutex_unlock(kvm-irq_lock); + synchronize_rcu(); } void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) @@ -272,11 +274,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) struct kvm_irq_mask_notifier *kimn; struct 
hlist_node *n; - WARN_ON(!mutex_is_locked(kvm-irq_lock)); - - hlist_for_each_entry(kimn, n, kvm-mask_notifier_list, link) + rcu_read_lock(); + hlist_for_each_entry_rcu(kimn, n, kvm-mask_notifier_list, link) if (kimn-irq == irq) kimn-func(kimn, mask); + rcu_read_unlock(); } void kvm_free_irq_routing(struct kvm *kvm) -- 1.6.3.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 6/9] Move irq ack notifier list to arch independent code.
Mask irq notifier list is already there. Signed-off-by: Gleb Natapov g...@redhat.com --- arch/ia64/include/asm/kvm_host.h |1 - arch/x86/include/asm/kvm_host.h |1 - include/linux/kvm_host.h |1 + virt/kvm/irq_comm.c |5 ++--- virt/kvm/kvm_main.c |1 + 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index d9b6325..a362e67 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -475,7 +475,6 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; int iommu_flags; - struct hlist_head irq_ack_notifier_list; unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6b02f86..ee13379 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -400,7 +400,6 @@ struct kvm_arch{ struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - struct hlist_head irq_ack_notifier_list; int vapics_in_nmi_mode; unsigned int tss_addr; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 00e4762..75cf6ee 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -180,6 +180,7 @@ struct kvm { #ifdef CONFIG_HAVE_KVM_IRQCHIP struct kvm_irq_routing_table *irq_routing; struct hlist_head mask_notifier_list; + struct hlist_head irq_ack_notifier_list; #endif #ifdef KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 8350050..d7393d6 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -186,8 +186,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_unlock(); if (gsi != -1) - hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list, -link) + hlist_for_each_entry(kian, n, kvm-irq_ack_notifier_list, link) if (kian-gsi == gsi) kian-irq_acked(kian); } @@ -196,7 +195,7 @@ void 
kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian) { mutex_lock(kvm-irq_lock); - hlist_add_head(kian-link, kvm-arch.irq_ack_notifier_list); + hlist_add_head(kian-link, kvm-irq_ack_notifier_list); mutex_unlock(kvm-irq_lock); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 50cc001..783fa7c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -945,6 +945,7 @@ static struct kvm *kvm_create_vm(void) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(kvm-mask_notifier_list); + INIT_HLIST_HEAD(kvm-irq_ack_notifier_list); #endif #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET -- 1.6.3.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 8/9] Move IO APIC to its own lock.
Signed-off-by: Gleb Natapov g...@redhat.com --- arch/ia64/kvm/kvm-ia64.c |7 +--- arch/x86/kvm/i8259.c | 22 +--- arch/x86/kvm/lapic.c |5 +-- arch/x86/kvm/x86.c | 10 + virt/kvm/ioapic.c| 80 +++--- virt/kvm/ioapic.h|4 ++ virt/kvm/irq_comm.c | 23 - 7 files changed, 100 insertions(+), 51 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 0ad09f0..4a98314 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -851,8 +851,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, r = 0; switch (chip-chip_id) { case KVM_IRQCHIP_IOAPIC: - memcpy(chip-chip.ioapic, ioapic_irqchip(kvm), - sizeof(struct kvm_ioapic_state)); + r = kvm_get_ioapic(kvm, chip-chip.ioapic); break; default: r = -EINVAL; @@ -868,9 +867,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) r = 0; switch (chip-chip_id) { case KVM_IRQCHIP_IOAPIC: - memcpy(ioapic_irqchip(kvm), - chip-chip.ioapic, - sizeof(struct kvm_ioapic_state)); + r = kvm_set_ioapic(kvm, chip-chip.ioapic); break; default: r = -EINVAL; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index ccc941a..d057c0c 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) s-isr_ack |= (1 irq); if (s != s-pics_state-pics[0]) irq += 8; + /* +* We are dropping lock while calling ack notifiers since ack +* notifier callbacks for assigned devices call into PIC recursively. +* Other interrupt may be delivered to PIC while lock is dropped but +* it should be safe since PIC state is already updated at this stage. 
+*/ + spin_unlock(s-pics_state-lock); kvm_notify_acked_irq(s-pics_state-kvm, SELECT_PIC(irq), irq); + spin_lock(s-pics_state-lock); } void kvm_pic_clear_isr_ack(struct kvm *kvm) @@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) static inline void pic_intack(struct kvm_kpic_state *s, int irq) { s-isr |= 1 irq; - if (s-auto_eoi) { - if (s-rotate_on_auto_eoi) - s-priority_add = (irq + 1) 7; - pic_clear_isr(s, irq); - } /* * We don't clear a level sensitive interrupt here */ if (!(s-elcr (1 irq))) s-irr = ~(1 irq); + + if (s-auto_eoi) { + if (s-rotate_on_auto_eoi) + s-priority_add = (irq + 1) 7; + pic_clear_isr(s, irq); + } + } int kvm_pic_read_irq(struct kvm *kvm) @@ -294,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) priority = get_priority(s, s-isr); if (priority != 8) { irq = (priority + s-priority_add) 7; - pic_clear_isr(s, irq); if (cmd == 5) s-priority_add = (irq + 1) 7; + pic_clear_isr(s, irq); pic_update_irq(s-pics_state); } break; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ce195f8..f24d4d0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -471,11 +471,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) trigger_mode = IOAPIC_LEVEL_TRIG; else trigger_mode = IOAPIC_EDGE_TRIG; - if (!(apic_get_reg(apic, APIC_SPIV) APIC_SPIV_DIRECTED_EOI)) { - mutex_lock(apic-vcpu-kvm-irq_lock); + if (!(apic_get_reg(apic, APIC_SPIV) APIC_SPIV_DIRECTED_EOI)) kvm_ioapic_update_eoi(apic-vcpu-kvm, vector, trigger_mode); - mutex_unlock(apic-vcpu-kvm-irq_lock); - } } static void apic_send_ipi(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0f22f72..f49b2a1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2023,9 +2023,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) sizeof(struct kvm_pic_state)); break; case KVM_IRQCHIP_IOAPIC: - memcpy(chip-chip.ioapic, - ioapic_irqchip(kvm), - sizeof(struct kvm_ioapic_state)); + r = 
kvm_get_ioapic(kvm, &chip->chip.ioapic); break; default:
[PATCH v4 9/9] Drop kvm-irq_lock lock from irq injection path.
The only thing it protects now is interrupt injection into lapic and this can work lockless. Even now with kvm-irq_lock in place access to lapic is not entirely serialized since vcpu access doesn't take kvm-irq_lock. Signed-off-by: Gleb Natapov g...@redhat.com --- arch/ia64/kvm/kvm-ia64.c |2 -- arch/x86/kvm/i8254.c |2 -- arch/x86/kvm/lapic.c |2 -- arch/x86/kvm/x86.c |2 -- virt/kvm/eventfd.c |2 -- virt/kvm/irq_comm.c |6 +- virt/kvm/kvm_main.c |2 -- 7 files changed, 1 insertions(+), 17 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 4a98314..f534e0f 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -982,10 +982,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { __s32 status; - mutex_lock(kvm-irq_lock); status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); - mutex_unlock(kvm-irq_lock); if (ioctl == KVM_IRQ_LINE_STATUS) { irq_event.status = status; if (copy_to_user(argp, irq_event, diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 82ad523..b857ca3 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm) struct kvm_vcpu *vcpu; int i; - mutex_lock(kvm-irq_lock); kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1); kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 0); - mutex_unlock(kvm-irq_lock); /* * Provides NMI watchdog support via Virtual Wire mode. 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f24d4d0..e41e948 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -501,9 +501,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, irq.vector); - mutex_lock(apic-vcpu-kvm-irq_lock); kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq); - mutex_unlock(apic-vcpu-kvm-irq_lock); } static u32 apic_get_tmcct(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f49b2a1..fe68745 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2268,10 +2268,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { __s32 status; - mutex_lock(kvm-irq_lock); status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); - mutex_unlock(kvm-irq_lock); if (ioctl == KVM_IRQ_LINE_STATUS) { irq_event.status = status; if (copy_to_user(argp, irq_event, diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 99017e8..95954ad 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work) struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); struct kvm *kvm = irqfd-kvm; - mutex_lock(kvm-irq_lock); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0); - mutex_unlock(kvm-irq_lock); } /* diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 036a431..47c0194 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -82,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; - WARN_ON(!mutex_is_locked(kvm-irq_lock)); - if (irq-dest_mode == 0 irq-dest_id == 0xff kvm_is_dm_lowest_prio(irq)) printk(KERN_INFO kvm: apic: phys broadcast and lowest prio\n); @@ -138,7 +136,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return 
kvm_irq_delivery_to_apic(kvm, NULL, irq); } -/* This should be called with the kvm-irq_lock mutex held +/* * Return value: * 0 Interrupt was ignored (masked or not delivered for other reasons) * = 0 Interrupt was coalesced (previous irq is still pending) @@ -153,8 +151,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) trace_kvm_set_irq(irq, level, irq_source_id); - WARN_ON(!mutex_is_locked(kvm-irq_lock)); - /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. diff --git a/virt/kvm/kvm_main.c
[PATCH] VMX: Return to userspace on invalid state emulation failure
Return to userspace instead of repeatedly trying to emulate instructions that have already failed Signed-off-by: Mohammed Gamal m.gamal...@gmail.com --- arch/x86/kvm/vmx.c |6 +- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1ee811c..423e44f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3341,6 +3341,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, if (err != EMULATE_DONE) { kvm_report_emulation_failure(vcpu, emulation failure); + kvm_run-exit_reason = KVM_EXIT_INTERNAL_ERROR; + kvm_run-internal.suberror = KVM_INTERNAL_ERROR_EMULATION; break; } @@ -3612,7 +3614,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vmx-entry_time = ktime_get(); /* Handle invalid guest state instead of entering VMX */ - if (vmx-emulation_required emulate_invalid_guest_state) { + if (vmx-emulation_required emulate_invalid_guest_state +!(kvm_run-exit_reason == KVM_EXIT_INTERNAL_ERROR + kvm_run-internal.suberror == KVM_INTERNAL_ERROR_EMULATION)) { handle_invalid_guest_state(vcpu, kvm_run); return; } -- 1.6.0.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.
Use return value from kvm_set_irq() to track coalesced PIT interrupts instead of ack/mask notifiers. Signed-off-by: Gleb Natapov g...@redhat.com diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b857ca3..0b63991 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_pit *pit = vcpu-kvm-arch.vpit; - if (pit kvm_vcpu_is_bsp(vcpu) pit-pit_state.irq_ack) - return atomic_read(pit-pit_state.pit_timer.pending); - return 0; -} - -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, -irq_ack_notifier); - spin_lock(ps-inject_lock); - if (atomic_dec_return(ps-pit_timer.pending) 0) - atomic_inc(ps-pit_timer.pending); - ps-irq_ack = 1; - spin_unlock(ps-inject_lock); + return atomic_read(pit-pit_state.pit_timer.pending); } void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) pt-vcpu = pt-kvm-bsp_vcpu; atomic_set(pt-pending, 0); - ps-irq_ack = 1; hrtimer_start(pt-timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit) mutex_unlock(pit-pit_state.lock); atomic_set(pit-pit_state.pit_timer.pending, 0); - pit-pit_state.irq_ack = 1; -} - -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) -{ - struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); - - if (!mask) { - atomic_set(pit-pit_state.pit_timer.pending, 0); - pit-pit_state.irq_ack = 1; - } } static const struct kvm_io_device_ops pit_dev_ops = { @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) mutex_init(pit-pit_state.lock); mutex_lock(pit-pit_state.lock); - spin_lock_init(pit-pit_state.inject_lock); kvm-arch.vpit = pit; pit-kvm = kvm; @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 
flags) pit_state-pit = pit; hrtimer_init(pit_state-pit_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - pit_state-irq_ack_notifier.gsi = 0; - pit_state-irq_ack_notifier.irq_acked = kvm_pit_ack_irq; - kvm_register_irq_ack_notifier(kvm, pit_state-irq_ack_notifier); pit_state-pit_timer.reinject = true; mutex_unlock(pit-pit_state.lock); kvm_pit_reset(pit); - pit-mask_notifier.func = pit_mask_notifer; - kvm_register_irq_mask_notifier(kvm, 0, pit-mask_notifier); - kvm_iodevice_init(pit-dev, pit_dev_ops); ret = __kvm_io_bus_register_dev(kvm-pio_bus, pit-dev); if (ret 0) @@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm) struct hrtimer *timer; if (kvm-arch.vpit) { - kvm_unregister_irq_mask_notifier(kvm, 0, - kvm-arch.vpit-mask_notifier); - kvm_unregister_irq_ack_notifier(kvm, - kvm-arch.vpit-pit_state.irq_ack_notifier); mutex_lock(kvm-arch.vpit-pit_state.lock); timer = kvm-arch.vpit-pit_state.pit_timer.timer; hrtimer_cancel(timer); @@ -683,12 +647,12 @@ void kvm_free_pit(struct kvm *kvm) } } -static void __inject_pit_timer_intr(struct kvm *kvm) +static int __inject_pit_timer_intr(struct kvm *kvm) { struct kvm_vcpu *vcpu; - int i; + int i, r; - kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1); + r = kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1); kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 0); /* @@ -703,6 +667,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm) if (kvm-arch.vapics_in_nmi_mode 0) kvm_for_each_vcpu(i, vcpu, kvm) kvm_apic_nmi_wd_deliver(vcpu); + + return r; } void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) @@ -711,20 +677,14 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu-kvm; struct kvm_kpit_state *ps; - if (pit) { - int inject = 0; - ps = pit-pit_state; - - /* Try to inject pending interrupts when -* last one has been acked. 
-*/ - spin_lock(&ps->inject_lock); - if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { - ps->irq_ack = 0; - inject = 1; - } - spin_unlock(&ps->inject_lock); - if (inject) - __inject_pit_timer_intr(kvm); - } +
Re: [PATCH] Fix sysenter migration issue on AMD CPUs
Andre Przywara wrote: Stephane, Thomas: Can you verify this? I'm not very familiar with compiling kvm-mod from git sources, and your patch does not apply to the svm.c shipped with kernel 2.6.30.5. So at the moment I have no clue how to verify. Is there any short howto out there on how to get the kvm module from git source? Regards Thomas -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Fix sysenter migration issue on AMD CPUs
Thomas Besser wrote: Andre Przywara wrote: Stephane, Thomas: Can you verify this? I'm not very familiar with compiling kvm-mod from git sources. And your patch does not apply to svm.c shipped with kernel 2.6.30.5 You shouldn't have seen any problems with 2.6.30.5, since the code in question (sysenter/syscall emulation) is not in here. So at the moment I have no clue, how to verify. Is there any short howto out there, how to get kvm module from git source? You can use the attached patch, which applies against kvm-kmod-devel-88.tar.gz If that does not help, tell me with what tree or tarball you are usually generating the KVM kernel modules. I use Avi's latest git tree, which is regularly synced with 2.6.31.rcx. The patch from Friday should apply against this one. $ git clone git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git You will have to build your whole kernel with this tree, if you build only the modules from here they will certainly mismatch your running kernel. Regards, Andre. -- Andre Przywara AMD-Operating System Research Center (OSRC), Dresden, Germany Tel: +49 351 448 3567 12 to satisfy European Law for business letters: Advanced Micro Devices GmbH Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen Geschaeftsfuehrer: Thomas M. McCoy; Giuliano Meroni Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen Registergericht Muenchen, HRB Nr. 
43632 diff --git a/x86/svm.c b/x86/svm.c index fb29061..75d18bf 100644 --- a/x86/svm.c +++ b/x86/svm.c @@ -117,7 +117,6 @@ struct vcpu_svm { unsigned long vmcb_pa; struct svm_cpu_data *svm_data; uint64_t asid_generation; - uint64_t sysenter_cs; uint64_t sysenter_esp; uint64_t sysenter_eip; @@ -436,8 +435,6 @@ static void svm_vcpu_init_msrpm(u32 *msrpm) #endif set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); - set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1); - set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1); } static void svm_enable_lbrv(struct vcpu_svm *svm) @@ -2062,7 +2059,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) break; #endif case MSR_IA32_SYSENTER_CS: - *data = svm-sysenter_cs; + *data = svm-vmcb-save.sysenter_cs; break; case MSR_IA32_SYSENTER_EIP: *data = svm-sysenter_eip; @@ -2151,13 +2148,15 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) break; #endif case MSR_IA32_SYSENTER_CS: - svm-sysenter_cs = data; + svm-vmcb-save.sysenter_cs = data; break; case MSR_IA32_SYSENTER_EIP: svm-sysenter_eip = data; + svm-vmcb-save.sysenter_eip = data; break; case MSR_IA32_SYSENTER_ESP: svm-sysenter_esp = data; + svm-vmcb-save.sysenter_esp = data; break; case MSR_IA32_DEBUGCTLMSR: if (!svm_has(SVM_FEATURE_LBRV)) {
Re: [PATCH] Fix sysenter migration issue on AMD CPUs
Andre Przywara wrote: Thomas Besser wrote: Andre Przywara wrote: Stephane, Thomas: Can you verify this? I'm not very familiar with compiling kvm-mod from git sources. And your patch does not apply to svm.c shipped with kernel 2.6.30.5 You shouldn't have seen any problems with 2.6.30.5, since the code in question (sysenter/syscall emulation) is not in here. Both hosts running 2.6.30.5 with kvm as module from kernel source. So I should have another problem with live migration and qemu-kvm (0.10.6). Probably this http://article.gmane.org/gmane.comp.emulators.kvm.devel/39185 Thanx Thomas -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[no subject]
subscribe kvm Veja quais são os assuntos do momento no Yahoo! +Buscados http://br.maisbuscados.yahoo.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: qemu-kvm segfaults in qemu_del_timer (0.10.5 and 0.10.6)
Chris Webb ch...@arachsys.com writes: With the following applied, VNC connections and disconnections still work correctly, so it doesn't horribly break anything, but I can't immediately confirm whether it will cure the rare segfaults as I haven't yet found a rapid way of reproducing the crashes other than by waiting for one. Just to follow up on this: the backported patch has cured the vast majority of VNC crashes we've been seeing on 0.10.6, although I've still seen this earlier today: Core was generated by `qemu-kvm -m 512 -smp 1 -uuid d6f2cb13-7421-4baa-a978-eda9bec9d075 -pidfile /var'. Program terminated with signal 11, Segmentation fault. [New process 16847] [New process 16855] (gdb) bt #0 0x7fe42e9c6cb1 in memcpy () from /lib/libc.so.6 #1 0x004917e4 in vnc_write (vs=0x31a7f50, data=0x7fffe3a19230, len=2) at vnc.c:323 #2 0x004919bf in vnc_write_u16 (vs=0x7fe2f8cae023, value=value optimized out) at vnc.c:1035 #3 0x00491bf3 in vnc_framebuffer_update (vs=0x7fe2f8cae023, x=-475950544, y=2, w=16385, h=1, encoding=6) at vnc.c:286 #4 0x00496660 in send_framebuffer_update (vs=0x7fe2f8cae023, x=-475950544, y=196, w=208, h=1) at vnc.c:598 #5 0x00496f65 in vnc_update_client (opaque=value optimized out) at vnc.c:754 #6 0x0040822a in main_loop_wait (timeout=value optimized out) at /packages/qemu-kvm+vncfix/src-nUlCId/vl.c:1240 #7 0x0051753a in kvm_main_loop () at /packages/qemu-kvm+vncfix/src-nUlCId/qemu-kvm.c:596 #8 0x0040c8a5 in main (argc=value optimized out, argv=value optimized out, envp=value optimized out) at /packages/qemu-kvm+vncfix/src-nUlCId/vl.c:3850 (gdb) f 1 #1 0x004917e4 in vnc_write (vs=0x31a7f50, data=0x7fffe3a19230, len=2) at vnc.c:323 323 memcpy(buffer-buffer + buffer-offset, data, len); (gdb) f 1 #1 0x004917e4 in vnc_write (vs=0x31a7f50, data=0x7fffe3a19230, len=2) at vnc.c:323 323 memcpy(buffer-buffer + buffer-offset, data, len); (gdb) p *vs $1 = {timer = 0x2b90b20, csock = 18, ds = 0x28a1a20, vd = 0x28b0fc0, need_update = 1, dirty_row = {{0, 0, 0, 0} 
repeats 197 times, {65535, 262128, 0, 0}, {4294967295, 1, 0, 0}, {4294967288, 262143, 0, 0}, {4294443008, 262143, 0, 0}, {131071, 262128, 0, 0}, {4294967295, 1, 0, 0}, {4294967292, 262143, 0, 0}, {4294443008, 262143, 0, 0}, {131071, 262136, 0, 0}, {4294967295, 1, 0, 0}, {4294967292, 262143, 0, 0}, {4294443008, 262143, 0, 0}, { 131071, 262136, 0, 0}, {4294967295, 1, 0, 0}, {4294967292, 262143, 0, 0}, {4294705152, 262143, 0, 0}, {131071, 262136, 0, 0}, {4294967295, 1, 0, 0}, {4294967294, 262143, 0, 0}, {4294705152, 262143, 0, 0}, {131071, 262140, 0, 0}, {4294967295, 1, 0, 0}, {4294967294, 262143, 0, 0}, {4294836224, 262143, 0, 0}, {131071, 262140, 0, 0}, { 4294967295, 1, 0, 0}, {4294967294, 262143, 0, 0}, {4294836224, 262143, 0, 0}, {131071, 262140, 0, 0}, { 4294967295, 1, 0, 0}, {4294967295, 262143, 0, 0}, {4294836224, 262143, 0, 0}, {131071, 262142, 0, 0}, { 4294967295, 1, 0, 0}, {4294967295, 262143, 0, 0}, {4294901760, 262143, 0, 0}, {131071, 262142, 0, 0}, { 4294967295, 1, 0, 0}, {4294967295, 262143, 0, 0}, {4294901760, 262143, 0, 0}, {131071, 262142, 0, 0}, { 4294967295, 131073, 0, 0}, {4294967295, 262143, 0, 0}, {4294901760, 262143, 0, 0}, {131071, 262143, 0, 0}, { 4294967295, 131073, 0, 0}, {4294967295, 262143, 0, 0}, {4294934528, 262143, 0, 0}, {131071, 262143, 0, 0}, { 4294967295, 131075, 0, 0}, {4294967295, 262143, 0, 0}, {4294934528, 262143, 0, 0}, {131071, 262143, 0, 0}, { 4294967295, 196611, 0, 0}, {4294967295, 262143, 0, 0}, {4294934528, 262143, 0, 0}, {2147614719, 262143, 0, 0}, { 4294967295, 196611, 0, 0}, {4294967295, 262143, 0, 0}, {4294950912, 262143, 0, 0}, {2147614719, 262143, 0, 0}, { 4294967295, 196611, 0, 0}, {4294967295, 262143, 0, 0}, {4294950912, 262143, 0, 0}, {2147614719, 262143, 0, 0}, { 4294967295, 229379, 0, 0}, {4294967295, 262143, 0, 0}, {4294950912, 262143, 0, 0}, {3221356543, 262143, 0, 0}, { 4294967295, 229379, 0, 0}, {4294967295, 262143, 0, 0}, {4294950912, 262143, 0, 0}, {3221356543, 262143, 0, 0}, { 4294967295, 229377, 0, 0}, 
{4294967295, 262143, 0, 0}, {4294959104, 262143, 0, 0}, {3221356543, 262143, 0, 0}, { 4294967295, 245761, 0, 0}, {4294967295, 262143, 0, 0}, {4294959104, 262143, 0, 0}, {3758227455, 262143, 0, 0}, { 4294967295, 245761, 0, 0}, {4294967295, 262143, 0, 0}, {4294959104, 262143, 0, 0}, {3758227455, 262143, 0, 0}, { 4294967295, 245761, 0, 0}, {4294967295, 262143, 0, 0}, {4294963200, 262143, 0, 0}, {3758227455, 262143, 0, 0}, { 4294967295, 253953, 0, 0}, {4294967295, 262143, 0, 0}, {4294963200, 262143, 0, 0}, {4026662911, 262143, 0, 0}, { 4294967295, 253953, 0, 0}, {4294967295, 262143, 0, 0}, {4294963200, 262143, 0, 0}, {4026662911, 262143, 0, 0}, { 4294967295, 253953, 0, 0},
Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.
On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote: Use return value from kvm_set_irq() to track coalesced PIT interrupts instead of ack/mask notifiers. Gleb, What is the advantage of doing so? Ack notifiers are asynchronous notifications. Using the return value from kvm_set_irq implies that timer emulation is based on a tick generating device on the host side. What I mean is that the ack notifications are useful, since they are asynchronous. Supposing your goal is to get rid of ack notifiers, due to their burden in irqchip code? Signed-off-by: Gleb Natapov g...@redhat.com diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b857ca3..0b63991 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_pit *pit = vcpu-kvm-arch.vpit; - if (pit kvm_vcpu_is_bsp(vcpu) pit-pit_state.irq_ack) - return atomic_read(pit-pit_state.pit_timer.pending); - return 0; -} - -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, - irq_ack_notifier); - spin_lock(ps-inject_lock); - if (atomic_dec_return(ps-pit_timer.pending) 0) - atomic_inc(ps-pit_timer.pending); - ps-irq_ack = 1; - spin_unlock(ps-inject_lock); + return atomic_read(pit-pit_state.pit_timer.pending); } void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) pt-vcpu = pt-kvm-bsp_vcpu; atomic_set(pt-pending, 0); - ps-irq_ack = 1; hrtimer_start(pt-timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit) mutex_unlock(pit-pit_state.lock); atomic_set(pit-pit_state.pit_timer.pending, 0); - pit-pit_state.irq_ack = 1; -} - -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) -{ - struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); - - if 
(!mask) { - atomic_set(pit-pit_state.pit_timer.pending, 0); - pit-pit_state.irq_ack = 1; - } } static const struct kvm_io_device_ops pit_dev_ops = { @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) mutex_init(pit-pit_state.lock); mutex_lock(pit-pit_state.lock); - spin_lock_init(pit-pit_state.inject_lock); kvm-arch.vpit = pit; pit-kvm = kvm; @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) pit_state-pit = pit; hrtimer_init(pit_state-pit_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - pit_state-irq_ack_notifier.gsi = 0; - pit_state-irq_ack_notifier.irq_acked = kvm_pit_ack_irq; - kvm_register_irq_ack_notifier(kvm, pit_state-irq_ack_notifier); pit_state-pit_timer.reinject = true; mutex_unlock(pit-pit_state.lock); kvm_pit_reset(pit); - pit-mask_notifier.func = pit_mask_notifer; - kvm_register_irq_mask_notifier(kvm, 0, pit-mask_notifier); - kvm_iodevice_init(pit-dev, pit_dev_ops); ret = __kvm_io_bus_register_dev(kvm-pio_bus, pit-dev); if (ret 0) @@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm) struct hrtimer *timer; if (kvm-arch.vpit) { - kvm_unregister_irq_mask_notifier(kvm, 0, -kvm-arch.vpit-mask_notifier); - kvm_unregister_irq_ack_notifier(kvm, - kvm-arch.vpit-pit_state.irq_ack_notifier); mutex_lock(kvm-arch.vpit-pit_state.lock); timer = kvm-arch.vpit-pit_state.pit_timer.timer; hrtimer_cancel(timer); @@ -683,12 +647,12 @@ void kvm_free_pit(struct kvm *kvm) } } -static void __inject_pit_timer_intr(struct kvm *kvm) +static int __inject_pit_timer_intr(struct kvm *kvm) { struct kvm_vcpu *vcpu; - int i; + int i, r; - kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1); + r = kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1); kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 0); /* @@ -703,6 +667,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm) if (kvm-arch.vapics_in_nmi_mode 0) kvm_for_each_vcpu(i, vcpu, kvm) kvm_apic_nmi_wd_deliver(vcpu); + + return r; } void kvm_inject_pit_timer_irqs(struct 
kvm_vcpu *vcpu) @@ -711,20 +677,14 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu-kvm; struct kvm_kpit_state *ps; - if (pit) { - int inject = 0; - ps = pit-pit_state; - - /* Try to
Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.
On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote: On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote: Use return value from kvm_set_irq() to track coalesced PIT interrupts instead of ack/mask notifiers. Gleb, What is the advantage of doing so? Current code very fragile and relies on hacks to work. Lets take calling of ack notifiers on pic reset as an example. Why is it needed? It is obviously wrong thing to do from assigned devices POV. Why ioapic calls mask notifiers but pic doesn't? Besides diffstat for the patch shows: 2 files changed, 16 insertions(+), 59 deletions(-) 43 lines less for the same functionality. Looks like clear win to me. Ack notifiers are asynchronous notifications. Using the return value from kvm_set_irq implies that timer emulation is based on a tick generating device on the host side. No notification is needed in the first place. You know immediately if injection fails or not. I don't see why using return value from kvm_set_irq implies that timer emulation is based on a tick generating device on the host side? What can you do with ack notifiers that can't be done without? What I mean is that the ack notifications are useful, since they are asynchronous. What I mean is that no notification is needed at all since result is known immediately. Supposing your goal is to get rid of ack notifiers, due to their burden in irqchip code? Unfortunately to get rid of ack notifiers we need to get rid of assigned devices. I will gladly do that, but I doubt Avi shares my enthusiasm. The patch to remove mask notification already sits in my patch queue though. 
Signed-off-by: Gleb Natapov g...@redhat.com diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b857ca3..0b63991 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_pit *pit = vcpu-kvm-arch.vpit; - if (pit kvm_vcpu_is_bsp(vcpu) pit-pit_state.irq_ack) - return atomic_read(pit-pit_state.pit_timer.pending); - return 0; -} - -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, -irq_ack_notifier); - spin_lock(ps-inject_lock); - if (atomic_dec_return(ps-pit_timer.pending) 0) - atomic_inc(ps-pit_timer.pending); - ps-irq_ack = 1; - spin_unlock(ps-inject_lock); + return atomic_read(pit-pit_state.pit_timer.pending); } void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) pt-vcpu = pt-kvm-bsp_vcpu; atomic_set(pt-pending, 0); - ps-irq_ack = 1; hrtimer_start(pt-timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit) mutex_unlock(pit-pit_state.lock); atomic_set(pit-pit_state.pit_timer.pending, 0); - pit-pit_state.irq_ack = 1; -} - -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) -{ - struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); - - if (!mask) { - atomic_set(pit-pit_state.pit_timer.pending, 0); - pit-pit_state.irq_ack = 1; - } } static const struct kvm_io_device_ops pit_dev_ops = { @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) mutex_init(pit-pit_state.lock); mutex_lock(pit-pit_state.lock); - spin_lock_init(pit-pit_state.inject_lock); kvm-arch.vpit = pit; pit-kvm = kvm; @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) pit_state-pit = pit; hrtimer_init(pit_state-pit_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 
- pit_state-irq_ack_notifier.gsi = 0; - pit_state-irq_ack_notifier.irq_acked = kvm_pit_ack_irq; - kvm_register_irq_ack_notifier(kvm, pit_state-irq_ack_notifier); pit_state-pit_timer.reinject = true; mutex_unlock(pit-pit_state.lock); kvm_pit_reset(pit); - pit-mask_notifier.func = pit_mask_notifer; - kvm_register_irq_mask_notifier(kvm, 0, pit-mask_notifier); - kvm_iodevice_init(pit-dev, pit_dev_ops); ret = __kvm_io_bus_register_dev(kvm-pio_bus, pit-dev); if (ret 0) @@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm) struct hrtimer *timer; if (kvm-arch.vpit) { - kvm_unregister_irq_mask_notifier(kvm, 0, - kvm-arch.vpit-mask_notifier); - kvm_unregister_irq_ack_notifier(kvm, - kvm-arch.vpit-pit_state.irq_ack_notifier); mutex_lock(kvm-arch.vpit-pit_state.lock); timer =
Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.
On Mon, Aug 24, 2009 at 08:16:46PM +0300, Gleb Natapov wrote: On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote: On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote: Use return value from kvm_set_irq() to track coalesced PIT interrupts instead of ack/mask notifiers. Gleb, What is the advantage of doing so? Current code very fragile and relies on hacks to work. Lets take calling of ack notifiers on pic reset as an example. Why is it needed? To signal the ack notifiers users that, in case of reset with pending IRR, the given interrupt has been acked (its an artificial ack event). Is there a need to differentiate between actual interrupt ack and reset with pending IRR? At the time this code was written, there was no indication that differentation would be necessary. It is obviously wrong thing to do from assigned devices POV. Thats not entirely clear to me. So what happens if a guest with PIC assigned device resets with a pending IRR? The host interrupt line will be kept disabled, even though the guest is able to process further interrupts? Why ioapic calls mask notifiers but pic doesn't? Because it is not implemented. Besides diffstat for the patch shows: 2 files changed, 16 insertions(+), 59 deletions(-) 43 lines less for the same functionality. Looks like clear win to me. Ack notifiers are asynchronous notifications. Using the return value from kvm_set_irq implies that timer emulation is based on a tick generating device on the host side. No notification is needed in the first place. You know immediately if injection fails or not. I don't see why using return value from kvm_set_irq implies that timer emulation is based on a tick generating device on the host side? What can you do with ack notifiers that can't be done without? If you don't have a host timer emulating the guest PIT, to periodically bang on kvm_set_irq, how do you know when to attempt reinjection? 
You keep calling kvm_set_irq on every guest entry to figure out when reinjection is possible? What I mean is that the ack notifications are useful, since they are asynchronous. What I mean is that no notification is needed at all since result is known immediately. ? Supposing your goal is to get rid of ack notifiers, due to their burden in irqchip code? Unfortunately to get rid of ack notifiers we need to get rid of assigned devices. I will gladly do that, but I doubt Avi shares my enthusiasm. The patch to remove mask notification already sits in my patch queue though. Signed-off-by: Gleb Natapov g...@redhat.com diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b857ca3..0b63991 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_pit *pit = vcpu-kvm-arch.vpit; - if (pit kvm_vcpu_is_bsp(vcpu) pit-pit_state.irq_ack) - return atomic_read(pit-pit_state.pit_timer.pending); - return 0; -} - -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, - irq_ack_notifier); - spin_lock(ps-inject_lock); - if (atomic_dec_return(ps-pit_timer.pending) 0) - atomic_inc(ps-pit_timer.pending); - ps-irq_ack = 1; - spin_unlock(ps-inject_lock); + return atomic_read(pit-pit_state.pit_timer.pending); } void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) pt-vcpu = pt-kvm-bsp_vcpu; atomic_set(pt-pending, 0); - ps-irq_ack = 1; hrtimer_start(pt-timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit) mutex_unlock(pit-pit_state.lock); atomic_set(pit-pit_state.pit_timer.pending, 0); - pit-pit_state.irq_ack = 1; -} - -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) -{ - struct kvm_pit *pit = container_of(kimn, struct 
kvm_pit, mask_notifier); - - if (!mask) { - atomic_set(pit-pit_state.pit_timer.pending, 0); - pit-pit_state.irq_ack = 1; - } } static const struct kvm_io_device_ops pit_dev_ops = { @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) mutex_init(pit-pit_state.lock); mutex_lock(pit-pit_state.lock); - spin_lock_init(pit-pit_state.inject_lock); kvm-arch.vpit = pit; pit-kvm = kvm; @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) pit_state-pit = pit; hrtimer_init(pit_state-pit_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - pit_state-irq_ack_notifier.gsi = 0; -
Re: [RFC] defer skb allocation in virtio_net -- mergable buff part
Hello Avi, Thanks for your review. I was on vacation last week. On Sun, 2009-08-16 at 16:47 +0300, Avi Kivity wrote: Alternatives include: - store the link in the page itself - have an array of pages per list element instead of just one pointer - combine the two, store an array of page pointers in one of the free pages - use the struct page::lru member The last is the most traditional and easiest so I'd recommend it (though it still takes the cacheline hit). I prefer the combination of the two. But I will compare the performance differences; if there is not much difference, we can use the easiest one. +static struct page_list *get_a_free_page(struct virtnet_info *vi, gfp_t gfp_mask) +{ + struct page_list *plist; + + if (list_empty(vi-freed_pages)) { + plist = kmalloc(sizeof(struct page_list), gfp_mask); + if (!plist) + return NULL; + list_add_tail(plist-list,vi-freed_pages); + plist-page = alloc_page(gfp_mask); What if the allocation fails here? It is handled by the caller. Thanks Shirley -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.
On Mon, Aug 24, 2009 at 02:44:27PM -0300, Marcelo Tosatti wrote: On Mon, Aug 24, 2009 at 08:16:46PM +0300, Gleb Natapov wrote: On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote: On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote: Use return value from kvm_set_irq() to track coalesced PIT interrupts instead of ack/mask notifiers. Gleb, What is the advantage of doing so? Current code very fragile and relies on hacks to work. Lets take calling of ack notifiers on pic reset as an example. Why is it needed? To signal the ack notifiers users that, in case of reset with pending IRR, the given interrupt has been acked (its an artificial ack event). But IRR was not acked. The reason it is done is that otherwise the current logic will prevent further interrupt injection. Is there a need to differentiate between actual interrupt ack and reset with pending IRR? At the time this code was written, there was no indication that differentation would be necessary. This is two different things. Ack notifiers should be called when guest acks interrupt. Calling it on reset is wrong (see below). We can add reset notifiers, but we just build yet another infrastructure to support current reinjection scheme. It is obviously wrong thing to do from assigned devices POV. Thats not entirely clear to me. So what happens if a guest with PIC assigned device resets with a pending IRR? The host interrupt line will be kept disabled, even though the guest is able to process further interrupts? The host interrupt line will be enabled (assigned device ack notifier does this) without clearing interrupt condition in assigned device (guest hasn't acked irq so how can we be sure it ran device's irq handler?). Host will hang. Why ioapic calls mask notifiers but pic doesn't? Because it is not implemented. I see that. Why? Why it was important to implement for ioapic but not for pic? Do we know what doesn't work now? 
Besides diffstat for the patch shows: 2 files changed, 16 insertions(+), 59 deletions(-) 43 lines less for the same functionality. Looks like clear win to me. Ack notifiers are asynchronous notifications. Using the return value from kvm_set_irq implies that timer emulation is based on a tick generating device on the host side. No notification is needed in the first place. You know immediately if injection fails or not. I don't see why using return value from kvm_set_irq implies that timer emulation is based on a tick generating device on the host side? What can you do with ack notifiers that can't be done without? If you don't have a host timer emulating the guest PIT, to periodically bang on kvm_set_irq, how do you know when to attempt reinjection? You keep calling kvm_set_irq on every guest entry to figure out when reinjection is possible? If we have timer to inject then yes. It is relatively cheap. Most of the time pending count will be zero. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] eventfd: new EFD_STATE flag
On 08/24/2009 09:25 PM, Davide Libenzi wrote: Indeed, the default eventfd behaviour is like, well, an event. Signaling (kernel side) or writing (userspace side), signals the event. Waiting (reading) it, will reset the event. If you use EFD_SEMAPHORE, you get a semaphore-like behavior. Events and semaphores are two widely known and used abstractions. The EFD_STATE proposed one, well, no. Not at all. There are libraries that provide notifications (or fire watches) when some value changes. They're much less frequently used than events or semaphores, though. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] eventfd: new EFD_STATE flag
On Sun, 23 Aug 2009, Michael S. Tsirkin wrote: On Sun, Aug 23, 2009 at 04:40:51PM +0300, Avi Kivity wrote: On 08/23/2009 04:36 PM, Michael S. Tsirkin wrote: More important here is realization that eventfd is a mutex/semaphore implementation, not a generic event reporting interface as we are trying to use it. Well it is a generic event reporting interface (for example, aio uses it). Davide, I think it's a valid point. For example, what read on eventfd does (zero a counter and return) is not like any semaphore I saw. Indeed, the default eventfd behaviour is like, well, an event. Signaling (kernel side) or writing (userspace side), signals the event. Waiting (reading) it, will reset the event. If you use EFD_SEMAPHORE, you get a semaphore-like behavior. Events and sempahores are two widely known and used abstractions. The EFD_STATE proposed one, well, no. Not at all. - Davide -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.
On Mon, Aug 24, 2009 at 09:19:05PM +0300, Gleb Natapov wrote: It is obviously wrong thing to do from assigned devices POV. That's not entirely clear to me. So what happens if a guest with PIC assigned device resets with a pending IRR? The host interrupt line will be kept disabled, even though the guest is able to process further interrupts? The host interrupt line will be enabled (assigned device ack notifier does this) without clearing interrupt condition in assigned device (guest hasn't acked irq so how can we be sure it ran device's irq handler?). Host will hang. Actually, on second thought, it will not hang. Next time host interrupt handler runs it will disable interrupt once again. -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vbus design points: shm and shm-signals
Gregory Haskins wrote: Gregory Haskins wrote: Ingo Molnar wrote: We all love faster code and better management interfaces and tons of your prior patches got accepted by Avi. This time you didnt even _try_ to improve virtio. Im sorry, but you are mistaken: http://lkml.indiana.edu/hypermail/linux/kernel/0904.2/02443.html BTW: One point that I forgot to point out in this most recent thread that I am particularly proud of here is the design of the vbus shared-memory model. Despite some claims to the contrary; not only is it possible to improve virtio with vbus (as evident by the patch referenced above)...I specifically designed vbus with virtio considerations in mind from the start! In fact, the design is conducive to accelerating a variety of other models as well. Read on for details. Vbus was designed it to be _agnostic_ to the shm algorithm in general. This allows you to, of course, run ring algorithms (such as virtqueues, or IOQs), but really any other designs as well, such as shared-tables, etc. A guest driver sees the following interface: struct vbus_device_proxy_ops { int (*open)(struct vbus_device_proxy *dev, int version, int flags); int (*close)(struct vbus_device_proxy *dev, int flags); int (*shm)(struct vbus_device_proxy *dev, int id, int prio, void *ptr, size_t len, struct shm_signal_desc *sigdesc, struct shm_signal **signal, int flags); int (*call)(struct vbus_device_proxy *dev, u32 func, void *data, size_t len, int flags); void (*release)(struct vbus_device_proxy *dev); }; note the ops-shm() method. This allows the driver to register some arbitrary pointer (ptr, len) with the host, optionally embedding a shm_signal_desc object in the memory. If sigdesc is non-null, the connector will allocate and return a fully formed shm_signal object in **signal. Fundamentally, how is this different than the virtio-add_buf concept? 
virtio provides a mechanism to register scatter/gather lists, associate a handle with them, and provides a mechanism for retrieving notification that the buffer has been processed. vbus provides a mechanism to register a single buffer with an integer handle, priority, and a signaling mechanism. So virtio provides builtin support for scatter/gathers whereas vbus models priority. But fundamentally, they seem like almost identical concepts. If we added priority to virtio-add_buf, would it be equivalent in your mind functionally speaking? What does one do with priority, btw? Is there something I'm overlooking? Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vbus design points: shm and shm-signals
Hi Anthony, Anthony Liguori wrote: Gregory Haskins wrote: Gregory Haskins wrote: Ingo Molnar wrote: We all love faster code and better management interfaces and tons of your prior patches got accepted by Avi. This time you didnt even _try_ to improve virtio. Im sorry, but you are mistaken: http://lkml.indiana.edu/hypermail/linux/kernel/0904.2/02443.html BTW: One point that I forgot to point out in this most recent thread that I am particularly proud of here is the design of the vbus shared-memory model. Despite some claims to the contrary; not only is it possible to improve virtio with vbus (as evident by the patch referenced above)...I specifically designed vbus with virtio considerations in mind from the start! In fact, the design is conducive to accelerating a variety of other models as well. Read on for details. Vbus was designed it to be _agnostic_ to the shm algorithm in general. This allows you to, of course, run ring algorithms (such as virtqueues, or IOQs), but really any other designs as well, such as shared-tables, etc. A guest driver sees the following interface: struct vbus_device_proxy_ops { int (*open)(struct vbus_device_proxy *dev, int version, int flags); int (*close)(struct vbus_device_proxy *dev, int flags); int (*shm)(struct vbus_device_proxy *dev, int id, int prio, void *ptr, size_t len, struct shm_signal_desc *sigdesc, struct shm_signal **signal, int flags); int (*call)(struct vbus_device_proxy *dev, u32 func, void *data, size_t len, int flags); void (*release)(struct vbus_device_proxy *dev); }; note the ops-shm() method. This allows the driver to register some arbitrary pointer (ptr, len) with the host, optionally embedding a shm_signal_desc object in the memory. If sigdesc is non-null, the connector will allocate and return a fully formed shm_signal object in **signal. Fundamentally, how is this different than the virtio-add_buf concept? From my POV, they are at different levels. 
Calling vbus-shm() is for establishing a shared-memory region including routing the memory and signal-path contexts. You do this once at device init time, and then run some algorithm on top (such as a virtqueue design). virtio-add_buf() OTOH, is a run-time function. You do this to modify the shared-memory region that is already established at init time by something like vbus-shm(). You would do this to queue a network packet, for instance. That said, shm-signal's closest analogy to virtio would be vq-kick(), vq-callback(), vq-enable_cb(), and vq-disable_cb(). The difference is that the notification mechanism isn't associated with a particular type of shared-memory construct (such as a virt-queue), but instead can be used with any shared-mem algorithm (at least, if I designed it properly). The closest analogy for vbus-shm() to virtio would be vdev-config-find_vqs(). Again, the difference is that the algorithm (ring, etc) is not dictated by the call. You then overlay something like virtqueue on top. virtio provides a mechanism to register scatter/gather lists, associate a handle with them, and provides a mechanism for retrieving notification that the buffer has been processed. Yes, and I agree this is very useful for many/most algorithms...but not all. Sometimes you don't want ring-like semantics, but instead want something like an idempotent table. (Think of things like interrupt controllers, timers, etc). Rings, of course, have a trait that all updates are retained in fifo order. For many things (e.g. network, block io, etc), this is exactly what you want. If I say send packet X now, and send packet Y later, I want the system to do both (and perhaps in that order), so a ring scheme works well. However, sometimes you may want to say time is now X, and later time is now Y. The update value of 'X' is technically superseded by Y and is stale. 
But a ring may allow both to exist in-flight within the shm simultaneously if the recipient (guest or host) is lagging, and the X may be processed even though its data is now irrelevant. What we really want is the transform of X-Y to invalidate anything else in flight so that only Y is visible. So in a case like this, we may want a different algorithm. Something like a table which always contains the current/valid value, and a way to signal in both directions when something interesting happens to that data. If you think about it, a ring is a superset of this construct...the ring meta-data is the shared-table (e.g. HEAD ptr, TAIL ptr, COUNT, etc). So we start by introducing the basic shm concept, and allow the next layer (virtio/virtqueue) in the stack to refine it for its needs. vbus provides a mechanism to register a single buffer with an integer handle, priority, and a signaling mechanism. Again, I think we are talking about two different layers. You would never put entries into a virtio-ring
user question: graphic acceleration for CAD
Hello Community, I just want to ask if it is planned to support 3D acceleration features for Windows guests on Linux hosts to enhance Windows-based CAD applications. I am not aware of the features modern CAD software is using so I cannot provide a list of requested features, sorry. I am not sure if it is possible to forward features of the host driver to the guest but let me know if something like this is planned even if it is only available for one guest at the same time. I am not a member of the list so please CC me. Thank you, Aleks -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vhost net: performance with ping benchmark
On Mon, Aug 24, 2009 at 11:12:41AM +0300, Michael S. Tsirkin wrote: At Rusty's suggestion, I tested vhost base performance with ping. Results below, and seem to be what you'd expect. Rusty, any chance you could look at the code? Is it in reasonable shape? I think it makes sense to merge it through you. What do you think? One comment on file placement: I put files under a separate vhost directory to avoid confusion with virtio-net which runs in guest. Does this sound sane? Also, can a minimal version (without TSO, tap or any other features) be merged upstream first so that features can be added later? Or do we have to wait until it's more full featured? Finally, can it reasonably make 2.6.32, or you think it needs more time out of tree? Thanks very much, -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vbus design points: shm and shm-signals
Gregory Haskins wrote: Anthony Liguori wrote: Fundamentally, how is this different than the virtio-add_buf concept? From my POV, they are at different levels. Calling vbus-shm() is for establishing a shared-memory region including routing the memory and signal-path contexts. You do this once at device init time, and then run some algorithm on top (such as a virtqueue design). virtio-add_buf() OTOH, is a run-time function. You do this to modify the shared-memory region that is already established at init time by something like vbus-shm(). You would do this to queue a network packet, for instance. That said, shm-signal's closest analogy to virtio would be vq-kick(), vq-callback(), vq-enable_cb(), and vq-disable_cb(). The difference is that the notification mechanism isn't associated with a particular type of shared-memory construct (such as a virt-queue), but instead can be used with any shared-mem algorithm (at least, if I designed it properly). The closest analogy for vbus-shm() to virtio would be vdev-config-find_vqs(). Again, the difference is that the algorithm (ring, etc) is not dictated by the call. You then overlay something like virtqueue on top. BTW: Another way to think of this is that virtio-add_buf() is really buffer assignment, whereas vbus-shm() is buffer sharing. The former is meant to follow an assign, consume, re-assign, reclaim model, where the changing pointer ownership implicitly serializes the writability of the buffer. Its used (quite effectively) for things like passing a network-packet around. Conversely, the latter case (buffer sharing) is designed for concurrent writers. Its used for things like ring-metadata, shared-table designs, etc. Anything that generally is designed for a longer-term, parallel update model, instead of a consume/reclaim model. Whether we realize it or not, we generally build buffer-assignment algorithms on top of buffer-sharing algorithms. 
Therefore, while virtio technically has both of these components, it only exposes the former (buffer-assignment) as a user-extensible ABI (vq-add_buf). The latter (buffer-sharing) is inextricably linked to the underlying virtqueue ABI (vdev-find_vqs) (or, at least it is today). This is why I keep emphasizing that they are different layers of the same stack. From a device point of view, virtio adds a robust ring model with buffer-assignment capabilities, support for scatter-gather, etc. Vbus underneath it provides a robust buffer-sharing design with considerations for things like end-to-end prioritization, mitigation of various virt-like inefficiencies (hypercalls, interrupts, eois, spurious re-signals), etc. The idea is you can then join the two together to do something like build 8-rx virtqueues for your virtio-net to support prio. If you take these things into consideration on the backend design as well, you can actually tie it in end-to-end to gain performance and capabilities not previously available in KVM (or possibly any virt platform). HTH, Kind Regards, -Greg signature.asc Description: OpenPGP digital signature
Re: [PATCH 0/2] eventfd: new EFD_STATE flag
On Mon, Aug 24, 2009 at 11:25:01AM -0700, Davide Libenzi wrote: On Sun, 23 Aug 2009, Michael S. Tsirkin wrote: On Sun, Aug 23, 2009 at 04:40:51PM +0300, Avi Kivity wrote: On 08/23/2009 04:36 PM, Michael S. Tsirkin wrote: More important here is realization that eventfd is a mutex/semaphore implementation, not a generic event reporting interface as we are trying to use it. Well it is a generic event reporting interface (for example, aio uses it). Davide, I think it's a valid point. For example, what read on eventfd does (zero a counter and return) is not like any semaphore I saw. Indeed, the default eventfd behaviour is like, well, an event. Signaling (kernel side) or writing (userspace side), signals the event. Waiting (reading) it, will reset the event. If you use EFD_SEMAPHORE, you get a semaphore-like behavior. Events and semaphores are two widely known and used abstractions. The EFD_STATE proposed one, well, no. Not at all. Hmm. All we try to do is, associate a small key with the event that we signal. Is it really that uncommon/KVM specific? - Davide -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] eventfd: new EFD_STATE flag
On Mon, 24 Aug 2009, Avi Kivity wrote: On 08/24/2009 09:25 PM, Davide Libenzi wrote: Indeed, the default eventfd behaviour is like, well, an event. Signaling (kernel side) or writing (userspace side), signals the event. Waiting (reading) it, will reset the event. If you use EFD_SEMAPHORE, you get a semaphore-like behavior. Events and semaphores are two widely known and used abstractions. The EFD_STATE proposed one, well, no. Not at all. There are libraries that provide notifications (or fire watches) when some value changes. They're much less frequently used than events or semaphores, though. There are userspace libraries that do almost everything, but you hardly see things like pthread_(EFD_STATE-like)_create() or similar system interfaces based on such abstraction. Is that really difficult to understand where I'm standing, leaving the KVM hat off for a moment? - Davide -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] eventfd: new EFD_STATE flag
There are userspace libraries that do almost everything, but you hardly see things like pthread_(EFD_STATE-like)_create() or similar system interfaces based on such abstraction. It actually seems as close to a condition variable as an eventfd can be. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] eventfd: new EFD_STATE flag
On Tue, 25 Aug 2009, Michael S. Tsirkin wrote: On Mon, Aug 24, 2009 at 11:25:01AM -0700, Davide Libenzi wrote: On Sun, 23 Aug 2009, Michael S. Tsirkin wrote: On Sun, Aug 23, 2009 at 04:40:51PM +0300, Avi Kivity wrote: On 08/23/2009 04:36 PM, Michael S. Tsirkin wrote: More important here is realization that eventfd is a mutex/semaphore implementation, not a generic event reporting interface as we are trying to use it. Well it is a generic event reporting interface (for example, aio uses it). Davide, I think it's a valid point. For example, what read on eventfd does (zero a counter and return) is not like any semaphore I saw. Indeed, the default eventfd behaviour is like, well, an event. Signaling (kernel side) or writing (userspace side), signals the event. Waiting (reading) it, will reset the event. If you use EFD_SEMAPHORE, you get a semaphore-like behavior. Events and sempahores are two widely known and used abstractions. The EFD_STATE proposed one, well, no. Not at all. Hmm. All we try to do is, associate a small key with the event that we signal. Is it really that uncommon/KVM specific? All I'm trying to do, is to avoid that eventfd will become an horrible multiplexor for every freaky one-time-use behaviors arising inside kernel modules. - Davide -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] eventfd: new EFD_STATE flag
On Tue, 25 Aug 2009, Paolo Bonzini wrote: There are userspace libraries that do almost everything, but you hardly see things like pthread_(EFD_STATE-like)_create() or similar system interfaces based on such abstraction. It actually seems as close to a condition variable as an eventfd can be. A pthread condition typical code usage maps to eventfd like: while (read(efd, ...) > 0) if (CONDITION) break; So a pthread condition is really a wakeup gate like eventfd is. EFD_STATE has nothing to do with a pthread condition. - Davide -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] VMX: Return to userspace on invalid state emulation failure
Return to userspace instead of repeatedly trying to emulate instructions that have already failed Signed-off-by: Mohammed Gamal m.gamal...@gmail.com --- arch/x86/kvm/vmx.c |6 +- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6b57eed..c559bb7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3337,6 +3337,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (err != EMULATE_DONE) { kvm_report_emulation_failure(vcpu, "emulation failure"); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; break; } @@ -3607,7 +3609,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->entry_time = ktime_get(); /* Handle invalid guest state instead of entering VMX */ - if (vmx->emulation_required && emulate_invalid_guest_state) { + if (vmx->emulation_required && emulate_invalid_guest_state + && !(vcpu->run->exit_reason == KVM_EXIT_INTERNAL_ERROR + && vcpu->run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION)) { handle_invalid_guest_state(vcpu); return; } -- 1.6.0.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vbus design points: shm and shm-signals
Gregory Haskins wrote: Hi Anthony, Fundamentally, how is this different than the virtio-add_buf concept? From my POV, they are at different levels. Calling vbus-shm() is for establishing a shared-memory region including routing the memory and signal-path contexts. You do this once at device init time, and then run some algorithm on top (such as a virtqueue design). virtio explicitly avoids having a single setup-memory-region call because it was designed to accommodate things like Xen grant tables whereas you have a fixed number of sharable buffers that need to be setup and torn down as you use them. You can certainly use add_buf() to setup a persistent mapping but it's not the common usage. For KVM, since all memory is accessible by the host without special setup, add_buf() never results in an exit (it's essentially a nop). So I think from that perspective, add_buf() is a functional superset of vbus-shm(). virtio-add_buf() OTOH, is a run-time function. You do this to modify the shared-memory region that is already established at init time by something like vbus-shm(). You would do this to queue a network packet, for instance. That said, shm-signal's closest analogy to virtio would be vq-kick(), vq-callback(), vq-enable_cb(), and vq-disable_cb(). The difference is that the notification mechanism isn't associated with a particular type of shared-memory construct (such as a virt-queue), but instead can be used with any shared-mem algorithm (at least, if I designed it properly). Obviously, virtio allows multiple ring implements based on how it does layering. The key point is that it doesn't expose that to the consumer of the device. Do you see a compelling reason to have an interface at this layer? virtio provides a mechanism to register scatter/gather lists, associate a handle with them, and provides a mechanism for retrieving notification that the buffer has been processed. Yes, and I agree this is very useful for many/most algorithms...but not all. 
Sometimes you don't want ring-like semantics, but instead want something like an idempotent table. (Think of things like interrupt controllers, timers, etc). We haven't crossed this bridge yet because we haven't implemented one of these devices. One approach would be to use add_buf() to register fixed shared memory regions. Because our rings are fixed sized, this implies a fixed number of shared memory mappings. You could also extend virtio to provide a mechanism to register unlimited numbers of shared memory regions. The problem with this is that it doesn't work well for hypervisors with fixed shared-memory regions (like Xen). However, sometimes you may want to say time is now X, and later time is now Y. The update value of 'X' is technically superseded by Y and is stale. But a ring may allow both to exist in-flight within the shm simultaneously if the recipient (guest or host) is lagging, and the X may be processed even though its data is now irrelevant. What we really want is the transform of X-Y to invalidate anything else in flight so that only Y is visible. We actually do this today but we just don't use virtio. I'm not sure we need a single bus that can serve both of these purposes. What does this abstraction buy us? If you think about it, a ring is a superset of this construct...the ring meta-data is the shared-table (e.g. HEAD ptr, TAIL ptr, COUNT, etc). So we start by introducing the basic shm concept, and allow the next layer (virtio/virtqueue) in the stack to refine it for its needs. I think there's a trade off between practicality and theoretical abstractions. Surely, a system can be constructed simply with notification and shared memory primitives. This is what Xen does via event channels and grant tables. In practice, this ends up being cumbersome and results in complex drivers. Compare netfront to virtio-net, for instance. We choose to abstract at the ring level precisely because it simplifies driver implementations. 
I think we've been very successful here. virtio does not accommodate devices that don't fit into a ring model very well today. There's certainly room to discuss how to do this. If there is to be a layer below virtio's ring semantics, I don't think that vbus is this because it mandates much higher levels of the stack (namely, device enumeration). IOW, I can envision a model that looked like PCI - virtio-pci - virtio-shm - virtio-ring - virtio-net Whereas generic-shm-mechanism provided a non-ring interface for non-ring devices. That doesn't preclude non virtio-pci transports, it just suggests how we would do the layering. So maybe there's a future for vbus as virtio-shm? How attached are you to your device discovery infrastructure? If you introduced a virtio-shm layer to the virtio API that looked a bit like vbus' device API, and then decoupled the device discovery bits into a virtio-vbus transport, I
Re: vbus design points: shm and shm-signals
Anthony Liguori wrote: IOW, I can envision a model that looked like PCI -> virtio-pci -> virtio-shm -> virtio-ring -> virtio-net Let me stress that what's important here is that devices target either virtio-ring or virtio-shm. If we had another transport, those drivers would be agnostic toward it. We really want to preserve the ability to use all devices over a PCI transport. That's a critical requirement for us. The problem with vbus as it stands today, is that it presents vbus -> virtio-ring -> virtio-net and allows drivers to target either virtio-ring or vbus directly. If a driver targets vbus directly, then the driver is no longer transport agnostic and we could not support that driver over PCI. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv3 0/4] qemu-kvm: vhost net support
On Sun, Aug 23, 2009 at 1:22 PM, Michael S. Tsirkin m...@redhat.com wrote: Just had a different, but slightly similar problem when the host running qemu had forwarding enabled. Is it possible your host is forwarding the packets somewhere else, and that's why we get the dupes? sysctl -w net.ipv4.conf.all.forwarding=0 Yes! This seems to be the problem. As expected, I can just disable forwarding on eth10 and the duplicates disappear. Thanks, Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2829519 ] extboot.bin is not built
Bugs item #2829519, was opened at 2009-07-30 09:13 Message generated for change (Comment added) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2829519group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: qemu Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: Dominik Klein (dominikklein) Assigned to: Nobody/Anonymous (nobody) Summary: extboot.bin is not built Initial Comment: I am on openSuSE 11.1 x86_64 on a Dell machine with an Intel E5440 CPU. I try to compile qemu 0.10.5 using ./configure --prefix=/usr make make install Everything looks good but then I find that extboot.bin is neither (tried to) build nor installed. Copying a version from an older qemu installation worked as suggested by a colleague, but I guess that's not the way it is meant to be. I attached a file with the output of configure, make and make install. If you need anything else, just let me know or contact me on IRC, Nickname kleind in #kvm on freenode -- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: xming (xming) Date: 2009-08-22 15:38 Message: extboot.bin is not built with qemu-kvm-0.11.0-rc1, can this be fixed for the next rc? -- Comment By: Avi Kivity (avik) Date: 2009-08-10 11:55 Message: Fixed in 0.10.6 -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2829519group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2725669 ] kvm init script breaks network interfaces with multiple IPs
Bugs item #2725669, was opened at 2009-04-01 20:44 Message generated for change (Settings changed) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2725669group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: Rejected Priority: 5 Private: No Submitted By: Paul Donohue (paulsd) Assigned to: Nobody/Anonymous (nobody) Summary: kvm init script breaks network interfaces with multiple IPs Initial Comment: If multiple IP addresses are assigned to a network interface (Using interface aliases - for example 'ifconfig eth0 10.0.0.1 ; ifconfig eth0:1 10.0.0.2'), then the kvm init script causes the interface to become unresponsive when it creates a bridge using the interface. I haven't yet had a need to use bridging for my VMs, so I haven't yet tried to figure out how to properly configure a bridge when multiple IPs are in use on the host system (I assume the multiple IPs simply need to be configured using aliases of the bridge itself - for example 'ifconfig sw0 10.0.0.1 ; ifconfig sw0:1 10.0.0.2' - but I haven't actually tried it). Therefore, I am not sure at the moment how the kvm init script needs to be updated to fix this problem. Regardless, I do have a number of machines which are using multiple IPs on the host system, and I recently installed kvm on them, then discovered that after the next reboot of each machine, the network interface is unresponsive until I disable the kvm init script and reboot again. So, ideally the kvm init script should be updated to properly handle aliased interfaces, but at the very least, it needs to be updated to detect aliased interfaces and refuse to create a bridge for them, since that seems to completely break the underlying interface. 
-- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:13 Message: The kvm init script is shipped as an example only. You should use the networking setup that came with your distribution. -- Comment By: Paul Donohue (paulsd) Date: 2009-04-01 23:48 Message: Yes, it does, in the userspace tree, under the scripts subdirectory: http://git.kernel.org/?p=virt/kvm/kvm-userspace.git;a=blob;f=scripts/kvm;h=cddc931fd3b289f3c325e23b55f261e996328bd6;hb=HEAD -- Comment By: Brian Jackson (iggy_cav) Date: 2009-04-01 21:08 Message: KVM doesn't come with an init script in the tarball. This is most likely provided by your distro or some other third party. You should contact them for support. -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2725669group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2543539 ] Can't install Windows 98
Bugs item #2543539, was opened at 2009-01-28 17:44 Message generated for change (Settings changed) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2543539group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: None Priority: 5 Private: No Submitted By: Kouichi Kusanagi (k_kusanagi) Assigned to: Nobody/Anonymous (nobody) Summary: Can't install Windows 98 Initial Comment: I attempted to install Windows 98. It hangs at Hardware Detection. Progress bar stopped at 14% and kvm repeat these messeges. emulation failed (mmio) rip 3e49 f2 ae 75 14 emulation failed (pagetable) rip 3e49 f2 ae 75 14 Attached patch fixes hang but install still fail. When progress bar reaches 29%, general protection fault occurs. Details are as follows MSGSRV32 : SYSDETMG.DLL : 0004:4ef0 Registers: EAX=2000 CS=2137 EIP=4ef0 EFLGS=0206 EBX=2000 SS=1fc7 ES{=8270 EBP=828a ECX= DS=30b7 ESI=00020006 FS= EDX=2637 ES=2637 EDI= GS=0157 Bytes at CS:EIP: 26 8b 47 06 03 46 f6 8b f8 8c 46 fe 8b d8 26 f6 Stack dump: 0073d400 2e738276 2e8530b7 30b7 2637 0002 82c62e0c 21374d4a 1fc782b2 1fc782b4 1fc782bc 1fc782b6 1fc782b8 1fc782ba 30b7d400 00734d44 cpu: AMD Phenom X4 9550 kvm: kvm-83 kernel: v2.6.29-rc1-190-g37a76bd host: Debian sid amd64 guest: Windows 98 command line: kvm -serial none -parallel none -k en-us -monitor stdio -net none -usb -usbdevice tablet -localtime -hda win98 -cdrom win98.iso -boot d -- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). 
-- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:16 Message: Please post the patch to the mailing list (kvm@vger.kernel.org) -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2543539group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2099075 ] qcow2 images corruption
Bugs item #2099075, was opened at 2008-09-07 19:22 Message generated for change (Settings changed) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2099075group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: Fabio Coatti (cova) Assigned to: Nobody/Anonymous (nobody) Summary: qcow2 images corruption Initial Comment: I'm running a 32bit vm under x86_64 AMD host with kvm-74. using raw image all works fine, but with qcow2 image I'm able to get deep fs corruption with a disk intensive operation like kernel compilation. it happened every time I tried, and -no-kvm-irqchip didn't make any difference. I've browsed the archives for hints, but no luck. Details: Host (64): Linux 2.6.26.3 #6 SMP PREEMPT Sun Aug 31 16:00:51 CEST 2008 x86_64 AMD Phenom(tm) 9850 Quad-Core Processor AuthenticAMD GNU/Linux single CPU/quad core. fs on image partition: XFS Gentoo ~amd64, kvm compiled on host system. Guest (32): Linux 2.6.26.3vm #4 Tue Aug 26 17:29:00 CEST 2008 i686 QEMU Virtual CPU version 0.9.1 AuthenticAMD GNU/Linux Gentoo x86 command line: kvm -m 1G -drive file=test.qcow2,if=virtio,boot=on -localtime -net nic,macaddr=DE:AD:BE:EF:15:5,model=virtio -net tap fs on image: ext3 kvm-img info of image that show corruption: image: test.qcow2 file format: qcow2 virtual size: 10G (10737418240 bytes) disk size: 103M cluster_size: 4096 backing file: gentoo-i386-virtio.qcow2 (actual path: gentoo-i386-virtio.qcow2) kvm-img info of working image: image: gentoo-i386-virtio.img file format: raw virtual size: 10G (10737418240 bytes) disk size: 4.5G virtio partitions are in use on guest machine Let me know it other details are needed. Thanks for any answer. 
-- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:22 Message: qcow2 problems should be fixed in kvm-88. -- Comment By: Fabio Coatti (cova) Date: 2008-09-07 21:46 Message: Logged In: YES user_id=220554 Originator: YES Many thanks for the quick answer. Not sure about how to get kvm head, I've used http://people.qumranet.com:/avi/snapshots/kvm-snapshot-20080906.tar.gz The result is the same: fs corruption using qcow2 image. as suggested, I've tried with ide emulation and it seems that I'm unable to reproduce the bug, with this command line: kvm -m 1G test-hda.qcow2 -localtime -net nic,macaddr=DE:AD:BE:EF:15:5,model=rtl8139 -net tap all goes fine: 3 kernel compilation on the same image, rebooting each time, without glitches. maybe it's related to virtio disk driver? Of course I'm available for other info and tests, if needed. Thanks. -- Comment By: Dor Laor (thekozmo) Date: 2008-09-07 20:39 Message: Logged In: YES user_id=2124464 Originator: NO Hi there, Can you first check against kvm head, there was a missing tlb flush that was fixed. Second, can you replace virtio with scsi/ide and check if the bug still exist? 10x -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2099075group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-1906189 ] All SMP guests often halt
Bugs item #1906189, was opened at 2008-03-03 11:33 Message generated for change (Comment added) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1906189group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: None Priority: 5 Private: No Submitted By: Technologov (technologov) Assigned to: Nobody/Anonymous (nobody) Summary: All SMP guests often halt Initial Comment: All SMP configurations are very unstable - both on Intel and AMD. KVM-62. Symptons: guests often soft-lock ups, or more precisely, they slow down to unacceptable speeds. Guests may hard-lockup totally, or even BSOD in some cases. I have tried: Windows 2000 Windows XP Windows Server 2003 Windows Server 2008 The KVM acts, but it looks like a loop. = [alex...@pink-intel win2000-Pro]$ dmesg | tail -n40 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 SIPI to vcpu 1 vector 0x10 apic write: bad size=1 fee00030 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 SIPI to vcpu 1 vector 0x10 apic write: bad size=1 fee00030 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 Ignoring de-assert INIT to vcpu 1 SIPI to vcpu 1 vector 0x21 SIPI to vcpu 1 vector 0x21 SIPI to vcpu 1 vector 0x21 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 SIPI to vcpu 1 vector 0x21 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 SIPI to vcpu 1 vector 0x10 apic write: bad size=1 fee00030 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to 
vcpu 1 Ignoring de-assert INIT to vcpu 1 SIPI to vcpu 1 vector 0x21 SIPI to vcpu 1 vector 0x21 SIPI to vcpu 1 vector 0x21 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 SIPI to vcpu 1 vector 0x21 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 = (gdb) bt #0 0x003a016c9aa7 in ioctl () from /lib64/libc.so.6 #1 0x0051bb29 in kvm_run (kvm=0x2a9b040, vcpu=0) at libkvm.c:850 #2 0x004fda86 in kvm_cpu_exec (env=value optimized out) at /root/Linstall/kvm-62rc2/qemu/qemu-kvm.c:127 #3 0x004fe5d5 in kvm_main_loop_cpu (env=0x2b56490) at /root/Linstall/kvm-62rc2/qemu/qemu-kvm.c:307 #4 0x004110fd in main (argc=44675488, argv=value optimized out) at /root/Linstall/kvm-62rc2/qemu/vl.c:7862 = kvm statistics efer_reload 103701 0 exits512480997 20642 fpu_reload24781662 799 halt_exits 1824249 170 halt_wakeup 828699 68 host_state_reload 495932451617 hypercalls 0 0 insn_emulation 389188282 14239 insn_emulation_fail 1110 0 invlpg 0 0 io_exits 28855411 928 irq_exits 191313613248 irq_window 0 0 largepages 0 0 mmio_exits16078802 0 mmu_cache_miss 4219404 415 mmu_flooded4110773 410 mmu_pde_zapped 499335 6 mmu_pte_updated 103816391327 mmu_pte_write 145679441737 mmu_recycled 17419 0 mmu_shadow_zapped 4372079 410 = -Alexey, 03.03.2008. -- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:29 Message: Does this still happen? -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1906189group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-1941302 ] Cannot boot guests with hugetlbfs
Bugs item #1941302, was opened at 2008-04-13 11:21 Message generated for change (Settings changed) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1941302group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: yunfeng (yunfeng) Assigned to: Nobody/Anonymous (nobody) Summary: Cannot boot guests with hugetlbfs Initial Comment: I meet problem to get hugetlbfs work on my test box. If i added --mem-path to qemu, the guest will always fail to boot with the error messages below printed on qemu console: ### ata0 master: ATA-0 Hard-Disk ( 0 MBytes) ata1 master: ATAPI-0 Device Booting from Hard Disk . Booting from Hard Disk failed: could not read the boot disk ## And I checked my system, hugepages has been enabled. [r...@vt-dp8 ~]# grep -i huge /proc/meminfo HugePages_Total: 500 HugePages_Free:496 HugePages_Rsvd: 0 Hugepagesize: 2048 kB [r...@vt-dp8 ~]# mount /dev/sda1 on / type ext3 (rw) none on /dev/shm type tmpfs (rw,size=4G) nodev on /hugepages type hugetlbfs (rw) /dev/sda3 on /share type ext2 (rw) /dev/sda2 on /mnt/sda2 type ext3 (rw) The command i am using: qemu -m 256 -monitor pty -net nic,macaddr=00:16:3e:48:d4:aa,model=rtl8139 -net tap,script=/etc/kvm/qemu-ifup -hda /dev/sda --mem-path /hugepages/ If I remove --mem-path, the guest can be booted up. -- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:24 Message: Should be fixed in kvm-88. 
-- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1941302group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-1984384 ] soft lockup - CPU#5 stuck for 11s! [qemu-system-x86:4966]
Bugs item #1984384, was opened at 2008-06-04 11:49 Message generated for change (Comment added) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1984384group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: Rafal Wijata (ravpl) Assigned to: Nobody/Anonymous (nobody) Summary: soft lockup - CPU#5 stuck for 11s! [qemu-system-x86:4966] Initial Comment: I'm using kvm-69 running on Linux 2.6.24.7-92.fc8 #1 SMP Wed May 7 16:26:02 EDT 2008 x86_64 x86_64 x86_64 GNU/Linux kvm modules loaded from kvm-69 rather than kernel provided My system almost freezed after I killed qemu process. I saw many, many tasks in 'D' state, along with [reiserfs/?] tasks. Normally I would consider it reiserfs bug(and maybe it is), but two things - it happened after qemu process was killed(running with 6cpus, 6G memory, 16G hdd placed on reiserfs placed on 200M/s hdd) - dmesg showed following messages(2 total), which suggest it stucked in kvm BUG: soft lockup - CPU#5 stuck for 11s! 
[qemu-system-x86:4966] CPU 5: Modules linked in: ipt_REJECT nf_conntrack_ipv4 iptable_filter ip_tables kvm_intel(U) kvm(U) tun nfs lockd nfs_acl autofs4 coretemp hwmon fuse sunrpc bridge xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6t_REJECT ip6table_filter ip6_tables x_tables ipv6 cpufreq_ondemand acpi_cpufreq reiserfs ext2 dm_mirror dm_multipath dm_mod i5000_edac iTCO_wdt serio_raw pcspkr iTCO_vendor_support e1000 button edac_core i2c_i801 ata_piix i2c_core pata_acpi ata_generic sg usb_storage ahci libata shpchp 3w_9xxx sd_mod scsi_mod ext3 jbd mbcache uhci_hcd ohci_hcd ehci_hcd Pid: 4966, comm: qemu-system-x86 Not tainted 2.6.24.7-92.fc8 #1 RIP: 0010:[8834b29e] [8834b29e] :kvm:rmap_remove+0x170/0x198 RSP: 0018:8101f4df5bd8 EFLAGS: 0246 RAX: 0002 RBX: 81004294af60 RCX: RDX: RSI: 0106 RDI: 8101770448c0 RBP: 8101ce0454d0 R08: c20001b86030 R09: 8101d3587118 R10: 0019e7ea R11: 8101394dd9c0 R12: 8100240cece0 R13: R14: 0019e7ea R15: 0018 FS: () GS:81021f049580() knlGS: CS: 0010 DS: 002b ES: 002b CR0: 8005003b CR2: f7ff6000 CR3: 00021b5e5000 CR4: 26e0 DR0: DR1: DR2: DR3: DR6: 0ff0 DR7: 0400 Call Trace: [8834b1dd] :kvm:rmap_remove+0xaf/0x198 [8834b372] :kvm:kvm_mmu_zap_page+0x8a/0x25e [8834b9f3] :kvm:free_mmu_pages+0x12/0x34 [8834bac9] :kvm:kvm_mmu_destroy+0x1d/0x5e [88346979] :kvm:kvm_arch_vcpu_uninit+0x1d/0x38 [8834555b] :kvm:kvm_vcpu_uninit+0x9/0x15 [88163aa8] :kvm_intel:vmx_free_vcpu+0x74/0x84 [8834657b] :kvm:kvm_arch_destroy_vm+0x69/0xb4 [88345538] :kvm:kvm_vcpu_release+0x13/0x18 [810a35d4] __fput+0xc2/0x18f [810a0de7] filp_close+0x5d/0x65 [8103b3df] put_files_struct+0x66/0xc4 [8103c6f7] do_exit+0x28c/0x76b [8103cc55] sys_exit_group+0x0/0xe [81044163] get_signal_to_deliver+0x3aa/0x3d8 [8100b359] do_notify_resume+0xa8/0x732 [8126b7f6] unlock_kernel+0x32/0x33 [881c01db] :reiserfs:reiserfs_setattr+0x26e/0x27d [810a1866] do_truncate+0x70/0x79 [8100bf17] sysret_signal+0x1c/0x27 [8100c1a7] ptregscall_common+0x67/0xb0 -- Comment By: SourceForge Robot (sf-robot) 
Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:27 Message: Should be fixed in git. -- Comment By: Avi Kivity (avik) Date: 2008-06-04 15:45 Message: Logged In: YES user_id=539971 Originator: NO Okay, I added a cond_resched() in free_mmu_pages(). That should avoid the softlockup tick. File Added: prevent-softlockup-on-kvm-destroy.patch -- Comment By: david ahern (dsahern) Date: 2008-06-04 15:08 Message: Logged In: YES user_id=1755596 Originator: NO My host did not crash, only the guest. I actually was not aware it had gone down until I went to login. At that point I went digging through syslog to find out when it died (my control scripts log startup and shutdown). The
[ kvm-Bugs-2024740 ] Win2003 SMP installation crash
Bugs item #2024740, was opened at 2008-07-22 14:36 Message generated for change (Settings changed) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2024740group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: None Priority: 5 Private: No Submitted By: Marcelo Tosatti (mtosatti) Assigned to: Nobody/Anonymous (nobody) Summary: Win2003 SMP installation crash Initial Comment: Crash during first stage of Win2003 enterprise installation (text-mode), 4 vcpu's: Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 Ignoring de-assert INIT to vcpu 2 Ignoring de-assert INIT to vcpu 3 Ignoring de-assert INIT to vcpu 0 Ignoring de-assert INIT to vcpu 1 Ignoring de-assert INIT to vcpu 2 Ignoring de-assert INIT to vcpu 3 handle_exception: unexpected, vectoring info 0x8406 intr info 0x8b0c With today's git tree (commit ea8b7f0542e0420240d057f7954808c65c4d13fc). Flexpriority enabled host. qemu/x86_64-softmmu/qemu-system-x86_64 -hda /root/images/win2003.img \ -cdrom /root/isos/en_windows_server_2003_with_sp1_enterprise.iso \ -m 2000 -usbdevice tablet -vnc :2 -smp 4 -boot d -- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:30 Message: Still happens? -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2024740group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2490866 ] repeatable corruption with qcow2 on kvm-79
Bugs item #2490866, was opened at 2009-01-06 21:10 Message generated for change (Comment added) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2490866group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: qemu Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: Adrian Bridgett (abridgett) Assigned to: Nobody/Anonymous (nobody) Summary: repeatable corruption with qcow2 on kvm-79 Initial Comment: Creating a qcow2 image, mkfs.ext3, sometimes mounting it would fail immediately, but in all cases it would corrupt (overwritten with zeros) after starting up backuppc on it. This is KVM-79 on a Debian lenny host and guest. This occured using virtio or not. Swapping to a raw file or LV worked flawlessly. I've tested the box with memtest and I don't have issues elsewhere but I've seen corruptions on other images. host and guest are both 2.6.26-1-adm64 kernel (debian lenny) I'm running 32-bit userspace everywhere. Dual core Intel Core2 E6300. I see KVM-81 has improve qcow2 data integrity with cache=writethrough which _might_ be what I'm hitting - but I can't find more details about this to check (and backport patch to debian package or wait for newer debian package). thanks. -- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:18 Message: Should be fixed in kvm-88. -- Comment By: Adrian Bridgett (abridgett) Date: 2009-01-24 14:17 Message: I've tested with new debian KVM-83 package - which also removes that fedora patch. 
I can't reproduce the corruption I had anymore. Feel free to close this bug. Thanks. -- Comment By: Daniel van Vugt (danv) Date: 2009-01-21 06:47 Message: As the original problem is resolved, it's description now inaccurate, and my problem is ongoing, please close this bug. I will open a new one. -- Comment By: Daniel van Vugt (danv) Date: 2009-01-15 01:47 Message: Reproduced qcow2 corruption again, using kvm-83 now. Same steps as before. -- Comment By: Daniel van Vugt (danv) Date: 2009-01-13 11:50 Message: My problem is kvm-82 vanilla, compiled with no special options. It happens consistently with kvm-82, but is not necessarily new to kvm-82 because I haven't used these images extensively with prior kvm releases. No, I do not do anything dangerous like run multiple guests with the same image or manage snapshots while the guest is up (only at pre-boot, when stopped). To reproduce repeatedly in the two cases I've seen it recently: Case A: Windows Server 2003 (standard and enterprise) x86: 1. Boot guest (previously created under kvm-81) 2. Let Windows detect the new hardware (kvm-82 changes) 3. Reboot as instructed 4. Windows guest continues to work after multiple reboots and the console shows snapshots intact 5. Shut down the Windows guest properly 6. Image is now invalid as soon as qemu(kvm) exits (not recognizable as qcow2, not bootable and no snapshots reported by qemu-img info) Case B: Windows 7 beta x86 1. Installed under kvm-82 2. Boot guest 3. Shut down guest 4. Image is now invalid as described. This is a very serious problem. Thought it sounds like a new separate bug may be required... -- Comment By: Laszlo Dvornik (ldvornik) Date: 2009-01-13 10:23 Message: I narrowed down my problem to one Fedora patch, which include in Debian too, but not in vanilla kvm, so vanilla kvm isn't effected. 
The Fedora patch: http://cvs.fedoraproject.org/viewvc/rpms/kvm/devel/kvm-62-block-rw-range-check.patch?revision=1.7view=markup I removed the CVE-2008-0928-fedora.patch from the debian source package and recompiled it, the partitioning problem solved. Fedora bugzilla entry: https://bugzilla.redhat.com/show_bug.cgi?id=433560 A question. Is this vulnerability confirmed by qemu devels or why only distributions fixed this? Their fix seems to cause problems with qcow2, qcow, vmdk formats. The patch works well last in debian before 79+dfsg-3, when they started to use Fedora's updated patch. Perhaps the packagers modified the patch for newer kvms badly. Debian kvm git repo's commit:
[ kvm-Bugs-1831632 ] Windows XP setup Performance regression
Bugs item #1831632, was opened at 2007-11-14 08:46 Message generated for change (Comment added) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1831632group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: None Priority: 5 Private: No Submitted By: Technologov (technologov) Assigned to: Nobody/Anonymous (nobody) Summary: Windows XP setup Performance regression Initial Comment: Windows XP setup Performance drops slightly with nearly each KVM release. Below is the summary table: KVM-36 - ~14:00 KVM-37 - ~15:00 KVM-40 - ~15:00 KVM-42 - ~15:00 KVM-43 - ~16:00 KVM-44 - ~16:00 KVM-45 - ~16:00 KVM-48 - ~16:00 KVM-51 - ~18:00 KVM-52 - ~19:00 As of now, I don't know what causes that. -Alexey -- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:32 Message: How does it measure now? -- Comment By: Technologov (technologov) Date: 2007-11-14 08:48 Message: Logged In: YES user_id=1839746 Originator: YES Tests performed on Intel Core 2 - 2.0 GHz (Xeon 5130), Fedora 7 x86-64. -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1831632group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-1929279 ] kernel BUG at kvm-64/kernel/mmu.c:560!
Bugs item #1929279, was opened at 2008-03-30 11:21 Message generated for change (Settings changed) made by sf-robot You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1929279group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: kernel Group: None Status: Closed Resolution: None Priority: 5 Private: No Submitted By: buggaboo (buggaboo) Assigned to: Nobody/Anonymous (nobody) Summary: kernel BUG at kvm-64/kernel/mmu.c:560! Initial Comment: # what cpu model: AMD Athlon(tm) 64 X2 Dual Core Processor 4000+ # kvm version: kvm-64 # host kernel version: 2.6.24.4-x86_64-smp-tuxonice, vanilla + tuxonice # host kernel arch: x86_64 # guest: Windows XP sp2, bitness: 32 # qemu command line: vdeq qemu-system-x86_64 -m 512 -smp 2 \ -soundhw sb16,es1370 \ -usb -usbdevice tablet \ -net vde,vlan=0,sock=/var/run/vde.ctl \ -net nic,vlan=0,macaddr=52:54:00:00:AA:02 \ -hda vdisk-winxp-tooled-out1.img -smb virtual # -no-kvm switch: refuses to boot without kvm This kernel bug seems to be triggered everytime when I attempt to install http://downloads.sourceforge.net/andlinux/andlinux-beta1rc6-kde.exe also the guest hangs when I leave it alone after ~2 hours or so without giving any input. -- Comment By: SourceForge Robot (sf-robot) Date: 2009-08-25 02:20 Message: This Tracker item was closed automatically by the system. It was previously set to a Pending status, and the original submitter did not respond within 14 days (the time period specified by the administrator of this Tracker). -- Comment By: Avi Kivity (avik) Date: 2009-08-10 12:39 Message: How exactly do you install this andlinux? Note the bug may also be related to a hardware error. Can you try this on a different machine? 
-- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=1929279group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vhost net: performance with ping benchmark
Michael S. Tsirkin wrote: On Mon, Aug 24, 2009 at 11:12:41AM +0300, Michael S. Tsirkin wrote: At Rusty's suggestion, I tested vhost base performance with ping. Results below, and seem to be what you'd expect. Rusty, any chance you could look at the code? Is it in reasonable shape? I think it makes sense to merge it through you. What do you think? One comment on file placement: I put files under a separate vhost directory to avoid confusion with virtio-net which runs in guest. Does this sound sane? Also, can a minimal version (without TSO, tap or any other features) be merged upstream first so that features can be added later? Or do we have to wait until it's more full featured? Finally, can it reasonably make 2.6.32, or you think it needs more time out of tree? I think 2.6.32 is pushing it. I think some time is needed to flush out the userspace interface. In particular, I don't think Mark's comments have been adequately addressed. If a version were merged without GSO support, some mechanism to do feature detection would be needed in the userspace API. I think this is likely going to be needed regardless. I also think the tap compatibility suggestion would simplify the consumption of this in userspace. I'd like some time to look at get_state/set_state ioctl()s along with dirty tracking support. It's a much better model for live migration IMHO. I think some more thorough benchmarking would be good too. In particular, netperf/iperf runs would be nice. Regards, Anthony Liguori Thanks very much, -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
kvm-76 aborted on on ibm server
Using kvm-76 on 2.6.21mv kernel on ibm server x3650(64bit processor) target, I got the following error.Any suggestions.. unhandled vm exit: 0x8021 vcpu_id 0 rax b101 rbx rcx rdx rsi rdi rsp 0ff8 rbp r8 r9 r10 r11 r12 r13 r14 r15 rip fe6e rflags 00023002 cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0) ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) es (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0) fs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gs (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) tr 0148 (c04405c0/2067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 0) ldt (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0) gdt 5020/2cf idt 52f0/7ff cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0 Aborted __ -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
KVM pvmmu: do not batch pte updates from interrupt context
Commit b8bcfe997e4 made paravirt pte updates synchronous in interrupt context. Unfortunately the KVM pv mmu code caches the lazy/nonlazy mode internally, so a pte update from interrupt context during a lazy mmu operation can be batched while it should be performed synchronously. https://bugzilla.redhat.com/show_bug.cgi?id=518022 Drop the internal mode variable and use paravirt_get_lazy_mode(), which returns the correct state. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c664d51..63b0ec8 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -34,7 +34,6 @@ struct kvm_para_state { u8 mmu_queue[MMU_QUEUE_SIZE]; int mmu_queue_len; - enum paravirt_lazy_mode mode; }; static DEFINE_PER_CPU(struct kvm_para_state, para_state); @@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len) { struct kvm_para_state *state = kvm_para_state(); - if (state-mode != PARAVIRT_LAZY_MMU) { + if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) { kvm_mmu_op(buffer, len); return; } @@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn) static void kvm_enter_lazy_mmu(void) { - struct kvm_para_state *state = kvm_para_state(); - paravirt_enter_lazy_mmu(); - state-mode = paravirt_get_lazy_mode(); } static void kvm_leave_lazy_mmu(void) @@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void) mmu_queue_flush(state); paravirt_leave_lazy_mmu(); - state-mode = paravirt_get_lazy_mode(); } static void __init paravirt_ops_setup(void) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vhost net: performance with ping benchmark
On 08/25/2009 05:22 AM, Anthony Liguori wrote: I think 2.6.32 is pushing it. 2.6.32 is pushing it, but we need to push it. I think some time is needed to flush out the userspace interface. In particular, I don't think Mark's comments have been adequately addressed. If a version were merged without GSO support, some mechanism to do feature detection would be needed in the userspace API. I don't see any point in merging without gso (unless it beats userspace with gso, which I don't think will happen). In any case we'll need feature negotiation. I think this is likely going to be needed regardless. I also think the tap compatibility suggestion would simplify the consumption of this in userspace. What about veth pairs? I'd like some time to look at get_state/set_state ioctl()s along with dirty tracking support. It's a much better model for live migration IMHO. My preference is ring proxying. Note we'll need ring proxying (or at least event proxying) for non-MSI guests. I think some more thorough benchmarking would be good too. In particular, netperf/iperf runs would be nice. Definitely. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/2] eventfd: new EFD_STATE flag
On 08/25/2009 01:08 AM, Davide Libenzi wrote: Is that really difficult to understand where I'm standing, leaving the KVM hat off for a moment? I understand it perfectly. I take the same position with kvm. I'm providing more data in the hope that you'll change you mind, not trying to flood you with email so you'll give up. We can always create our eventfd-lookalike for kvm, but I'd rather not do that (other options include a userspace proxy through existing interfaces, it might even be better than changing eventfd if we decide performance for level-triggered interrupts is not critical). -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html