date:20090824

[COMMIT master] KVM: x86 emulator: Report unhandled instructions

2009-08-24 Thread Avi Kivity

From: Mohammed Gamal m.gamal...@gmail.com

Report unhandled instructions in the syslog on emulation failure

Signed-off-by: Mohammed Gamal m.gamal...@gmail.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1f0ff4a..3d6a562 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2194,6 +2194,7 @@ writeback:
 
 done:
if (rc == X86EMUL_UNHANDLEABLE) {
+   kvm_report_emulation_failure(ctxt-vcpu, unhandled 
instruction);
c-eip = saved_eip;
return -1;
}
@@ -2467,7 +2468,7 @@ twobyte_insn:
goto writeback;
 
 cannot_emulate:
-   DPRINTF(Cannot emulate %02x\n, c-b);
+   kvm_report_emulation_failure(ctxt-vcpu, unhandled instruction);
c-eip = saved_eip;
return -1;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: x86 emulator: Introduce No64 decode option

2009-08-24 Thread Avi Kivity

From: Mohammed Gamal m.gamal...@gmail.com

Introduces a new decode option No64, which is used for instructions that are
invalid in long mode.

Signed-off-by: Mohammed Gamal m.gamal...@gmail.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1cdfec5..1f0ff4a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -75,6 +75,8 @@
 #define Group   (114) /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (115) /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff/* Group number stored in bits 0:7 */
+/* Misc flags */
+#define No64   (128)
 /* Source 2 operand type */
 #define Src2None(029)
 #define Src2CL  (129)
@@ -93,21 +95,21 @@ static u32 opcode_table[256] = {
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
-   ImplicitOps | Stack, ImplicitOps | Stack,
+   ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x08 - 0x0F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-   0, 0, ImplicitOps | Stack, 0,
+   0, 0, ImplicitOps | Stack | No64, 0,
/* 0x10 - 0x17 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
-   ImplicitOps | Stack, ImplicitOps | Stack,
+   ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x18 - 0x1F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
-   ImplicitOps | Stack, ImplicitOps | Stack,
+   ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -161,7 +163,7 @@ static u32 opcode_table[256] = {
/* 0x90 - 0x97 */
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x98 - 0x9F */
-   0, 0, SrcImm | Src2Imm16, 0,
+   0, 0, SrcImm | Src2Imm16 | No64, 0,
ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
/* 0xA0 - 0xA7 */
ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
@@ -188,7 +190,7 @@ static u32 opcode_table[256] = {
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */
0, 0, 0, ImplicitOps | Stack,
-   ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps,
+   ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
/* 0xD0 - 0xD7 */
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -201,7 +203,7 @@ static u32 opcode_table[256] = {
ByteOp | SrcImmUByte, SrcImmUByte,
/* 0xE8 - 0xEF */
SrcImm | Stack, SrcImm | ImplicitOps,
-   SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps,
+   SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
@@ -967,6 +969,11 @@ done_prefixes:
}
}
 
+   if (mode == X86EMUL_MODE_PROT64  (c-d  No64)) {
+   kvm_report_emulation_failure(ctxt-vcpu, invalid x86/64 
instruction);;
+   return -1;
+   }
+
if (c-d  Group) {
group = c-d  GroupMask;
c-modrm = insn_fetch(u8, 1, c-eip);
@@ -1739,15 +1746,9 @@ special_insn:
emulate_2op_SrcV(add, c-src, c-dst, ctxt-eflags);
break;
case 0x06:  /* push es */
-   if (ctxt-mode == X86EMUL_MODE_PROT64)
-   goto cannot_emulate;
-
emulate_push_sreg(ctxt, VCPU_SREG_ES);
break;
case 0x07:  /* pop es */
-if (ctxt-mode == X86EMUL_MODE_PROT64)
-goto cannot_emulate;
-
rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
if (rc != 0)
goto done;
@@ -1757,9 +1758,6 @@ special_insn:
emulate_2op_SrcV(or, c-src, c-dst, ctxt-eflags);
break;
case 0x0e:  /* push cs */
-if (ctxt-mode == X86EMUL_MODE_PROT64)
-goto cannot_emulate;
-
emulate_push_sreg(ctxt, VCPU_SREG_CS);
break;
case 0x10 ... 0x15:
@@ -1767,15 +1765,9 @@ special_insn:
emulate_2op_SrcV(adc, c-src, c-dst, ctxt-eflags);

[COMMIT master] KVM: Don't pass kvm_run arguments

2009-08-24 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

They're just copies of vcpu->run, which is readily accessible.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 33901be..b080590 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -509,8 +509,8 @@ struct kvm_x86_ops {
 
void (*tlb_flush)(struct kvm_vcpu *vcpu);
 
-   void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
-   int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+   void (*run)(struct kvm_vcpu *vcpu);
+   int (*handle_exit)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
@@ -571,7 +571,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE (1  0)
 #define EMULTYPE_TRAP_UD   (1  1)
 #define EMULTYPE_SKIP  (1  2)
-int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
+int emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2, u16 error_code, int emulation_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
@@ -588,9 +588,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 
data);
 
 struct x86_emulate_ctxt;
 
-int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in,
 int size, unsigned port);
-int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
   int size, unsigned long count, int down,
gva_t address, int rep, unsigned port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 3d6a562..15593e8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1826,7 +1826,7 @@ special_insn:
break;
case 0x6c:  /* insb */
case 0x6d:  /* insw/insd */
-if (kvm_emulate_pio_string(ctxt-vcpu, NULL,
+if (kvm_emulate_pio_string(ctxt-vcpu,
1,
(c-d  ByteOp) ? 1 : c-op_bytes,
c-rep_prefix ?
@@ -1842,7 +1842,7 @@ special_insn:
return 0;
case 0x6e:  /* outsb */
case 0x6f:  /* outsw/outsd */
-   if (kvm_emulate_pio_string(ctxt-vcpu, NULL,
+   if (kvm_emulate_pio_string(ctxt-vcpu,
0,
(c-d  ByteOp) ? 1 : c-op_bytes,
c-rep_prefix ?
@@ -2135,7 +2135,7 @@ special_insn:
case 0xef: /* out (e/r)ax,dx */
port = c-regs[VCPU_REGS_RDX];
io_dir_in = 0;
-   do_io:  if (kvm_emulate_pio(ctxt-vcpu, NULL, io_dir_in,
+   do_io:  if (kvm_emulate_pio(ctxt-vcpu, io_dir_in,
   (c-d  ByteOp) ? 1 : c-op_bytes,
   port) != 0) {
c-eip = saved_eip;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6f38178..ffd3c97 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2734,7 +2734,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, 
u32 error_code)
if (r)
goto out;
 
-   er = emulate_instruction(vcpu, vcpu-run, cr2, error_code, 0);
+   er = emulate_instruction(vcpu, cr2, error_code, 0);
 
switch (er) {
case EMULATE_DONE:
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7853dd3..2df9b45 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -286,7 +286,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
 
if (!svm-next_rip) {
-   if (emulate_instruction(vcpu, vcpu-run, 0, 0, EMULTYPE_SKIP) !=
+   if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
EMULATE_DONE)
printk(KERN_DEBUG %s: NOP\n, __func__);
return;
@@ -1178,7 +1178,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, 
unsigned long value,
}
 }
 
-static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int pf_interception(struct vcpu_svm *svm)
 {
u64 fault_address;
u32 error_code;
@@ -1192,8 +1192,10 @@ static int pf_interception(struct vcpu_svm *svm, struct 
kvm_run *kvm_run)
return kvm_mmu_page_fault(svm-vcpu, fault_address, error_code);
 }
 
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+static int

Re: [PATCH][RESEND] x86 emulator: Add 'push/pop sreg' instructions

2009-08-24 Thread Avi Kivity


On 08/23/2009 02:24 PM, Mohammed Gamal wrote:

Signed-off-by: Mohammed Gamalm.gamal...@gmail.com
   


Applied, thanks.


+static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
+struct x86_emulate_ops *ops, int seg)
+{
+   struct decode_cache *c =ctxt-decode;
+   u16 selector;
+   int rc;
+
+   rc = emulate_pop(ctxt, ops,selector, c-op_bytes);
   


This overflows the stack.  I changed 'selector' to be unsigned long to 
fix this.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][RESEND] x86 emulator: Introduce No64 decode option

2009-08-24 Thread Avi Kivity


On 08/23/2009 02:24 PM, Mohammed Gamal wrote:

Introduces a new decode option No64, which is used for instructions that are
invalid in long mode.
   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][RESEND] x86 emulator: Report unhandled instructions

2009-08-24 Thread Avi Kivity


On 08/23/2009 02:24 PM, Mohammed Gamal wrote:

Report unhandled instructions in the syslog on emulation failure
   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][RESEND] Add push/pop instructions test in test harness

2009-08-24 Thread Avi Kivity


On 08/23/2009 02:24 PM, Mohammed Gamal wrote:

Signed-off-by: Mohammed Gamalm.gamal...@gmail.com
   



Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VMX: Return to userspace on invalid state emulation failure

2009-08-24 Thread Avi Kivity


On 08/24/2009 07:07 AM, Mohammed Gamal wrote:

Return to userspace instead of repeatedly trying to emulate
instructions that have already failed

Signed-off-by: Mohammed Gamalm.gamal...@gmail.com
---
  arch/x86/kvm/vmx.c |5 -
  1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1ee811c..6030671 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3341,6 +3341,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu 
*vcpu,

if (err != EMULATE_DONE) {
kvm_report_emulation_failure(vcpu, emulation failure);
+   kvm_run-exit_reason = KVM_EXIT_INTERNAL_ERROR;
+   kvm_run-internal.suberror = 
KVM_INTERNAL_ERROR_EMULATION;
break;
}

@@ -3612,7 +3614,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
vmx-entry_time = ktime_get();

/* Handle invalid guest state instead of entering VMX */
-   if (vmx-emulation_required  emulate_invalid_guest_state) {
+   if (vmx-emulation_required  emulate_invalid_guest_state
+ kvm_run-internal.suberror != KVM_INTERNAL_ERROR_EMULATION) 
{
handle_invalid_guest_state(vcpu, kvm_run);
return;
}
   


kvm_run->internal.suberror is an uninitialized variable and can contain 
any value.  You need a different communication channel here.



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2351676 ] Guests hang periodically on Ubuntu-8.10

2009-08-24 Thread SourceForge.net

Bugs item #2351676, was opened at 2008-11-26 19:59
Message generated for change (Comment added) made by z-image
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2351676&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Chris Jones (c_jones)
Assigned to: Nobody/Anonymous (nobody)
Summary: Guests hang periodically on Ubuntu-8.10

Initial Comment:
I'm seeing periodic hangs on my guests.  I've been unable so far to find a 
trigger - they always boot fine, but after anywhere from 10 minutes to 24 hours 
they eventually hang completely.

My setup:
  * AMD Athlon X2 4850e (2500 MHz dual core)
  * 4Gig memory
  * Ubuntu 8.10 server, 64-bit
  * KVMs tried:
: kvm-72 (shipped with ubuntu)
: kvm-79 (built myself, --patched-kernel option)
  * Kernels tried:
: 2.6.27.7 (kernel.org, self built)
: 2.6.27-7-server from Ubuntu 8.10 distribution

  In guests
  * Ubuntu 8.10 server, 64-bit (virtual machine install)
  * kernel 2.6.27-7-server from Ubuntu 8.10

I'm running the guests like:
  sudo /usr/local/bin/qemu-system-x86_64\
 -daemonize \
 -no-kvm-irqchip\
 -hda Imgs/ndev_root.img\
 -m 1024\
 -cdrom ISOs/ubuntu-8.10-server-amd64.iso   \
 -vnc :4\
 -net nic,macaddr=DE:AD:BE:EF:04:04,model=e1000 \
 -net tap,ifname=tap4,script=/home/chris/kvm/qemu-ifup.sh 

The problem does not happen if I use -no-kvm.

I've tried some other options that have no effect:
  -no-kvm-pit
  -no-acpi

The disk images are raw format.

When the guests hang, I cannot ping them, and the vnc console is hung.  The 
qemu monitor is still accessible, and the guests recover if I issue a 
system_reset command from the monitor.  However, often, the console will not 
take keyboard after doing so.

When the guest is hung, kvm_stat shows all 0s for the counters:

efer_relo  exits  fpu_reloa  halt_exit  halt_wake  host_stat  hypercall
+insn_emul  insn_emul invlpg   io_exits  irq_exits  irq_windo  largepage
+mmio_exit  mmu_cache  mmu_flood  mmu_pde_z  mmu_pte_u  mmu_pte_w  mmu_recyc
+mmu_shado  nmi_windo   pf_fixed   pf_guest  remote_tl  request_i  signal_ex
+tlb_flush
  0  0  0  0  0  0  0
+0  0  0  0  0  0  0  0
+0  0  0  0  0  0  0  0
+0  0  0  0  0  0

gdb shows two threads - both waiting:

c(gdb) info threads
  2 Thread 0x414f1950 (LWP 422)  0x7f36f07a03e1 in sigtimedwait ()
   from /lib/libc.so.6
  1 Thread 0x7f36f1f306e0 (LWP 414)  0x7f36f084b482 in select ()
   from /lib/libc.so.6
(gdb) thread 1
[Switching to thread 1 (Thread 0x7f36f1f306e0 (LWP 414))]#0  0x7f36f084b482
+in select () from /lib/libc.so.6
(gdb) bt
#0  0x7f36f084b482 in select () from /lib/libc.so.6
#1  0x004094cb in main_loop_wait (timeout=0)
at /home/chris/pkgs/kvm/kvm-79/qemu/vl.c:4719
#2  0x0050a7ea in kvm_main_loop ()
at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:619
#3  0x0040fafc in main (argc=value optimized out,
argv=0x79f41948) at /home/chris/pkgs/kvm/kvm-79/qemu/vl.c:4871
(gdb) thread 2
[Switching to thread 2 (Thread 0x414f1950 (LWP 422))]#0  0x7f36f07a03e1 in
+sigtimedwait () from /lib/libc.so.6
(gdb) bt
#0  0x7f36f07a03e1 in sigtimedwait () from /lib/libc.so.6
#1  0x0050a560 in kvm_main_loop_wait (env=0xc319e0, timeout=0)
at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:284
#2  0x0050aaf7 in ap_main_loop (_env=value optimized out)
at /home/chris/pkgs/kvm/kvm-79/qemu/qemu-kvm.c:425
#3  0x7f36f11ba3ea in start_thread () from /lib/libpthread.so.0
#4  0x7f36f0852c6d in clone () from /lib/libc.so.6
#5  0x in ?? ()


Any clues to help me resolve this would be much appreciated.


--

Comment By: Teodor Milkov (z-image)
Date: 2009-08-24 10:45

Message:
With 2.6.31-rc6 it is running fine for almost 72 hours. Looks like the
problem is gone in 2.6.31.

--

Comment By: Teodor Milkov (z-image)
Date: 2009-08-21 11:53

Message:
With -no-kvm-pit it is running fine for almost 20 hours. Didn't survive
that long without -no-kvm-pit.

--

Comment By: Daniel Poelzleithner (poelzi)
Date: 2009-08-20 18:20

Message:
I'm still investigating, but I have some new information so far. There seem to
be different issues that

vhost net: performance with ping benchmark

2009-08-24 Thread Michael S. Tsirkin

At Rusty's suggestion, I tested vhost base performance with ping.
Results below, and seem to be what you'd expect.  I'm working on TSO
support, expect results shortly.



latency with ping (lower is better):
native:
[r...@virtlab17 ~]# ping -c 100 -f -q  21.1.50.4
PING 21.1.50.4 (21.1.50.4) 56(84) bytes of data.

--- 21.1.50.4 ping statistics ---
100 packets transmitted, 100 received, 0% packet loss, time 73624ms
rtt min/avg/max/mdev = 0.047/0.061/1.253/0.036 ms, ipg/ewma 0.073/0.097 ms

vhost:
[r...@virtlab17 ~]# ping -c 100 -f -q  20.1.50.4  
PING 20.1.50.4 (20.1.50.4) 56(84) bytes of data.

--- 20.1.50.4 ping statistics ---
100 packets transmitted, 100 received, 0% packet loss, time 92308ms
rtt min/avg/max/mdev = 0.064/0.080/1.062/0.041 ms, ipg/ewma 0.092/0.083 ms

userspace:
[r...@virtlab17 ~]# ping -c 10 -f -q  20.1.50.4
PING 20.1.50.4 (20.1.50.4) 56(84) bytes of data.

--- 20.1.50.4 ping statistics ---
10 packets transmitted, 10 received, 0% packet loss, time 54473ms
rtt min/avg/max/mdev = 0.219/0.505/2.342/0.131 ms, ipg/ewma 0.544/0.485 ms


Conclusion: the latency difference between native and vhost is about
20usec; userspace is way slower.
This basically matches what was observed with venet.

##

throughput with ping (lower time is better):

native:
[r...@virtlab17 ~]# ping -s 1024 -l 120 -c 10 -f -q 20.1.50.2
PING 20.1.50.2 (20.1.50.2) 1024(1052) bytes of data.

--- 20.1.50.2 ping statistics ---
10 packets transmitted, 10 received, 0% packet loss, time 3582ms
rtt min/avg/max/mdev = 0.105/4.155/5.471/0.471 ms, pipe 120, ipg/ewma 
0.035/4.567 ms

vhost:
[r...@virtlab17 ~]# ping -s 1024 -l 120 -c 10 -f -q 20.1.50.4
PING 20.1.50.4 (20.1.50.4) 1024(1052) bytes of data.

--- 20.1.50.4 ping statistics ---
10 packets transmitted, 10 received, 0% packet loss, time 3900ms
rtt min/avg/max/mdev = 0.354/4.129/6.009/0.520 ms, pipe 120, ipg/ewma 
0.039/3.109 ms

userspace:

[r...@virtlab17 ~]# ping -s 1024 -l 120 -c 100 -f -q 20.1.50.4
PING 20.1.50.4 (20.1.50.4) 1024(1052) bytes of data.

--- 20.1.50.4 ping statistics ---
100 packets transmitted, 999731 received, 0% packet loss, time 45082ms
rtt min/avg/max/mdev = 0.299/4.130/8.143/1.094 ms, pipe 120, ipg/ewma 
0.045/1.117 ms

Conclusion: for throughput vhost is half-way between native and
userspace. Again, same thing as was observed with venet.

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v4 0/9] make interrupt injection lockless (almost)

2009-08-24 Thread Gleb Natapov

kvm->irq_lock protects too much stuff, but still fails to protect
everything it was designed to protect (see ack notifiers call in pic). I
want to make the IRQ injection fast path as lockless as possible. This patch
series removes kvm->irq_lock from the irq injection path, effectively making
interrupt injection to lapic lockless (several kvm_irq_delivery_to_apic()
may run in parallel), but access to lapic was never fully locked in the
first place. VCPU could access lapic in parallel with interrupt injection.
Patches 2-3 change the irq routing data structure to a much more efficient one.

v1-v2:
  Drop MSI injection interface (for now).
  Use irq_lock to protect irq routing and ack notifiers.
  Splitting irq routing table changes to two patches (+ comments
  addressed).
  Drop ioapic/pic lock before calling ack notifiers.
v2-v3
  Drop patch that changes irq_lock to spinlock.
  Use mutex for ioapic lock.
  Do not call ack notifier if there is no GSI mapping.
  Call pic_clear_isr() after PIC state completely changed.
v3-v4
  Add patch to move irq sharing information to irqchip level
  Do not remove call of ack notifiers on pic reset (yet).
  Call irq->set() function outside of RCU read section

Gleb Natapov (9):
  Call pic_clear_isr() on pic reset to reuse logic there.
  Move irq sharing information to irqchip level.
  Change irq routing table to use gsi indexed array.
  Maintain back mapping from irqchip/pin to gsi.
  Move irq routing data structure to rcu locking
  Move irq ack notifier list to arch independent code.
  Convert irq notifiers lists to RCU locking.
  Move IO APIC to its own lock.
  Drop kvm->irq_lock lock from irq injection path.

 arch/ia64/include/asm/kvm.h  |1 +
 arch/ia64/include/asm/kvm_host.h |1 -
 arch/ia64/kvm/kvm-ia64.c |9 +--
 arch/x86/include/asm/kvm.h   |1 +
 arch/x86/include/asm/kvm_host.h  |2 -
 arch/x86/kvm/i8254.c |2 -
 arch/x86/kvm/i8259.c |   44 +
 arch/x86/kvm/irq.h   |1 +
 arch/x86/kvm/lapic.c |7 +-
 arch/x86/kvm/x86.c   |   12 +--
 include/linux/kvm_host.h |   20 +++-
 virt/kvm/eventfd.c   |2 -
 virt/kvm/ioapic.c|   80 +++
 virt/kvm/ioapic.h|5 +
 virt/kvm/irq_comm.c  |  212 ++
 virt/kvm/kvm_main.c  |4 +-
 16 files changed, 239 insertions(+), 164 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v4 3/9] Change irq routing table to use gsi indexed array.

2009-08-24 Thread Gleb Natapov

Use gsi indexed array instead of scanning all entries on each interrupt
injection.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 include/linux/kvm_host.h |   16 +++--
 virt/kvm/irq_comm.c  |   88 +++--
 virt/kvm/kvm_main.c  |1 -
 3 files changed, 66 insertions(+), 39 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index beab24b..802c080 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -129,7 +129,17 @@ struct kvm_kernel_irq_routing_entry {
} irqchip;
struct msi_msg msi;
};
-   struct list_head link;
+   struct hlist_node link;
+};
+
+struct kvm_irq_routing_table {
+   struct kvm_kernel_irq_routing_entry *rt_entries;
+   u32 nr_rt_entries;
+   /*
+* Array indexed by gsi. Each entry contains list of irq chips
+* the gsi is connected to.
+*/
+   struct hlist_head map[0];
 };
 
 struct kvm {
@@ -167,7 +177,7 @@ struct kvm {
 
struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-   struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
+   struct kvm_irq_routing_table *irq_routing;
struct hlist_head mask_notifier_list;
 #endif
 
@@ -396,7 +406,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic 
*ioapic,
   union kvm_ioapic_redirect_entry *entry,
   unsigned long *deliver_bitmask);
 #endif
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 11aa702..c9cfa70 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -144,10 +144,12 @@ static int kvm_set_msi(struct 
kvm_kernel_irq_routing_entry *e,
  *  = 0   Interrupt was coalesced (previous irq is still pending)
  *   0   Number of CPUs interrupt was delivered to
  */
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 {
struct kvm_kernel_irq_routing_entry *e;
int ret = -1;
+   struct kvm_irq_routing_table *irq_rt;
+   struct hlist_node *n;
 
trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -157,8 +159,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int 
irq, int level)
 * IOAPIC.  So set the bit in both. The guest will ignore
 * writes to the unused one.
 */
-   list_for_each_entry(e, kvm-irq_routing, link)
-   if (e-gsi == irq) {
+   irq_rt = kvm-irq_routing;
+   if (irq  irq_rt-nr_rt_entries)
+   hlist_for_each_entry(e, n, irq_rt-map[irq], link) {
int r = e-set(e, kvm, irq_source_id, level);
if (r  0)
continue;
@@ -170,20 +173,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int 
irq, int level)
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-   struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_ack_notifier *kian;
struct hlist_node *n;
unsigned gsi = pin;
+   int i;
 
trace_kvm_ack_irq(irqchip, pin);
 
-   list_for_each_entry(e, kvm-irq_routing, link)
+   for (i = 0; i  kvm-irq_routing-nr_rt_entries; i++) {
+   struct kvm_kernel_irq_routing_entry *e;
+   e = kvm-irq_routing-rt_entries[i];
if (e-type == KVM_IRQ_ROUTING_IRQCHIP 
e-irqchip.irqchip == irqchip 
e-irqchip.pin == pin) {
gsi = e-gsi;
break;
}
+   }
 
hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list, link)
if (kian-gsi == gsi)
@@ -278,26 +284,30 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, 
bool mask)
kimn-func(kimn, mask);
 }
 
-static void __kvm_free_irq_routing(struct list_head *irq_routing)
-{
-   struct kvm_kernel_irq_routing_entry *e, *n;
-
-   list_for_each_entry_safe(e, n, irq_routing, link)
-   kfree(e);
-}
-
 void kvm_free_irq_routing(struct kvm *kvm)
 {
mutex_lock(kvm-irq_lock);
-   __kvm_free_irq_routing(kvm-irq_routing);
+   kfree(kvm-irq_routing);
mutex_unlock(kvm-irq_lock);
 }
 
-static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+  struct kvm_kernel_irq_routing_entry *e,
   const struct kvm_irq_routing_entry *ue)
 {
int r = -EINVAL;
int

[PATCH v4 2/9] Move irq sharing information to irqchip level.

2009-08-24 Thread Gleb Natapov

This removes assumptions that max GSIs is smaller than number of pins.
Sharing is tracked on pin level not GSI level.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/x86/include/asm/kvm_host.h |1 -
 arch/x86/kvm/irq.h  |1 +
 include/linux/kvm_host.h|2 +-
 virt/kvm/ioapic.h   |1 +
 virt/kvm/irq_comm.c |   57 +++---
 5 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 33901be..6b02f86 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -413,7 +413,6 @@ struct kvm_arch{
gpa_t ept_identity_map_addr;
 
unsigned long irq_sources_bitmap;
-   unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
u64 vm_init_tsc;
 };
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 7d6058a..c025a23 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -71,6 +71,7 @@ struct kvm_pic {
int output; /* intr from master PIC */
struct kvm_io_device dev;
void (*ack_notifier)(void *opaque, int irq);
+   unsigned long irq_states[16];
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f814512..beab24b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -121,7 +121,7 @@ struct kvm_kernel_irq_routing_entry {
u32 gsi;
u32 type;
int (*set)(struct kvm_kernel_irq_routing_entry *e,
-   struct kvm *kvm, int level);
+  struct kvm *kvm, int irq_source_id, int level);
union {
struct {
unsigned irqchip;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7080b71..6e461ad 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -41,6 +41,7 @@ struct kvm_ioapic {
u32 irr;
u32 pad;
union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
+   unsigned long irq_states[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 001663f..11aa702 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -31,20 +31,39 @@
 
 #include ioapic.h
 
+static inline int kvm_irq_line_state(unsigned long *irq_state,
+int irq_source_id, int level)
+{
+   /* Logical OR for level trig interrupt */
+   if (level)
+   set_bit(irq_source_id, irq_state);
+   else
+   clear_bit(irq_source_id, irq_state);
+
+   return !!(*irq_state);
+}
+
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm *kvm, int level)
+  struct kvm *kvm, int irq_source_id, int level)
 {
 #ifdef CONFIG_X86
-   return kvm_pic_set_irq(pic_irqchip(kvm), e-irqchip.pin, level);
+   struct kvm_pic *pic = pic_irqchip(kvm);
+   level = kvm_irq_line_state(pic-irq_states[e-irqchip.pin],
+  irq_source_id, level);
+   return kvm_pic_set_irq(pic, e-irqchip.pin, level);
 #else
return -1;
 #endif
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
- struct kvm *kvm, int level)
+ struct kvm *kvm, int irq_source_id, int level)
 {
-   return kvm_ioapic_set_irq(kvm-arch.vioapic, e-irqchip.pin, level);
+   struct kvm_ioapic *ioapic = kvm-arch.vioapic;
+   level = kvm_irq_line_state(ioapic-irq_states[e-irqchip.pin],
+  irq_source_id, level);
+
+   return kvm_ioapic_set_irq(ioapic, e-irqchip.pin, level);
 }
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
@@ -96,10 +115,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
 }
 
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-  struct kvm *kvm, int level)
+  struct kvm *kvm, int irq_source_id, int level)
 {
struct kvm_lapic_irq irq;
 
+   if (!level)
+   return -1;
+
trace_kvm_msi_set_irq(e-msi.address_lo, e-msi.data);
 
irq.dest_id = (e-msi.address_lo 
@@ -125,34 +147,19 @@ static int kvm_set_msi(struct 
kvm_kernel_irq_routing_entry *e,
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
struct kvm_kernel_irq_routing_entry *e;
-   unsigned long *irq_state, sig_level;
int ret = -1;
 
trace_kvm_set_irq(irq, level, irq_source_id);
 
WARN_ON(!mutex_is_locked(kvm-irq_lock));
 
-   if (irq  KVM_IOAPIC_NUM_PINS) {
-   irq_state = (unsigned long *)kvm-arch.irq_states[irq];
-
-   /* Logical OR for level trig interrupt */
-   if (level)
-

[PATCH v4 1/9] Call pic_clear_isr() on pic reset to reuse logic there.

2009-08-24 Thread Gleb Natapov

Also move call of ack notifiers after pic state change.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/x86/kvm/i8259.c |   22 +-
 1 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 01f1516..ccc941a 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -225,22 +225,11 @@ int kvm_pic_read_irq(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-   int irq, irqbase, n;
+   int irq;
struct kvm *kvm = s-pics_state-irq_request_opaque;
struct kvm_vcpu *vcpu0 = kvm-bsp_vcpu;
+   u8 irr = s-irr, isr = s-imr;
 
-   if (s == s-pics_state-pics[0])
-   irqbase = 0;
-   else
-   irqbase = 8;
-
-   for (irq = 0; irq  PIC_NUM_PINS/2; irq++) {
-   if (vcpu0  kvm_apic_accept_pic_intr(vcpu0))
-   if (s-irr  (1  irq) || s-isr  (1  irq)) {
-   n = irq + irqbase;
-   kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
-   }
-   }
s-last_irr = 0;
s-irr = 0;
s-imr = 0;
@@ -256,6 +245,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
s-rotate_on_auto_eoi = 0;
s-special_fully_nested_mode = 0;
s-init4 = 0;
+
+   for (irq = 0; irq  PIC_NUM_PINS/2; irq++) {
+   if (vcpu0  kvm_apic_accept_pic_intr(vcpu0))
+   if (irr  (1  irq) || isr  (1  irq)) {
+   pic_clear_isr(s, irq);
+   }
+   }
 }
 
 static void pic_ioport_write(void *opaque, u32 addr, u32 val)
-- 
1.6.3.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v4 4/9] Maintain back mapping from irqchip/pin to gsi.

2009-08-24 Thread Gleb Natapov

Maintain a back mapping from irqchip/pin to gsi to speed up
interrupt acknowledgment notifications.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/ia64/include/asm/kvm.h |1 +
 arch/x86/include/asm/kvm.h  |1 +
 include/linux/kvm_host.h|1 +
 virt/kvm/irq_comm.c |   31 ++-
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index 18a7e49..bc90c75 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -60,6 +60,7 @@ struct kvm_ioapic_state {
 #define KVM_IRQCHIP_PIC_MASTER   0
 #define KVM_IRQCHIP_PIC_SLAVE1
 #define KVM_IRQCHIP_IOAPIC   2
+#define KVM_NR_IRQCHIPS  3
 
 #define KVM_CONTEXT_SIZE   8*1024
 
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4a5fe91..f02e87a 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -79,6 +79,7 @@ struct kvm_ioapic_state {
 #define KVM_IRQCHIP_PIC_MASTER   0
 #define KVM_IRQCHIP_PIC_SLAVE1
 #define KVM_IRQCHIP_IOAPIC   2
+#define KVM_NR_IRQCHIPS  3
 
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 802c080..00e4762 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -133,6 +133,7 @@ struct kvm_kernel_irq_routing_entry {
 };
 
 struct kvm_irq_routing_table {
+   int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
struct kvm_kernel_irq_routing_entry *rt_entries;
u32 nr_rt_entries;
/*
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index c9cfa70..6a8434d 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -175,25 +175,16 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned 
irqchip, unsigned pin)
 {
struct kvm_irq_ack_notifier *kian;
struct hlist_node *n;
-   unsigned gsi = pin;
-   int i;
+   int gsi;
 
trace_kvm_ack_irq(irqchip, pin);
 
-   for (i = 0; i  kvm-irq_routing-nr_rt_entries; i++) {
-   struct kvm_kernel_irq_routing_entry *e;
-   e = kvm-irq_routing-rt_entries[i];
-   if (e-type == KVM_IRQ_ROUTING_IRQCHIP 
-   e-irqchip.irqchip == irqchip 
-   e-irqchip.pin == pin) {
-   gsi = e-gsi;
-   break;
-   }
-   }
-
-   hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list, link)
-   if (kian-gsi == gsi)
-   kian-irq_acked(kian);
+   gsi = kvm-irq_routing-chip[irqchip][pin];
+   if (gsi != -1)
+   hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list,
+link)
+   if (kian-gsi == gsi)
+   kian-irq_acked(kian);
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -330,6 +321,9 @@ static int setup_routing_entry(struct kvm_irq_routing_table 
*rt,
}
e-irqchip.irqchip = ue-u.irqchip.irqchip;
e-irqchip.pin = ue-u.irqchip.pin + delta;
+   if (e-irqchip.pin = KVM_IOAPIC_NUM_PINS)
+   goto out;
+   rt-chip[ue-u.irqchip.irqchip][e-irqchip.pin] = ue-gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e-set = kvm_set_msi;
@@ -354,7 +348,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
unsigned flags)
 {
struct kvm_irq_routing_table *new, *old;
-   u32 i, nr_rt_entries = 0;
+   u32 i, j, nr_rt_entries = 0;
int r;
 
for (i = 0; i  nr; ++i) {
@@ -375,6 +369,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
new-rt_entries = (void *)new-map[nr_rt_entries];
 
new-nr_rt_entries = nr_rt_entries;
+   for (i = 0; i  3; i++)
+   for (j = 0; j  KVM_IOAPIC_NUM_PINS; j++)
+   new-chip[i][j] = -1;
 
for (i = 0; i  nr; ++i) {
r = -EINVAL;
-- 
1.6.3.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v4 5/9] Move irq routing data structure to rcu locking

2009-08-24 Thread Gleb Natapov


Signed-off-by: Gleb Natapov g...@redhat.com
---
 virt/kvm/irq_comm.c |   16 +++-
 1 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 6a8434d..8350050 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -159,7 +159,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 
irq, int level)
 * IOAPIC.  So set the bit in both. The guest will ignore
 * writes to the unused one.
 */
-   irq_rt = kvm-irq_routing;
+   rcu_read_lock();
+   irq_rt = rcu_dereference(kvm-irq_routing);
if (irq  irq_rt-nr_rt_entries)
hlist_for_each_entry(e, n, irq_rt-map[irq], link) {
int r = e-set(e, kvm, irq_source_id, level);
@@ -168,6 +169,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 
irq, int level)
 
ret = r + ((ret  0) ? 0 : ret);
}
+   rcu_read_unlock();
return ret;
 }
 
@@ -179,7 +181,10 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned 
irqchip, unsigned pin)
 
trace_kvm_ack_irq(irqchip, pin);
 
-   gsi = kvm-irq_routing-chip[irqchip][pin];
+   rcu_read_lock();
+   gsi = rcu_dereference(kvm-irq_routing)-chip[irqchip][pin];
+   rcu_read_unlock();
+
if (gsi != -1)
hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list,
 link)
@@ -277,9 +282,9 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool 
mask)
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
-   mutex_lock(kvm-irq_lock);
+   /* Called only during vm destruction. Nobody can use the pointer
+  at this stage */
kfree(kvm-irq_routing);
-   mutex_unlock(kvm-irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_irq_routing_table *rt,
@@ -385,8 +390,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
mutex_lock(kvm-irq_lock);
old = kvm-irq_routing;
-   kvm-irq_routing = new;
+   rcu_assign_pointer(kvm-irq_routing, new);
mutex_unlock(kvm-irq_lock);
+   synchronize_rcu();
 
new = old;
r = 0;
-- 
1.6.3.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v4 7/9] Convert irq notifiers lists to RCU locking.

2009-08-24 Thread Gleb Natapov

Use RCU locking for mask/ack notifiers lists.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 virt/kvm/irq_comm.c |   22 --
 1 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index d7393d6..71a5a43 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -183,19 +183,19 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned 
irqchip, unsigned pin)
 
rcu_read_lock();
gsi = rcu_dereference(kvm-irq_routing)-chip[irqchip][pin];
-   rcu_read_unlock();
-
if (gsi != -1)
-   hlist_for_each_entry(kian, n, kvm-irq_ack_notifier_list, link)
+   hlist_for_each_entry_rcu(kian, n, kvm-irq_ack_notifier_list,
+link)
if (kian-gsi == gsi)
kian-irq_acked(kian);
+   rcu_read_unlock();
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian)
 {
mutex_lock(kvm-irq_lock);
-   hlist_add_head(kian-link, kvm-irq_ack_notifier_list);
+   hlist_add_head_rcu(kian-link, kvm-irq_ack_notifier_list);
mutex_unlock(kvm-irq_lock);
 }
 
@@ -203,8 +203,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
 {
mutex_lock(kvm-irq_lock);
-   hlist_del_init(kian-link);
+   hlist_del_init_rcu(kian-link);
mutex_unlock(kvm-irq_lock);
+   synchronize_rcu();
 }
 
 int kvm_request_irq_source_id(struct kvm *kvm)
@@ -255,7 +256,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int 
irq,
 {
mutex_lock(kvm-irq_lock);
kimn-irq = irq;
-   hlist_add_head(kimn-link, kvm-mask_notifier_list);
+   hlist_add_head_rcu(kimn-link, kvm-mask_notifier_list);
mutex_unlock(kvm-irq_lock);
 }
 
@@ -263,8 +264,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int 
irq,
  struct kvm_irq_mask_notifier *kimn)
 {
mutex_lock(kvm-irq_lock);
-   hlist_del(kimn-link);
+   hlist_del_rcu(kimn-link);
mutex_unlock(kvm-irq_lock);
+   synchronize_rcu();
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -272,11 +274,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, 
bool mask)
struct kvm_irq_mask_notifier *kimn;
struct hlist_node *n;
 
-   WARN_ON(!mutex_is_locked(kvm-irq_lock));
-
-   hlist_for_each_entry(kimn, n, kvm-mask_notifier_list, link)
+   rcu_read_lock();
+   hlist_for_each_entry_rcu(kimn, n, kvm-mask_notifier_list, link)
if (kimn-irq == irq)
kimn-func(kimn, mask);
+   rcu_read_unlock();
 }
 
 void kvm_free_irq_routing(struct kvm *kvm)
-- 
1.6.3.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v4 6/9] Move irq ack notifier list to arch independent code.

2009-08-24 Thread Gleb Natapov

Mask irq notifier list is already there.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/ia64/include/asm/kvm_host.h |1 -
 arch/x86/include/asm/kvm_host.h  |1 -
 include/linux/kvm_host.h |1 +
 virt/kvm/irq_comm.c  |5 ++---
 virt/kvm/kvm_main.c  |1 +
 5 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index d9b6325..a362e67 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -475,7 +475,6 @@ struct kvm_arch {
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
int iommu_flags;
-   struct hlist_head irq_ack_notifier_list;
 
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6b02f86..ee13379 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -400,7 +400,6 @@ struct kvm_arch{
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
-   struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode;
 
unsigned int tss_addr;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 00e4762..75cf6ee 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -180,6 +180,7 @@ struct kvm {
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
struct kvm_irq_routing_table *irq_routing;
struct hlist_head mask_notifier_list;
+   struct hlist_head irq_ack_notifier_list;
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 8350050..d7393d6 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -186,8 +186,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned 
irqchip, unsigned pin)
rcu_read_unlock();
 
if (gsi != -1)
-   hlist_for_each_entry(kian, n, kvm-arch.irq_ack_notifier_list,
-link)
+   hlist_for_each_entry(kian, n, kvm-irq_ack_notifier_list, link)
if (kian-gsi == gsi)
kian-irq_acked(kian);
 }
@@ -196,7 +195,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian)
 {
mutex_lock(kvm-irq_lock);
-   hlist_add_head(kian-link, kvm-arch.irq_ack_notifier_list);
+   hlist_add_head(kian-link, kvm-irq_ack_notifier_list);
mutex_unlock(kvm-irq_lock);
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 50cc001..783fa7c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -945,6 +945,7 @@ static struct kvm *kvm_create_vm(void)
goto out;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
INIT_HLIST_HEAD(kvm-mask_notifier_list);
+   INIT_HLIST_HEAD(kvm-irq_ack_notifier_list);
 #endif
 
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-- 
1.6.3.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v4 8/9] Move IO APIC to its own lock.

2009-08-24 Thread Gleb Natapov


Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/ia64/kvm/kvm-ia64.c |7 +---
 arch/x86/kvm/i8259.c |   22 +---
 arch/x86/kvm/lapic.c |5 +--
 arch/x86/kvm/x86.c   |   10 +
 virt/kvm/ioapic.c|   80 +++---
 virt/kvm/ioapic.h|4 ++
 virt/kvm/irq_comm.c  |   23 -
 7 files changed, 100 insertions(+), 51 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0ad09f0..4a98314 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -851,8 +851,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
r = 0;
switch (chip-chip_id) {
case KVM_IRQCHIP_IOAPIC:
-   memcpy(chip-chip.ioapic, ioapic_irqchip(kvm),
-   sizeof(struct kvm_ioapic_state));
+   r = kvm_get_ioapic(kvm, chip-chip.ioapic);
break;
default:
r = -EINVAL;
@@ -868,9 +867,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct 
kvm_irqchip *chip)
r = 0;
switch (chip-chip_id) {
case KVM_IRQCHIP_IOAPIC:
-   memcpy(ioapic_irqchip(kvm),
-   chip-chip.ioapic,
-   sizeof(struct kvm_ioapic_state));
+   r = kvm_set_ioapic(kvm, chip-chip.ioapic);
break;
default:
r = -EINVAL;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index ccc941a..d057c0c 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
s-isr_ack |= (1  irq);
if (s != s-pics_state-pics[0])
irq += 8;
+   /*
+* We are dropping lock while calling ack notifiers since ack
+* notifier callbacks for assigned devices call into PIC recursively.
+* Other interrupt may be delivered to PIC while lock is dropped but
+* it should be safe since PIC state is already updated at this stage.
+*/
+   spin_unlock(s-pics_state-lock);
kvm_notify_acked_irq(s-pics_state-kvm, SELECT_PIC(irq), irq);
+   spin_lock(s-pics_state-lock);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
@@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
 static inline void pic_intack(struct kvm_kpic_state *s, int irq)
 {
s-isr |= 1  irq;
-   if (s-auto_eoi) {
-   if (s-rotate_on_auto_eoi)
-   s-priority_add = (irq + 1)  7;
-   pic_clear_isr(s, irq);
-   }
/*
 * We don't clear a level sensitive interrupt here
 */
if (!(s-elcr  (1  irq)))
s-irr = ~(1  irq);
+
+   if (s-auto_eoi) {
+   if (s-rotate_on_auto_eoi)
+   s-priority_add = (irq + 1)  7;
+   pic_clear_isr(s, irq);
+   }
+
 }
 
 int kvm_pic_read_irq(struct kvm *kvm)
@@ -294,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 
val)
priority = get_priority(s, s-isr);
if (priority != 8) {
irq = (priority + s-priority_add)  7;
-   pic_clear_isr(s, irq);
if (cmd == 5)
s-priority_add = (irq + 1)  7;
+   pic_clear_isr(s, irq);
pic_update_irq(s-pics_state);
}
break;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce195f8..f24d4d0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -471,11 +471,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
-   if (!(apic_get_reg(apic, APIC_SPIV)  APIC_SPIV_DIRECTED_EOI)) {
-   mutex_lock(apic-vcpu-kvm-irq_lock);
+   if (!(apic_get_reg(apic, APIC_SPIV)  APIC_SPIV_DIRECTED_EOI))
kvm_ioapic_update_eoi(apic-vcpu-kvm, vector, trigger_mode);
-   mutex_unlock(apic-vcpu-kvm-irq_lock);
-   }
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0f22f72..f49b2a1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2023,9 +2023,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, 
struct kvm_irqchip *chip)
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_IOAPIC:
-   memcpy(chip-chip.ioapic,
-   ioapic_irqchip(kvm),
-   sizeof(struct kvm_ioapic_state));
+   r = kvm_get_ioapic(kvm, chip-chip.ioapic);
break;
default:

[PATCH v4 9/9] Drop kvm->irq_lock lock from irq injection path.

2009-08-24 Thread Gleb Natapov

The only thing it protects now is interrupt injection into the lapic, and
this can work locklessly. Even now, with kvm->irq_lock in place, access
to the lapic is not entirely serialized, since vcpu access doesn't take
kvm->irq_lock.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/ia64/kvm/kvm-ia64.c |2 --
 arch/x86/kvm/i8254.c |2 --
 arch/x86/kvm/lapic.c |2 --
 arch/x86/kvm/x86.c   |2 --
 virt/kvm/eventfd.c   |2 --
 virt/kvm/irq_comm.c  |6 +-
 virt/kvm/kvm_main.c  |2 --
 7 files changed, 1 insertions(+), 17 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 4a98314..f534e0f 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -982,10 +982,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
-   mutex_lock(kvm-irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
-   mutex_unlock(kvm-irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, irq_event,
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523..b857ca3 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i;
 
-   mutex_lock(kvm-irq_lock);
kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 0);
-   mutex_unlock(kvm-irq_lock);
 
/*
 * Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f24d4d0..e41e948 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -501,9 +501,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
   irq.vector);
 
-   mutex_lock(apic-vcpu-kvm-irq_lock);
kvm_irq_delivery_to_apic(apic-vcpu-kvm, apic, irq);
-   mutex_unlock(apic-vcpu-kvm-irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f49b2a1..fe68745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2268,10 +2268,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
if (irqchip_in_kernel(kvm)) {
__s32 status;
-   mutex_lock(kvm-irq_lock);
status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
irq_event.irq, irq_event.level);
-   mutex_unlock(kvm-irq_lock);
if (ioctl == KVM_IRQ_LINE_STATUS) {
irq_event.status = status;
if (copy_to_user(argp, irq_event,
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 99017e8..95954ad 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work)
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
struct kvm *kvm = irqfd-kvm;
 
-   mutex_lock(kvm-irq_lock);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
-   mutex_unlock(kvm-irq_lock);
 }
 
 /*
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 036a431..47c0194 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -82,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct 
kvm_lapic *src,
int i, r = -1;
struct kvm_vcpu *vcpu, *lowest = NULL;
 
-   WARN_ON(!mutex_is_locked(kvm-irq_lock));
-
if (irq-dest_mode == 0  irq-dest_id == 0xff 
kvm_is_dm_lowest_prio(irq))
printk(KERN_INFO kvm: apic: phys broadcast and lowest prio\n);
@@ -138,7 +136,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry 
*e,
return kvm_irq_delivery_to_apic(kvm, NULL, irq);
 }
 
-/* This should be called with the kvm-irq_lock mutex held
+/*
  * Return value:
  *   0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -153,8 +151,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 
irq, int level)
 
trace_kvm_set_irq(irq, level, irq_source_id);
 
-   WARN_ON(!mutex_is_locked(kvm-irq_lock));
-
/* Not possible to detect if the guest uses the PIC or the
 * IOAPIC.  So set the bit in both. The guest will ignore
 * writes to the unused one.
diff --git a/virt/kvm/kvm_main.c

[PATCH] VMX: Return to userspace on invalid state emulation failure

2009-08-24 Thread Mohammed Gamal

Return to userspace instead of repeatedly trying to emulate
instructions that have already failed

Signed-off-by: Mohammed Gamal m.gamal...@gmail.com
---
 arch/x86/kvm/vmx.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1ee811c..423e44f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3341,6 +3341,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu 
*vcpu,
 
if (err != EMULATE_DONE) {
kvm_report_emulation_failure(vcpu, emulation failure);
+   kvm_run-exit_reason = KVM_EXIT_INTERNAL_ERROR;
+   kvm_run-internal.suberror = 
KVM_INTERNAL_ERROR_EMULATION;
break;
}
 
@@ -3612,7 +3614,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
vmx-entry_time = ktime_get();
 
/* Handle invalid guest state instead of entering VMX */
-   if (vmx-emulation_required  emulate_invalid_guest_state) {
+   if (vmx-emulation_required  emulate_invalid_guest_state
+!(kvm_run-exit_reason == KVM_EXIT_INTERNAL_ERROR  
+ kvm_run-internal.suberror == KVM_INTERNAL_ERROR_EMULATION)) {
handle_invalid_guest_state(vcpu, kvm_run);
return;
}
-- 
1.6.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.

2009-08-24 Thread Gleb Natapov

Use return value from kvm_set_irq() to track coalesced PIT interrupts
instead of ack/mask notifiers.

Signed-off-by: Gleb Natapov g...@redhat.com
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b857ca3..0b63991 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
 {
struct kvm_pit *pit = vcpu-kvm-arch.vpit;
 
-   if (pit  kvm_vcpu_is_bsp(vcpu)  pit-pit_state.irq_ack)
-   return atomic_read(pit-pit_state.pit_timer.pending);
-   return 0;
-}
-
-static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
-   struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
-irq_ack_notifier);
-   spin_lock(ps-inject_lock);
-   if (atomic_dec_return(ps-pit_timer.pending)  0)
-   atomic_inc(ps-pit_timer.pending);
-   ps-irq_ack = 1;
-   spin_unlock(ps-inject_lock);
+   return atomic_read(pit-pit_state.pit_timer.pending);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 
val, int is_period)
pt-vcpu = pt-kvm-bsp_vcpu;
 
atomic_set(pt-pending, 0);
-   ps-irq_ack = 1;
 
hrtimer_start(pt-timer, ktime_add_ns(ktime_get(), interval),
  HRTIMER_MODE_ABS);
@@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit)
mutex_unlock(pit-pit_state.lock);
 
atomic_set(pit-pit_state.pit_timer.pending, 0);
-   pit-pit_state.irq_ack = 1;
-}
-
-static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
-{
-   struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
-
-   if (!mask) {
-   atomic_set(pit-pit_state.pit_timer.pending, 0);
-   pit-pit_state.irq_ack = 1;
-   }
 }
 
 static const struct kvm_io_device_ops pit_dev_ops = {
@@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
mutex_init(pit-pit_state.lock);
mutex_lock(pit-pit_state.lock);
-   spin_lock_init(pit-pit_state.inject_lock);
 
kvm-arch.vpit = pit;
pit-kvm = kvm;
@@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
pit_state-pit = pit;
hrtimer_init(pit_state-pit_timer.timer,
 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-   pit_state-irq_ack_notifier.gsi = 0;
-   pit_state-irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
-   kvm_register_irq_ack_notifier(kvm, pit_state-irq_ack_notifier);
pit_state-pit_timer.reinject = true;
mutex_unlock(pit-pit_state.lock);
 
kvm_pit_reset(pit);
 
-   pit-mask_notifier.func = pit_mask_notifer;
-   kvm_register_irq_mask_notifier(kvm, 0, pit-mask_notifier);
-
kvm_iodevice_init(pit-dev, pit_dev_ops);
ret = __kvm_io_bus_register_dev(kvm-pio_bus, pit-dev);
if (ret  0)
@@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm)
struct hrtimer *timer;
 
if (kvm-arch.vpit) {
-   kvm_unregister_irq_mask_notifier(kvm, 0,
-  kvm-arch.vpit-mask_notifier);
-   kvm_unregister_irq_ack_notifier(kvm,
-   kvm-arch.vpit-pit_state.irq_ack_notifier);
mutex_lock(kvm-arch.vpit-pit_state.lock);
timer = kvm-arch.vpit-pit_state.pit_timer.timer;
hrtimer_cancel(timer);
@@ -683,12 +647,12 @@ void kvm_free_pit(struct kvm *kvm)
}
 }
 
-static void __inject_pit_timer_intr(struct kvm *kvm)
+static int __inject_pit_timer_intr(struct kvm *kvm)
 {
struct kvm_vcpu *vcpu;
-   int i;
+   int i, r;
 
-   kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1);
+   r = kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 0);
 
/*
@@ -703,6 +667,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
if (kvm-arch.vapics_in_nmi_mode  0)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_apic_nmi_wd_deliver(vcpu);
+
+   return r;
 }
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
@@ -711,20 +677,14 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu-kvm;
struct kvm_kpit_state *ps;
 
-   if (pit) {
-   int inject = 0;
-   ps = pit-pit_state;
-
-   /* Try to inject pending interrupts when
-* last one has been acked.
-*/
-   spin_lock(ps-inject_lock);
-   if (atomic_read(ps-pit_timer.pending)  ps-irq_ack) {
-   ps-irq_ack = 0;
-   inject = 1;
-   }
-   spin_unlock(ps-inject_lock);
-   if (inject)
-   __inject_pit_timer_intr(kvm);
-   }
+

Re: [PATCH] Fix sysenter migration issue on AMD CPUs

2009-08-24 Thread Thomas Besser

Andre Przywara wrote:
> Stephane, Thomas: Can you verify this?

I'm not very familiar with compiling kvm-mod from git sources. And your
patch does not apply to svm.c shipped with kernel 2.6.30.5

So at the moment I have no clue, how to verify. Is there any short howto out
there, how to get kvm module from git source?

Regards
Thomas


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Fix sysenter migration issue on AMD CPUs

2009-08-24 Thread Andre Przywara


Thomas Besser wrote:

> Andre Przywara wrote:

>> Stephane, Thomas: Can you verify this?


> I'm not very familiar with compiling kvm-mod from git sources. And your
> patch does not apply to svm.c shipped with kernel 2.6.30.5
You shouldn't have seen any problems with 2.6.30.5, since the code in 
question (sysenter/syscall emulation) is not in here.




> So at the moment I have no clue, how to verify. Is there any short howto out
> there, how to get kvm module from git source?
You can use the attached patch, which applies against 
kvm-kmod-devel-88.tar.gz


If that does not help, tell me which tree or tarball you usually use to
generate the KVM kernel modules. I use Avi's latest git tree, which is
regularly synced with 2.6.31.rcx. The patch from Friday should apply
against this one.

$ git clone git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git
You will have to build your whole kernel with this tree; if you build
only the modules from here, they will certainly mismatch your running kernel.


Regards,
Andre.

--
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 448 3567 12
to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Thomas M. McCoy; Giuliano Meroni
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632
diff --git a/x86/svm.c b/x86/svm.c
index fb29061..75d18bf 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -117,7 +117,6 @@ struct vcpu_svm {
 	unsigned long vmcb_pa;
 	struct svm_cpu_data *svm_data;
 	uint64_t asid_generation;
-	uint64_t sysenter_cs;
 	uint64_t sysenter_esp;
 	uint64_t sysenter_eip;
 
@@ -436,8 +435,6 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
 #endif
 	set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
 	set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
-	set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
-	set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
 }
 
 static void svm_enable_lbrv(struct vcpu_svm *svm)
@@ -2062,7 +2059,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 		break;
 #endif
 	case MSR_IA32_SYSENTER_CS:
-		*data = svm-sysenter_cs;
+		*data = svm-vmcb-save.sysenter_cs;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
 		*data = svm-sysenter_eip;
@@ -2151,13 +2148,15 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 		break;
 #endif
 	case MSR_IA32_SYSENTER_CS:
-		svm-sysenter_cs = data;
+		svm-vmcb-save.sysenter_cs = data;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
 		svm-sysenter_eip = data;
+		svm-vmcb-save.sysenter_eip = data;
 		break;
 	case MSR_IA32_SYSENTER_ESP:
 		svm-sysenter_esp = data;
+		svm-vmcb-save.sysenter_esp = data;
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
 		if (!svm_has(SVM_FEATURE_LBRV)) {

Re: [PATCH] Fix sysenter migration issue on AMD CPUs

2009-08-24 Thread Thomas Besser

Andre Przywara wrote:

> Thomas Besser wrote:
>> Andre Przywara wrote:
>>> Stephane, Thomas: Can you verify this?
>>
>> I'm not very familiar with compiling kvm-mod from git sources. And your
>> patch does not apply to svm.c shipped with kernel 2.6.30.5
> You shouldn't have seen any problems with 2.6.30.5, since the code in
> question (sysenter/syscall emulation) is not in here.

Both hosts are running 2.6.30.5 with kvm as a module from the kernel source,
so I must have a different problem with live migration and qemu-kvm (0.10.6).

Probably this http://article.gmane.org/gmane.comp.emulators.kvm.devel/39185

Thanx
Thomas

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[no subject]

2009-08-24 Thread Igor Trindade Oliveira

subscribe kvm


  

Veja quais são os assuntos do momento no Yahoo! +Buscados
http://br.maisbuscados.yahoo.com

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: qemu-kvm segfaults in qemu_del_timer (0.10.5 and 0.10.6)

2009-08-24 Thread Chris Webb

Chris Webb ch...@arachsys.com writes:

> With the following applied, VNC connections and disconnections still work
> correctly, so it doesn't horribly break anything, but I can't immediately
> confirm whether it will cure the rare segfaults as I haven't yet found a
> rapid way of reproducing the crashes other than by waiting for one.

Just to follow up on this: the backported patch has cured the vast majority of
VNC crashes we've been seeing on 0.10.6, although I still saw this one earlier
today:

Core was generated by `qemu-kvm -m 512 -smp 1 -uuid 
d6f2cb13-7421-4baa-a978-eda9bec9d075 -pidfile /var'.
Program terminated with signal 11, Segmentation fault.
[New process 16847]
[New process 16855]
(gdb) bt
#0  0x7fe42e9c6cb1 in memcpy () from /lib/libc.so.6
#1  0x004917e4 in vnc_write (vs=0x31a7f50, data=0x7fffe3a19230, len=2) 
at vnc.c:323
#2  0x004919bf in vnc_write_u16 (vs=0x7fe2f8cae023, value=value 
optimized out) at vnc.c:1035
#3  0x00491bf3 in vnc_framebuffer_update (vs=0x7fe2f8cae023, 
x=-475950544, y=2, w=16385, h=1, encoding=6)
at vnc.c:286
#4  0x00496660 in send_framebuffer_update (vs=0x7fe2f8cae023, 
x=-475950544, y=196, w=208, h=1) at vnc.c:598
#5  0x00496f65 in vnc_update_client (opaque=value optimized out) at 
vnc.c:754
#6  0x0040822a in main_loop_wait (timeout=value optimized out)
at /packages/qemu-kvm+vncfix/src-nUlCId/vl.c:1240
#7  0x0051753a in kvm_main_loop () at 
/packages/qemu-kvm+vncfix/src-nUlCId/qemu-kvm.c:596
#8  0x0040c8a5 in main (argc=value optimized out, argv=value 
optimized out, envp=value optimized out)
at /packages/qemu-kvm+vncfix/src-nUlCId/vl.c:3850
(gdb) f 1
#1  0x004917e4 in vnc_write (vs=0x31a7f50, data=0x7fffe3a19230, len=2) 
at vnc.c:323
323 memcpy(buffer-buffer + buffer-offset, data, len);
(gdb) f 1
#1  0x004917e4 in vnc_write (vs=0x31a7f50, data=0x7fffe3a19230, len=2) 
at vnc.c:323
323 memcpy(buffer-buffer + buffer-offset, data, len);
(gdb) p *vs
$1 = {timer = 0x2b90b20, csock = 18, ds = 0x28a1a20, vd = 0x28b0fc0, 
need_update = 1, dirty_row = {{0, 0, 0, 
  0} repeats 197 times, {65535, 262128, 0, 0}, {4294967295, 1, 0, 0}, 
{4294967288, 262143, 0, 0}, {4294443008, 
  262143, 0, 0}, {131071, 262128, 0, 0}, {4294967295, 1, 0, 0}, 
{4294967292, 262143, 0, 0}, {4294443008, 262143, 
  0, 0}, {131071, 262136, 0, 0}, {4294967295, 1, 0, 0}, {4294967292, 
262143, 0, 0}, {4294443008, 262143, 0, 0}, {
  131071, 262136, 0, 0}, {4294967295, 1, 0, 0}, {4294967292, 262143, 0, 0}, 
{4294705152, 262143, 0, 0}, {131071, 
  262136, 0, 0}, {4294967295, 1, 0, 0}, {4294967294, 262143, 0, 0}, 
{4294705152, 262143, 0, 0}, {131071, 262140, 
  0, 0}, {4294967295, 1, 0, 0}, {4294967294, 262143, 0, 0}, {4294836224, 
262143, 0, 0}, {131071, 262140, 0, 0}, {
  4294967295, 1, 0, 0}, {4294967294, 262143, 0, 0}, {4294836224, 262143, 0, 
0}, {131071, 262140, 0, 0}, {
  4294967295, 1, 0, 0}, {4294967295, 262143, 0, 0}, {4294836224, 262143, 0, 
0}, {131071, 262142, 0, 0}, {
  4294967295, 1, 0, 0}, {4294967295, 262143, 0, 0}, {4294901760, 262143, 0, 
0}, {131071, 262142, 0, 0}, {
  4294967295, 1, 0, 0}, {4294967295, 262143, 0, 0}, {4294901760, 262143, 0, 
0}, {131071, 262142, 0, 0}, {
  4294967295, 131073, 0, 0}, {4294967295, 262143, 0, 0}, {4294901760, 
262143, 0, 0}, {131071, 262143, 0, 0}, {
  4294967295, 131073, 0, 0}, {4294967295, 262143, 0, 0}, {4294934528, 
262143, 0, 0}, {131071, 262143, 0, 0}, {
  4294967295, 131075, 0, 0}, {4294967295, 262143, 0, 0}, {4294934528, 
262143, 0, 0}, {131071, 262143, 0, 0}, {
  4294967295, 196611, 0, 0}, {4294967295, 262143, 0, 0}, {4294934528, 
262143, 0, 0}, {2147614719, 262143, 0, 0}, {
  4294967295, 196611, 0, 0}, {4294967295, 262143, 0, 0}, {4294950912, 
262143, 0, 0}, {2147614719, 262143, 0, 0}, {
  4294967295, 196611, 0, 0}, {4294967295, 262143, 0, 0}, {4294950912, 
262143, 0, 0}, {2147614719, 262143, 0, 0}, {
  4294967295, 229379, 0, 0}, {4294967295, 262143, 0, 0}, {4294950912, 
262143, 0, 0}, {3221356543, 262143, 0, 0}, {
  4294967295, 229379, 0, 0}, {4294967295, 262143, 0, 0}, {4294950912, 
262143, 0, 0}, {3221356543, 262143, 0, 0}, {
  4294967295, 229377, 0, 0}, {4294967295, 262143, 0, 0}, {4294959104, 
262143, 0, 0}, {3221356543, 262143, 0, 0}, {
  4294967295, 245761, 0, 0}, {4294967295, 262143, 0, 0}, {4294959104, 
262143, 0, 0}, {3758227455, 262143, 0, 0}, {
  4294967295, 245761, 0, 0}, {4294967295, 262143, 0, 0}, {4294959104, 
262143, 0, 0}, {3758227455, 262143, 0, 0}, {
  4294967295, 245761, 0, 0}, {4294967295, 262143, 0, 0}, {4294963200, 
262143, 0, 0}, {3758227455, 262143, 0, 0}, {
  4294967295, 253953, 0, 0}, {4294967295, 262143, 0, 0}, {4294963200, 
262143, 0, 0}, {4026662911, 262143, 0, 0}, {
  4294967295, 253953, 0, 0}, {4294967295, 262143, 0, 0}, {4294963200, 
262143, 0, 0}, {4026662911, 262143, 0, 0}, {
  4294967295, 253953, 0, 0},

Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.

2009-08-24 Thread Marcelo Tosatti

On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote:
 Use return value from kvm_set_irq() to track coalesced PIT interrupts
 instead of ack/mask notifiers.

Gleb,

What is the advantage of doing so?

Ack notifiers are asynchronous notifications. Using the return value
from kvm_set_irq implies that timer emulation is based on a tick
generating device on the host side.

What I mean is that the ack notifications are useful, since they are
asynchronous.

Supposing your goal is to get rid of ack notifiers, due to their burden 
in irqchip code?

 Signed-off-by: Gleb Natapov g...@redhat.com
 diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
 index b857ca3..0b63991 100644
 --- a/arch/x86/kvm/i8254.c
 +++ b/arch/x86/kvm/i8254.c
 @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
  {
   struct kvm_pit *pit = vcpu-kvm-arch.vpit;
  
 - if (pit  kvm_vcpu_is_bsp(vcpu)  pit-pit_state.irq_ack)
 - return atomic_read(pit-pit_state.pit_timer.pending);
 - return 0;
 -}
 -
 -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 -{
 - struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 -  irq_ack_notifier);
 - spin_lock(ps-inject_lock);
 - if (atomic_dec_return(ps-pit_timer.pending)  0)
 - atomic_inc(ps-pit_timer.pending);
 - ps-irq_ack = 1;
 - spin_unlock(ps-inject_lock);
 + return atomic_read(pit-pit_state.pit_timer.pending);
  }
  
  void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, 
 u32 val, int is_period)
   pt-vcpu = pt-kvm-bsp_vcpu;
  
   atomic_set(pt-pending, 0);
 - ps-irq_ack = 1;
  
   hrtimer_start(pt-timer, ktime_add_ns(ktime_get(), interval),
 HRTIMER_MODE_ABS);
 @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit)
   mutex_unlock(pit-pit_state.lock);
  
   atomic_set(pit-pit_state.pit_timer.pending, 0);
 - pit-pit_state.irq_ack = 1;
 -}
 -
 -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
 -{
 - struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
 -
 - if (!mask) {
 - atomic_set(pit-pit_state.pit_timer.pending, 0);
 - pit-pit_state.irq_ack = 1;
 - }
  }
  
  static const struct kvm_io_device_ops pit_dev_ops = {
 @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
  
   mutex_init(pit-pit_state.lock);
   mutex_lock(pit-pit_state.lock);
 - spin_lock_init(pit-pit_state.inject_lock);
  
   kvm-arch.vpit = pit;
   pit-kvm = kvm;
 @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 
 flags)
   pit_state-pit = pit;
   hrtimer_init(pit_state-pit_timer.timer,
CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 - pit_state-irq_ack_notifier.gsi = 0;
 - pit_state-irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
 - kvm_register_irq_ack_notifier(kvm, pit_state-irq_ack_notifier);
   pit_state-pit_timer.reinject = true;
   mutex_unlock(pit-pit_state.lock);
  
   kvm_pit_reset(pit);
  
 - pit-mask_notifier.func = pit_mask_notifer;
 - kvm_register_irq_mask_notifier(kvm, 0, pit-mask_notifier);
 -
   kvm_iodevice_init(pit-dev, pit_dev_ops);
   ret = __kvm_io_bus_register_dev(kvm-pio_bus, pit-dev);
   if (ret  0)
 @@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm)
   struct hrtimer *timer;
  
   if (kvm-arch.vpit) {
 - kvm_unregister_irq_mask_notifier(kvm, 0,
 -kvm-arch.vpit-mask_notifier);
 - kvm_unregister_irq_ack_notifier(kvm,
 - kvm-arch.vpit-pit_state.irq_ack_notifier);
   mutex_lock(kvm-arch.vpit-pit_state.lock);
   timer = kvm-arch.vpit-pit_state.pit_timer.timer;
   hrtimer_cancel(timer);
 @@ -683,12 +647,12 @@ void kvm_free_pit(struct kvm *kvm)
   }
  }
  
 -static void __inject_pit_timer_intr(struct kvm *kvm)
 +static int __inject_pit_timer_intr(struct kvm *kvm)
  {
   struct kvm_vcpu *vcpu;
 - int i;
 + int i, r;
  
 - kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1);
 + r = kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 1);
   kvm_set_irq(kvm, kvm-arch.vpit-irq_source_id, 0, 0);
  
   /*
 @@ -703,6 +667,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm)
   if (kvm-arch.vapics_in_nmi_mode  0)
   kvm_for_each_vcpu(i, vcpu, kvm)
   kvm_apic_nmi_wd_deliver(vcpu);
 +
 + return r;
  }
  
  void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
 @@ -711,20 +677,14 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
   struct kvm *kvm = vcpu-kvm;
   struct kvm_kpit_state *ps;
  
 - if (pit) {
 - int inject = 0;
 - ps = pit-pit_state;
 -
 - /* Try to

Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.

2009-08-24 Thread Gleb Natapov

On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote:
 On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote:
  Use return value from kvm_set_irq() to track coalesced PIT interrupts
  instead of ack/mask notifiers.
 
 Gleb,
 
 What is the advantage of doing so?
 
The current code is very fragile and relies on hacks to work. Let's take calling
of ack notifiers on pic reset as an example. Why is it needed? It is
obviously the wrong thing to do from the assigned devices' POV. Why does ioapic
call mask notifiers when pic doesn't?

Besides diffstat for the patch shows:
2 files changed, 16 insertions(+), 59 deletions(-)

43 lines less for the same functionality. Looks like clear win to me.

 Ack notifiers are asynchronous notifications. Using the return value
 from kvm_set_irq implies that timer emulation is based on a tick
 generating device on the host side.
No notification is needed in the first place. You know immediately
if injection fails or not. I don't see why using return value from
kvm_set_irq implies that timer emulation is based on a tick generating
device on the host side? What can you do with ack notifiers that can't
be done without?

 What I mean is that the ack notifications are useful, since they are
 asynchronous.
 
What I mean is that no notification is needed at all since result is
known immediately.

 Supposing your goal is to get rid of ack notifiers, due to their burden 
 in irqchip code?
 
Unfortunately to get rid of ack notifiers we need to get rid of assigned
devices. I will gladly do that, but I doubt Avi shares my enthusiasm.
The patch to remove mask notification already sits in my patch queue though.

  Signed-off-by: Gleb Natapov g...@redhat.com
  diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
  index b857ca3..0b63991 100644
  --- a/arch/x86/kvm/i8254.c
  +++ b/arch/x86/kvm/i8254.c
  @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
   {
  struct kvm_pit *pit = vcpu-kvm-arch.vpit;
   
  -   if (pit  kvm_vcpu_is_bsp(vcpu)  pit-pit_state.irq_ack)
  -   return atomic_read(pit-pit_state.pit_timer.pending);
  -   return 0;
  -}
  -
  -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
  -{
  -   struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
  -irq_ack_notifier);
  -   spin_lock(ps-inject_lock);
  -   if (atomic_dec_return(ps-pit_timer.pending)  0)
  -   atomic_inc(ps-pit_timer.pending);
  -   ps-irq_ack = 1;
  -   spin_unlock(ps-inject_lock);
  +   return atomic_read(pit-pit_state.pit_timer.pending);
   }
   
   void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
  @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state *ps, 
  u32 val, int is_period)
  pt-vcpu = pt-kvm-bsp_vcpu;
   
  atomic_set(pt-pending, 0);
  -   ps-irq_ack = 1;
   
  hrtimer_start(pt-timer, ktime_add_ns(ktime_get(), interval),
HRTIMER_MODE_ABS);
  @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit)
  mutex_unlock(pit-pit_state.lock);
   
  atomic_set(pit-pit_state.pit_timer.pending, 0);
  -   pit-pit_state.irq_ack = 1;
  -}
  -
  -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
  -{
  -   struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
  -
  -   if (!mask) {
  -   atomic_set(pit-pit_state.pit_timer.pending, 0);
  -   pit-pit_state.irq_ack = 1;
  -   }
   }
   
   static const struct kvm_io_device_ops pit_dev_ops = {
  @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 
  flags)
   
  mutex_init(pit-pit_state.lock);
  mutex_lock(pit-pit_state.lock);
  -   spin_lock_init(pit-pit_state.inject_lock);
   
  kvm-arch.vpit = pit;
  pit-kvm = kvm;
  @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 
  flags)
  pit_state-pit = pit;
  hrtimer_init(pit_state-pit_timer.timer,
   CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
  -   pit_state-irq_ack_notifier.gsi = 0;
  -   pit_state-irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
  -   kvm_register_irq_ack_notifier(kvm, pit_state-irq_ack_notifier);
  pit_state-pit_timer.reinject = true;
  mutex_unlock(pit-pit_state.lock);
   
  kvm_pit_reset(pit);
   
  -   pit-mask_notifier.func = pit_mask_notifer;
  -   kvm_register_irq_mask_notifier(kvm, 0, pit-mask_notifier);
  -
  kvm_iodevice_init(pit-dev, pit_dev_ops);
  ret = __kvm_io_bus_register_dev(kvm-pio_bus, pit-dev);
  if (ret  0)
  @@ -670,10 +638,6 @@ void kvm_free_pit(struct kvm *kvm)
  struct hrtimer *timer;
   
  if (kvm-arch.vpit) {
  -   kvm_unregister_irq_mask_notifier(kvm, 0,
  -  kvm-arch.vpit-mask_notifier);
  -   kvm_unregister_irq_ack_notifier(kvm,
  -   kvm-arch.vpit-pit_state.irq_ack_notifier);
  mutex_lock(kvm-arch.vpit-pit_state.lock);
  timer =

Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.

2009-08-24 Thread Marcelo Tosatti

On Mon, Aug 24, 2009 at 08:16:46PM +0300, Gleb Natapov wrote:
 On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote:
  On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote:
   Use return value from kvm_set_irq() to track coalesced PIT interrupts
   instead of ack/mask notifiers.
  
  Gleb,
  
  What is the advantage of doing so?
  
 Current code very fragile and relies on hacks to work. Lets take calling
 of ack notifiers on pic reset as an example. Why is it needed? 

To signal the ack notifiers users that, in case of reset with pending
IRR, the given interrupt has been acked (its an artificial ack event).

Is there a need to differentiate between actual interrupt ack and reset
with pending IRR? At the time this code was written, there was no
indication that differentiation would be necessary.

 It is obviously wrong thing to do from assigned devices POV.

Thats not entirely clear to me. So what happens if a guest with PIC
assigned device resets with a pending IRR? The host interrupt line will
be kept disabled, even though the guest is able to process further
interrupts?

 Why ioapic calls mask notifiers but pic doesn't?

Because it is not implemented.

 Besides diffstat for the patch shows:
 2 files changed, 16 insertions(+), 59 deletions(-)
 
 43 lines less for the same functionality. Looks like clear win to me.
 
  Ack notifiers are asynchronous notifications. Using the return value
  from kvm_set_irq implies that timer emulation is based on a tick
  generating device on the host side.
 No notification is needed in the first place. You know immediately
 if injection fails or not. I don't see why using return value from
 kvm_set_irq implies that timer emulation is based on a tick generating
 device on the host side? What can you do with ack notifiers that can't
 be done without?

If you don't have a host timer emulating the guest PIT, to periodically
bang on kvm_set_irq, how do you know when to attempt reinjection?

You keep calling kvm_set_irq on every guest entry to figure out when 
reinjection is possible?

  What I mean is that the ack notifications are useful, since they are
  asynchronous.
  
 What I mean is that no notification is needed at all since result is
 known immediately.
?
  Supposing your goal is to get rid of ack notifiers, due to their burden 
  in irqchip code?
  
 Unfortunately to get rid of ack notifiers we need to get rid of assigned
 devices. I will gladly do that, but I doubt Avi shares my enthusiasm.
 The patch to remove mask notification already sits in my patch queue though.
 
   Signed-off-by: Gleb Natapov g...@redhat.com
   diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
   index b857ca3..0b63991 100644
   --- a/arch/x86/kvm/i8254.c
   +++ b/arch/x86/kvm/i8254.c
   @@ -231,20 +231,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
{
 struct kvm_pit *pit = vcpu-kvm-arch.vpit;

   - if (pit  kvm_vcpu_is_bsp(vcpu)  pit-pit_state.irq_ack)
   - return atomic_read(pit-pit_state.pit_timer.pending);
   - return 0;
   -}
   -
   -static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
   -{
   - struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
   -  irq_ack_notifier);
   - spin_lock(ps-inject_lock);
   - if (atomic_dec_return(ps-pit_timer.pending)  0)
   - atomic_inc(ps-pit_timer.pending);
   - ps-irq_ack = 1;
   - spin_unlock(ps-inject_lock);
   + return atomic_read(pit-pit_state.pit_timer.pending);
}

void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
   @@ -297,7 +284,6 @@ static void create_pit_timer(struct kvm_kpit_state 
   *ps, u32 val, int is_period)
 pt-vcpu = pt-kvm-bsp_vcpu;

 atomic_set(pt-pending, 0);
   - ps-irq_ack = 1;

 hrtimer_start(pt-timer, ktime_add_ns(ktime_get(), interval),
   HRTIMER_MODE_ABS);
   @@ -577,17 +563,6 @@ void kvm_pit_reset(struct kvm_pit *pit)
 mutex_unlock(pit-pit_state.lock);

 atomic_set(pit-pit_state.pit_timer.pending, 0);
   - pit-pit_state.irq_ack = 1;
   -}
   -
   -static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool 
   mask)
   -{
   - struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
   -
   - if (!mask) {
   - atomic_set(pit-pit_state.pit_timer.pending, 0);
   - pit-pit_state.irq_ack = 1;
   - }
}

static const struct kvm_io_device_ops pit_dev_ops = {
   @@ -619,7 +594,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 
   flags)

 mutex_init(pit-pit_state.lock);
 mutex_lock(pit-pit_state.lock);
   - spin_lock_init(pit-pit_state.inject_lock);

 kvm-arch.vpit = pit;
 pit-kvm = kvm;
   @@ -628,17 +602,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 
   flags)
 pit_state-pit = pit;
 hrtimer_init(pit_state-pit_timer.timer,
  CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
   - pit_state-irq_ack_notifier.gsi = 0;
   -

Re: [RFC] defer skb allocation in virtio_net -- mergable buff part

2009-08-24 Thread Shirley Ma

Hello Avi,

Thanks for your review. I was on vacation last week.

On Sun, 2009-08-16 at 16:47 +0300, Avi Kivity wrote:
 Alternatives include:
 - store the link in the page itself
 - have an array of pages per list element instead of just one pointer
 - combine the two, store an array of page pointers in one of the free
 pages
 - use the struct page::lru member
 
 The last is the most traditional and easiest so I'd recommend it
 (though 
 it still takes the cacheline hit).

I prefer the combination of the two. But I will compare the performance
of the alternatives; if there is not much difference, we can use the easiest one.

  +static struct page_list *get_a_free_page(struct virtnet_info *vi,
 gfp_t gfp_mask)
  +{
  + struct page_list *plist;
  +
  + if (list_empty(vi-freed_pages)) {
  + plist = kmalloc(sizeof(struct page_list), gfp_mask);
  + if (!plist)
  + return NULL;
  + list_add_tail(plist-list,vi-freed_pages);
  + plist-page = alloc_page(gfp_mask);
 
 
 What if the allocation fails here?

It is handled by the caller.

Thanks
Shirley

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.

2009-08-24 Thread Gleb Natapov

On Mon, Aug 24, 2009 at 02:44:27PM -0300, Marcelo Tosatti wrote:
 On Mon, Aug 24, 2009 at 08:16:46PM +0300, Gleb Natapov wrote:
  On Mon, Aug 24, 2009 at 01:32:56PM -0300, Marcelo Tosatti wrote:
   On Mon, Aug 24, 2009 at 03:06:23PM +0300, Gleb Natapov wrote:
Use return value from kvm_set_irq() to track coalesced PIT interrupts
instead of ack/mask notifiers.
   
   Gleb,
   
   What is the advantage of doing so?
   
  Current code very fragile and relies on hacks to work. Lets take calling
  of ack notifiers on pic reset as an example. Why is it needed? 
 
 To signal the ack notifiers users that, in case of reset with pending
 IRR, the given interrupt has been acked (its an artificial ack event).
 
But IRR was not acked. The reason it is done is that otherwise the
current logic will prevent further interrupt injection. 

 Is there a need to differentiate between actual interrupt ack and reset
 with pending IRR? At the time this code was written, there was no
 indication that differentation would be necessary.
This is two different things. Ack notifiers should be called when guest
acks interrupt. Calling it on reset is wrong (see below). We can add reset
notifiers, but we just build yet another infrastructure to support
current reinjection scheme.

 
  It is obviously wrong thing to do from assigned devices POV.
 
 Thats not entirely clear to me. So what happens if a guest with PIC
 assigned device resets with a pending IRR? The host interrupt line will
 be kept disabled, even though the guest is able to process further
 interrupts?
The host interrupt line will be enabled (assigned device ack notifier
does this) without clearing interrupt condition in assigned device
(guest hasn't acked irq so how can we be sure it ran device's irq
handler?). Host will hang.

  Why ioapic calls mask notifiers but pic doesn't?
 
 Because it is not implemented.
I see that. Why? Why it was important to implement for ioapic but not
for pic? Do we know what doesn't work now?

 
  Besides diffstat for the patch shows:
  2 files changed, 16 insertions(+), 59 deletions(-)
  
  43 lines less for the same functionality. Looks like clear win to me.
  
   Ack notifiers are asynchronous notifications. Using the return value
   from kvm_set_irq implies that timer emulation is based on a tick
   generating device on the host side.
  No notification is needed in the first place. You know immediately
  if injection fails or not. I don't see why using return value from
  kvm_set_irq implies that timer emulation is based on a tick generating
  device on the host side? What can you do with ack notifiers that can't
  be done without?
 
 If you don't have a host timer emulating the guest PIT, to periodically
 bang on kvm_set_irq, how do you know when to attempt reinjection?
 
 You keep calling kvm_set_irq on every guest entry to figure out when 
 reinjection is possible?
If we have timer to inject then yes. It is relatively cheap. Most of the
time pending count will be zero.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] eventfd: new EFD_STATE flag

2009-08-24 Thread Avi Kivity


On 08/24/2009 09:25 PM, Davide Libenzi wrote:

Indeed, the default eventfd behaviour is like, well, an event. Signaling
(kernel side) or writing (userspace side), signals the event.
Waiting (reading) it, will reset the event.
If you use EFD_SEMAPHORE, you get a semaphore-like behavior.
Events and semaphores are two widely known and used abstractions.
The EFD_STATE proposed one, well, no. Not at all.
   


There are libraries that provide notifications (or fire watches) when 
some value changes.  They're much less frequently used than events or 
semaphores, though.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] eventfd: new EFD_STATE flag

2009-08-24 Thread Davide Libenzi

On Sun, 23 Aug 2009, Michael S. Tsirkin wrote:

 On Sun, Aug 23, 2009 at 04:40:51PM +0300, Avi Kivity wrote:
  On 08/23/2009 04:36 PM, Michael S. Tsirkin wrote:
  More important here is realization that eventfd is a mutex/semaphore
  implementation, not a generic event reporting interface as we are trying
  to use it.
 
 
  Well it is a generic event reporting interface (for example, aio uses it).
 
 Davide, I think it's a valid point.  For example, what read on eventfd
 does (zero a counter and return) is not like any semaphore I saw.


Indeed, the default eventfd behaviour is like, well, an event. Signaling 
(kernel side) or writing (userspace side), signals the event.
Waiting (reading) it, will reset the event.
If you use EFD_SEMAPHORE, you get a semaphore-like behavior.
Events and semaphores are two widely known and used abstractions.
The EFD_STATE proposed one, well, no. Not at all.



- Davide


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][RFC] Use return value from kvm_set_irq() to re-inject PIT interrupts.

2009-08-24 Thread Gleb Natapov

On Mon, Aug 24, 2009 at 09:19:05PM +0300, Gleb Natapov wrote:
   It is obviously wrong thing to do from assigned devices POV.
  
  Thats not entirely clear to me. So what happens if a guest with PIC
  assigned device resets with a pending IRR? The host interrupt line will
  be kept disabled, even though the guest is able to process further
  interrupts?
 The host interrupt line will be enabled (assigned device ack notifier
 does this) without clearing interrupt condition in assigned device
 (guest hasn't acked irq so how can we be sure it ran device's irq
 handler?). Host will hang.
 
Actually, on second thought, it will not hang. Next time host
interrupt handler runs it will disable interrupt once again.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: vbus design points: shm and shm-signals

2009-08-24 Thread Anthony Liguori


Gregory Haskins wrote:

Gregory Haskins wrote:
  

Ingo Molnar wrote:

We all love faster code and better management interfaces and tons 
of your prior patches got accepted by Avi. This time you didnt even 
_try_ to improve virtio.
  

Im sorry, but you are mistaken:

http://lkml.indiana.edu/hypermail/linux/kernel/0904.2/02443.html




BTW: One point that I forgot to point out in this most recent thread
that I am particularly proud of here is the design of the vbus
shared-memory model.  Despite some claims to the contrary; not only is
it possible to improve virtio with vbus (as evident by the patch
referenced above)...I specifically designed vbus with virtio
considerations in mind from the start!  In fact, the design is conducive
to accelerating a variety of other models as well.  Read on for details.

Vbus was designed to be _agnostic_ to the shm algorithm in general.
This allows you to, of course, run ring algorithms (such as virtqueues,
or IOQs), but really any other designs as well, such as shared-tables, etc.

A guest driver sees the following interface:

struct vbus_device_proxy_ops {
int (*open)(struct vbus_device_proxy *dev, int version, int flags);
int (*close)(struct vbus_device_proxy *dev, int flags);
int (*shm)(struct vbus_device_proxy *dev, int id, int prio,
   void *ptr, size_t len,
   struct shm_signal_desc *sigdesc, struct shm_signal **signal,
   int flags);
int (*call)(struct vbus_device_proxy *dev, u32 func,
void *data, size_t len, int flags);
void (*release)(struct vbus_device_proxy *dev);
};

Note the ops->shm() method.  This allows the driver to register some
arbitrary pointer (ptr, len) with the host, optionally embedding a
shm_signal_desc object in the memory.  If sigdesc is non-null, the
connector will allocate and return a fully formed shm_signal object in
**signal.
  


Fundamentally, how is this different than the virtio->add_buf concept?

virtio provides a mechanism to register scatter/gather lists, associate 
a handle with them, and provides a mechanism for retrieving notification 
that the buffer has been processed.


vbus provides a mechanism to register a single buffer with an integer 
handle, priority, and a signaling mechanism.


So virtio provides builtin support for scatter/gathers whereas vbus 
models priority.  But fundamentally, they seem like almost identical 
concepts.


If we added priority to virtio->add_buf, would it be equivalent in your 
mind functionally speaking?


What does one do with priority, btw?

Is there something I'm overlooking?

Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: vbus design points: shm and shm-signals

2009-08-24 Thread Gregory Haskins

Hi Anthony,

Anthony Liguori wrote:
 Gregory Haskins wrote:
 Gregory Haskins wrote:
  
 Ingo Molnar wrote:

 We all love faster code and better management interfaces and tons of
 your prior patches got accepted by Avi. This time you didnt even
 _try_ to improve virtio.
   
 Im sorry, but you are mistaken:

 http://lkml.indiana.edu/hypermail/linux/kernel/0904.2/02443.html

 

 BTW: One point that I forgot to point out in this most recent thread
 that I am particularly proud of here is the design of the vbus
 shared-memory model.  Despite some claims to the contrary; not only is
 it possible to improve virtio with vbus (as evident by the patch
 referenced above)...I specifically designed vbus with virtio
 considerations in mind from the start!  In fact, the design is conducive
 to accelerating a variety of other models as well.  Read on for details.

 Vbus was designed to be _agnostic_ to the shm algorithm in general.
 This allows you to, of course, run ring algorithms (such as virtqueues,
 or IOQs), but really any other designs as well, such as shared-tables,
 etc.

 A guest driver sees the following interface:

 struct vbus_device_proxy_ops {
 int (*open)(struct vbus_device_proxy *dev, int version, int flags);
 int (*close)(struct vbus_device_proxy *dev, int flags);
 int (*shm)(struct vbus_device_proxy *dev, int id, int prio,
void *ptr, size_t len,
struct shm_signal_desc *sigdesc, struct shm_signal **signal,
int flags);
 int (*call)(struct vbus_device_proxy *dev, u32 func,
 void *data, size_t len, int flags);
 void (*release)(struct vbus_device_proxy *dev);
 };

 note the ops-shm() method.  This allows the driver to register some
 arbitrary pointer (ptr, len) with the host, optionally embedding a
 shm_signal_desc object in the memory.  If sigdesc is non-null, the
 connector will allocate and return a fully formed shm_signal object in
 **signal.
   
 
 Fundamentally, how is this different than the virtio-add_buf concept?

From my POV, they are at different levels.  Calling vbus->shm() is for
establishing a shared-memory region including routing the memory and
signal-path contexts.  You do this once at device init time, and then
run some algorithm on top (such as a virtqueue design).

virtio->add_buf(), OTOH, is a run-time function.  You do this to modify
the shared-memory region that is already established at init time by
something like vbus->shm().  You would do this to queue a network
packet, for instance.

That said, shm-signal's closest analogy to virtio would be vq->kick(),
vq->callback(), vq->enable_cb(), and vq->disable_cb().  The difference
is that the notification mechanism isn't associated with a particular
type of shared-memory construct (such as a virt-queue), but instead can
be used with any shared-mem algorithm (at least, if I designed it properly).

The closest analogy for vbus->shm() to virtio would be
vdev->config->find_vqs().  Again, the difference is that the algorithm
(ring, etc) is not dictated by the call.  You then overlay something
like virtqueue on top.

 
 virtio provides a mechanism to register scatter/gather lists, associate
 a handle with them, and provides a mechanism for retrieving notification
 that the buffer has been processed.

Yes, and I agree this is very useful for many/most algorithms...but not
all.  Sometimes you don't want ring-like semantics, but instead want
something like an idempotent table.  (Think of things like interrupt
controllers, timers, etc).

Rings, of course, have a trait that all updates are retained in fifo
order.  For many things (e.g. network, block io, etc), this is exactly
what you want.  If I say "send packet X" now, and "send packet Y" later,
I want the system to do both (and perhaps in that order), so a ring
scheme works well.

However, sometimes you may want to say "time is now X", and later "time
is now Y".  The update value of 'X' is technically superseded by Y and
is stale.  But a ring may allow both to exist in-flight within the shm
simultaneously if the recipient (guest or host) is lagging, and the X
may be processed even though its data is now irrelevant.  What we really
want is the transform of X->Y to invalidate anything else in flight so
that only Y is visible.

So in a case like this, we may want a different algorithm.  Something
like a table which always contains the current/valid value, and a way to
signal in both directions when something interesting happens to that data.

If you think about it, a ring is a superset of this construct...the ring
meta-data is the shared-table (e.g. HEAD ptr, TAIL ptr, COUNT, etc).
So we start by introducing the basic shm concept, and allow the next
layer (virtio/virtqueue) in the stack to refine it for its needs.


 
 vbus provides a mechanism to register a single buffer with an integer
 handle, priority, and a signaling mechanism.

Again, I think we are talking about two different layers.  You would
never put entries into a virtio-ring

user question: graphic acceleration for CAD

2009-08-24 Thread Aleks

Hello Community,
I just want to ask if it is planned to support 3d acceleration
features for windows guests on linux hosts to enhance windows 
based CAD applications. I am not aware of the features modern
CAD software is using so I cannot provide a list of requested
features, sorry. I am not sure if it is possible to forward
features of the host driver to the guest but let me know if
something like this is planned even if it is only available
for one guest at the same time.

I am not member of the list so please CC me.

Thank you,
Aleks

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: vhost net: performance with ping benchmark

2009-08-24 Thread Michael S. Tsirkin

On Mon, Aug 24, 2009 at 11:12:41AM +0300, Michael S. Tsirkin wrote:
 At Rusty's suggestion, I tested vhost base performance with ping.
 Results below, and seem to be what you'd expect.

Rusty, any chance you could look at the code?  Is it in reasonable
shape? I think it makes sense to merge it through you. What do you
think?  One comment on file placement: I put files under a separate
vhost directory to avoid confusion with virtio-net which runs in guest.
Does this sound sane?  Also, can a minimal version (without TSO, tap or
any other features) be merged upstream first so that features can be
added later? Or do we have to wait until it's more full featured?
Finally, can it reasonably make 2.6.32, or you think it needs more time
out of tree?

Thanks very much,

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: vbus design points: shm and shm-signals

2009-08-24 Thread Gregory Haskins

Gregory Haskins wrote:
 Anthony Liguori wrote:
 Fundamentally, how is this different than the virtio->add_buf concept?
 
 From my POV, they are at different levels.  Calling vbus->shm() is for
 establishing a shared-memory region including routing the memory and
 signal-path contexts.  You do this once at device init time, and then
 run some algorithm on top (such as a virtqueue design).
 
 virtio->add_buf() OTOH, is a run-time function.  You do this to modify
 the shared-memory region that is already established at init time by
 something like vbus->shm().  You would do this to queue a network
 packet, for instance.
 
 That said, shm-signal's closest analogy to virtio would be vq->kick(),
 vq->callback(), vq->enable_cb(), and vq->disable_cb().  The difference
 is that the notification mechanism isn't associated with a particular
 type of shared-memory construct (such as a virt-queue), but instead can
 be used with any shared-mem algorithm (at least, if I designed it properly).
 
 The closest analogy for vbus->shm() to virtio would be
 vdev->config->find_vqs().  Again, the difference is that the algorithm
 (ring, etc) is not dictated by the call.  You then overlay something
 like virtqueue on top.

BTW: Another way to think of this is that virtio->add_buf() is really
buffer assignment, whereas vbus->shm() is buffer sharing.  The
former is meant to follow an assign, consume, re-assign, reclaim
model, where the changing pointer ownership implicitly serializes the
writability of the buffer.  It's used (quite effectively) for things like
passing a network-packet around.

Conversely, the latter case (buffer sharing) is designed for
concurrent writers.  It's used for things like ring-metadata,
shared-table designs, etc.  Anything that generally is designed for a
longer-term, parallel update model, instead of a consume/reclaim model.

Whether we realize it or not, we generally build buffer-assignment
algorithms on top of buffer-sharing algorithms.  Therefore, while virtio
technically has both of these components, it only exposes the former
(buffer-assignment) as a user-extensible ABI (vq->add_buf).  The latter
(buffer-sharing) is inextricably linked to the underlying virtqueue ABI
(vdev->find_vqs) (or, at least it is today).

This is why I keep emphasizing that they are different layers of the
same stack.  From a device point of view, virtio adds a robust ring
model with buffer-assignment capabilities, support for scatter-gather,
etc.  Vbus underneath it provides a robust buffer-sharing design with
considerations for things like end-to-end prioritization, mitigation of
various virt-like inefficiencies (hypercalls, interrupts, eois, spurious
re-signals), etc.

The idea is you can then join the two together to do something like
build 8-rx virtqueues for your virtio-net to support prio.  If you take
these things into consideration on the backend design as well, you can
actually tie it in end-to-end to gain performance and capabilities not
previously available in KVM (or possibly any virt platform).

HTH,

Kind Regards,
-Greg



signature.asc
Description: OpenPGP digital signature

Re: [PATCH 0/2] eventfd: new EFD_STATE flag

2009-08-24 Thread Michael S. Tsirkin

On Mon, Aug 24, 2009 at 11:25:01AM -0700, Davide Libenzi wrote:
 On Sun, 23 Aug 2009, Michael S. Tsirkin wrote:
 
  On Sun, Aug 23, 2009 at 04:40:51PM +0300, Avi Kivity wrote:
   On 08/23/2009 04:36 PM, Michael S. Tsirkin wrote:
   More important here is realization that eventfd is a mutex/semaphore
   implementation, not a generic event reporting interface as we are trying
   to use it.
  
  
   Well it is a generic event reporting interface (for example, aio uses it).
  
  Davide, I think it's a valid point.  For example, what read on eventfd
  does (zero a counter and return) is not like any semaphore I saw.
 
 
 Indeed, the default eventfd behaviour is like, well, an event. Signaling 
 (kernel side) or writing (userspace side), signals the event.
 Waiting (reading) it, will reset the event.
 If you use EFD_SEMAPHORE, you get a semaphore-like behavior.
 Events and semaphores are two widely known and used abstractions.
 The EFD_STATE proposed one, well, no. Not at all.

Hmm. All we try to do is, associate a small key with the event
that we signal. Is it really that uncommon/KVM specific?

 
 
 - Davide
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] eventfd: new EFD_STATE flag

2009-08-24 Thread Davide Libenzi

On Mon, 24 Aug 2009, Avi Kivity wrote:

 On 08/24/2009 09:25 PM, Davide Libenzi wrote:
  Indeed, the default eventfd behaviour is like, well, an event. Signaling
  (kernel side) or writing (userspace side), signals the event.
  Waiting (reading) it, will reset the event.
  If you use EFD_SEMAPHORE, you get a semaphore-like behavior.
  Events and semaphores are two widely known and used abstractions.
  The EFD_STATE proposed one, well, no. Not at all.
 
 
 There are libraries that provide notifications (or fire watches) when some
 value changes.  They're much less frequently used than events or semaphores,
 though.

There are userspace libraries that do almost everything, but you hardly 
see things like pthread_(EFD_STATE-like)_create() or similar system 
interfaces based on such abstraction.
Is that really difficult to understand where I'm standing, leaving the KVM 
hat off for a moment?



- Davide


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] eventfd: new EFD_STATE flag

2009-08-24 Thread Paolo Bonzini




There are userspace libraries that do almost everything, but you hardly
see things like pthread_(EFD_STATE-like)_create() or similar system
interfaces based on such abstraction.


It actually seems as close to a condition variable as an eventfd can be.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] eventfd: new EFD_STATE flag

2009-08-24 Thread Davide Libenzi

On Tue, 25 Aug 2009, Michael S. Tsirkin wrote:

 On Mon, Aug 24, 2009 at 11:25:01AM -0700, Davide Libenzi wrote:
  On Sun, 23 Aug 2009, Michael S. Tsirkin wrote:
  
   On Sun, Aug 23, 2009 at 04:40:51PM +0300, Avi Kivity wrote:
On 08/23/2009 04:36 PM, Michael S. Tsirkin wrote:
More important here is realization that eventfd is a mutex/semaphore
implementation, not a generic event reporting interface as we are 
trying
to use it.
   
   
Well it is a generic event reporting interface (for example, aio uses 
it).
   
   Davide, I think it's a valid point.  For example, what read on eventfd
   does (zero a counter and return) is not like any semaphore I saw.
  
  
  Indeed, the default eventfd behaviour is like, well, an event. Signaling 
  (kernel side) or writing (userspace side), signals the event.
  Waiting (reading) it, will reset the event.
  If you use EFD_SEMAPHORE, you get a semaphore-like behavior.
  Events and semaphores are two widely known and used abstractions.
  The EFD_STATE proposed one, well, no. Not at all.
 
 Hmm. All we try to do is, associate a small key with the event
 that we signal. Is it really that uncommon/KVM specific?

All I'm trying to do is keep eventfd from becoming a horrible 
multiplexor for every freaky one-time-use behavior arising inside kernel 
modules.



- Davide


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] eventfd: new EFD_STATE flag

2009-08-24 Thread Davide Libenzi

On Tue, 25 Aug 2009, Paolo Bonzini wrote:

 
  There are userspace libraries that do almost everything, but you hardly
  see things like pthread_(EFD_STATE-like)_create() or similar system
  interfaces based on such abstraction.
 
 It actually seems as close to a condition variable as an eventfd can be.

A pthread condition typical code usage maps to eventfd like:

while (read(efd, ...) > 0)
if (CONDITION)
break;

So a pthread condition is really a wakeup gate like eventfd is.
EFD_STATE has nothing to do with a pthread condition.



- Davide


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] VMX: Return to userspace on invalid state emulation failure

2009-08-24 Thread Mohammed Gamal

Return to userspace instead of repeatedly trying to emulate
instructions that have already failed

Signed-off-by: Mohammed Gamal m.gamal...@gmail.com
---
 arch/x86/kvm/vmx.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6b57eed..c559bb7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3337,6 +3337,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu 
*vcpu)
 
if (err != EMULATE_DONE) {
kvm_report_emulation_failure(vcpu, emulation failure);
+   vcpu-run-exit_reason = KVM_EXIT_INTERNAL_ERROR;
+   vcpu-run-internal.suberror = 
KVM_INTERNAL_ERROR_EMULATION;
break;
}
 
@@ -3607,7 +3609,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx-entry_time = ktime_get();
 
/* Handle invalid guest state instead of entering VMX */
-   if (vmx-emulation_required  emulate_invalid_guest_state) {
+   if (vmx-emulation_required  emulate_invalid_guest_state
+!(vcpu-run-exit_reason == KVM_EXIT_INTERNAL_ERROR 
+ vcpu-run-internal.suberror == 
KVM_INTERNAL_ERROR_EMULATION)) {
handle_invalid_guest_state(vcpu);
return;
}
-- 
1.6.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: vbus design points: shm and shm-signals

2009-08-24 Thread Anthony Liguori


Gregory Haskins wrote:

Hi Anthony,

  

Fundamentally, how is this different than the virtio->add_buf concept?



From my POV, they are at different levels.  Calling vbus->shm() is for
establishing a shared-memory region including routing the memory and
signal-path contexts.  You do this once at device init time, and then
run some algorithm on top (such as a virtqueue design).
  


virtio explicitly avoids having a single setup-memory-region call 
because it was designed to accommodate things like Xen grant tables 
whereas you have a fixed number of sharable

buffers that need to be setup and torn down as you use them.

You can certainly use add_buf() to setup a persistent mapping but it's 
not the common usage.  For KVM, since all memory is accessible by the 
host without special setup, add_buf() never results in an exit (it's 
essentially a nop).


So I think from that perspective, add_buf() is a functional superset of 
vbus->shm().



virtio->add_buf() OTOH, is a run-time function.  You do this to modify
the shared-memory region that is already established at init time by
something like vbus->shm().  You would do this to queue a network
packet, for instance.

That said, shm-signal's closest analogy to virtio would be vq->kick(),
vq->callback(), vq->enable_cb(), and vq->disable_cb().  The difference
is that the notification mechanism isn't associated with a particular
type of shared-memory construct (such as a virt-queue), but instead can
be used with any shared-mem algorithm (at least, if I designed it properly).
  


Obviously, virtio allows multiple ring implementations based on how it does 
layering.  The key point is that it doesn't expose that to the consumer 
of the device.


Do you see a compelling reason to have an interface at this layer?


virtio provides a mechanism to register scatter/gather lists, associate
a handle with them, and provides a mechanism for retrieving notification
that the buffer has been processed.



Yes, and I agree this is very useful for many/most algorithms...but not
all.  Sometimes you don't want ring-like semantics, but instead want
something like an idempotent table.  (Think of things like interrupt
controllers, timers, etc).
  


We haven't crossed this bridge yet because we haven't implemented one of 
these devices.  One approach would be to use add_buf() to register fixed 
shared memory regions.  Because our rings are fixed sized, this implies 
a fixed number of shared memory mappings.


You could also extend virtio to provide a mechanism to register 
unlimited numbers of shared memory regions.  The problem with this is 
that it doesn't work well for hypervisors with fixed shared-memory 
regions (like Xen).

However, sometimes you may want to say time is now X, and later time
is now Y.  The update value of 'X' is technically superseded by Y and
is stale.  But a ring may allow both to exist in-flight within the shm
simultaneously if the recipient (guest or host) is lagging, and the X
may be processed even though its data is now irrelevant.  What we really
want is the transform of X->Y to invalidate anything else in flight so
that only Y is visible.
  


We actually do this today but we just don't use virtio.  I'm not sure we 
need a single bus that can serve both of these purposes.  What does this 
abstraction buy us?



If you think about it, a ring is a superset of this construct...the ring
meta-data is the shared-table (e.g. HEAD ptr, TAIL ptr, COUNT, etc).
So we start by introducing the basic shm concept, and allow the next
layer (virtio/virtqueue) in the stack to refine it for its needs.
  


I think there's a trade off between practicality and theoretical 
abstractions.  Surely, a system can be constructed simply with 
notification and shared memory primitives.   This is what Xen does via 
event channels and grant tables.  In practice, this ends up being 
cumbersome and results in complex drivers.  Compare netfront to 
virtio-net, for instance.


We choose to abstract at the ring level precisely because it simplifies 
driver implementations.  I think we've been very successful here.


virtio does not accommodate devices that don't fit into a ring model 
very well today.  There's certainly room to discuss how to do this.  If 
there is to be a layer below virtio's ring semantics, I don't think that 
vbus is this because it mandates much higher levels of the stack 
(namely, device enumeration).


IOW, I can envision a model that looked like PCI -> virtio-pci -> 
virtio-shm -> virtio-ring -> virtio-net


Whereas generic-shm-mechanism provided a non-ring interface for non-ring 
devices.  That doesn't preclude non virtio-pci transports, it just 
suggests how we would do the layering.


So maybe there's a future for vbus as virtio-shm?  How attached are you 
to your device discovery infrastructure?


If you introduced a virtio-shm layer to the virtio API that looked a bit 
like vbus' device API, and then decoupled the device discovery bits into 
a virtio-vbus transport, I

Re: vbus design points: shm and shm-signals

2009-08-24 Thread Anthony Liguori


Anthony Liguori wrote:
IOW, I can envision a model that looked like PCI -> virtio-pci -> 
virtio-shm -> virtio-ring -> virtio-net


Let me stress that what's important here is that devices target either 
virtio-ring or virtio-shm.  If we had another transport, those drivers 
would be agnostic toward it.  We really want to preserve the ability to 
use all devices over a PCI transport.  That's a critical requirement for us.


The problem with vbus as it stands today, is that it presents vbus -> 
virtio-ring -> virtio-net and allows drivers to target either 
virtio-ring or vbus directly.  If a driver targets vbus directly, then 
the driver is no longer transport agnostic and we could not support that 
driver over PCI.


Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHv3 0/4] qemu-kvm: vhost net support

2009-08-24 Thread Alex Williamson

On Sun, Aug 23, 2009 at 1:22 PM, Michael S. Tsirkinm...@redhat.com wrote:

 Just had a different, but slightly similar problem when the host running
 qemu had forwarding enabled. Is it possible your host is forwarding the
 packets somewhere else, and that's why we get the dupes?
 sysctl -w net.ipv4.conf.all.forwarding=0

Yes!  This seems to be the problem.  As expected, I can just disable
forwarding on eth10 and the duplicates disappear.  Thanks,

Alex
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2829519 ] extboot.bin is not built

2009-08-24 Thread SourceForge.net

Bugs item #2829519, was opened at 2009-07-30 09:13
Message generated for change (Comment added) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2829519&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: qemu
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: Dominik Klein (dominikklein)
Assigned to: Nobody/Anonymous (nobody)
Summary: extboot.bin is not built

Initial Comment:
I am on openSuSE 11.1 x86_64 on a Dell machine with an Intel E5440 CPU.

I try to compile qemu 0.10.5 using
./configure --prefix=/usr
make
make install

Everything looks good but then I find that extboot.bin is neither (tried to) 
built nor installed.

Copying a version from an older qemu installation worked as suggested by a 
colleague, but I guess that's not the way it is meant to be.

I attached a file with the output of configure, make and make install. If you 
need anything else, just let me know or contact me on IRC, Nickname kleind in 
#kvm on freenode

--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: xming (xming)
Date: 2009-08-22 15:38

Message:
extboot.bin is not built with qemu-kvm-0.11.0-rc1, can this be fixed for
the next rc?

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 11:55

Message:
Fixed in 0.10.6

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2829519&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2725669 ] kvm init script breaks network interfaces with multiple IPs

2009-08-24 Thread SourceForge.net

Bugs item #2725669, was opened at 2009-04-01 20:44
Message generated for change (Settings changed) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2725669&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: Rejected
Priority: 5
Private: No
Submitted By: Paul Donohue (paulsd)
Assigned to: Nobody/Anonymous (nobody)
Summary: kvm init script breaks network interfaces with multiple IPs

Initial Comment:
If multiple IP addresses are assigned to a network interface (Using interface 
aliases - for example 'ifconfig eth0 10.0.0.1 ; ifconfig eth0:1 10.0.0.2'), 
then the kvm init script causes the interface to become unresponsive when it 
creates a bridge using the interface.

I haven't yet had a need to use bridging for my VMs, so I haven't yet tried to 
figure out how to properly configure a bridge when multiple IPs are in use on 
the host system (I assume the multiple IPs simply need to be configured using 
aliases of the bridge itself - for example 'ifconfig sw0 10.0.0.1 ; ifconfig 
sw0:1 10.0.0.2' - but I haven't actually tried it).  Therefore, I am not sure 
at the moment how the kvm init script needs to be updated to fix this problem.

Regardless, I do have a number of machines which are using multiple IPs on the 
host system, and I recently installed kvm on them, then discovered that after 
the next reboot of each machine, the network interface is unresponsive until I 
disable the kvm init script and reboot again.

So, ideally the kvm init script should be updated to properly handle aliased 
interfaces, but at the very least, it needs to be updated to detect aliased 
interfaces and refuse to create a bridge for them, since that seems to 
completely break the underlying interface.

--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:13

Message:
The kvm init script is shipped as an example only.  You should use the
networking setup that came with your distribution.

--

Comment By: Paul Donohue (paulsd)
Date: 2009-04-01 23:48

Message:
Yes, it does, in the userspace tree, under the scripts subdirectory:
http://git.kernel.org/?p=virt/kvm/kvm-userspace.git;a=blob;f=scripts/kvm;h=cddc931fd3b289f3c325e23b55f261e996328bd6;hb=HEAD

--

Comment By: Brian Jackson (iggy_cav)
Date: 2009-04-01 21:08

Message:
KVM doesn't come with an init script in the tarball. This is most likely
provided by your distro or some other third party. You should contact them
for support.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2725669&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2543539 ] Can't install Windows 98

2009-08-24 Thread SourceForge.net

Bugs item #2543539, was opened at 2009-01-28 17:44
Message generated for change (Settings changed) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2543539&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: None
Priority: 5
Private: No
Submitted By: Kouichi Kusanagi (k_kusanagi)
Assigned to: Nobody/Anonymous (nobody)
Summary: Can't install Windows 98

Initial Comment:
I attempted to install Windows 98. It hangs at Hardware Detection. Progress bar 
stopped at 14% and kvm repeats these messages.
emulation failed (mmio) rip 3e49 f2 ae 75 14
emulation failed (pagetable) rip 3e49 f2 ae 75 14

The attached patch fixes the hang, but the install still fails. When the progress bar reaches 
29%, a general protection fault occurs. Details are as follows:
MSGSRV32 
 : SYSDETMG.DLL  : 0004:4ef0
Registers:
EAX=2000 CS=2137 EIP=4ef0 EFLGS=0206
EBX=2000 SS=1fc7 ES{=8270 EBP=828a
ECX= DS=30b7 ESI=00020006 FS=
EDX=2637 ES=2637 EDI= GS=0157
Bytes at CS:EIP:
26 8b 47 06 03 46 f6 8b f8 8c 46 fe 8b d8 26 f6
Stack dump:
0073d400 2e738276 2e8530b7 30b7 2637 0002 82c62e0c
21374d4a 1fc782b2 1fc782b4 1fc782bc 1fc782b6 1fc782b8 1fc782ba
30b7d400 00734d44

cpu: AMD Phenom X4 9550
kvm: kvm-83
kernel: v2.6.29-rc1-190-g37a76bd
host: Debian sid amd64
guest: Windows 98
command line: kvm -serial none -parallel none -k en-us -monitor stdio -net none 
-usb -usbdevice tablet -localtime -hda win98 -cdrom win98.iso -boot d

--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:16

Message:
Please post the patch to the mailing list (kvm@vger.kernel.org)

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2543539&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2099075 ] qcow2 images corruption

2009-08-24 Thread SourceForge.net

Bugs item #2099075, was opened at 2008-09-07 19:22
Message generated for change (Settings changed) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2099075&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: Fabio Coatti (cova)
Assigned to: Nobody/Anonymous (nobody)
Summary: qcow2 images corruption

Initial Comment:
I'm running a 32bit vm under x86_64 AMD host with kvm-74. using raw image all 
works fine, but with qcow2 image I'm able to get deep fs corruption with a disk 
intensive operation like kernel compilation.
it happened every time I tried, and -no-kvm-irqchip didn't make any difference.

I've browsed the archives for hints, but no luck.

Details:
Host (64):
Linux  2.6.26.3 #6 SMP PREEMPT Sun Aug 31 16:00:51 CEST 2008 x86_64 AMD 
Phenom(tm) 9850 Quad-Core Processor AuthenticAMD GNU/Linux
single CPU/quad core.
fs on image partition: XFS

Gentoo ~amd64, kvm compiled on host system.
Guest (32):
Linux  2.6.26.3vm #4 Tue Aug 26 17:29:00 CEST 2008 i686 QEMU Virtual CPU 
version 0.9.1 AuthenticAMD GNU/Linux
Gentoo x86


command line:
kvm -m 1G -drive file=test.qcow2,if=virtio,boot=on -localtime -net 
nic,macaddr=DE:AD:BE:EF:15:5,model=virtio -net tap
fs on image: ext3

kvm-img info of image that show corruption:

image: test.qcow2
file format: qcow2
virtual size: 10G (10737418240 bytes)
disk size: 103M
cluster_size: 4096
backing file: gentoo-i386-virtio.qcow2 (actual path: gentoo-i386-virtio.qcow2)

kvm-img info of working image:

image: gentoo-i386-virtio.img
file format: raw
virtual size: 10G (10737418240 bytes)
disk size: 4.5G

virtio partitions are in use on guest machine

Let me know if other details are needed.

Thanks for any answer. 


--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:22

Message:
qcow2 problems should be fixed in kvm-88.

--

Comment By: Fabio Coatti (cova)
Date: 2008-09-07 21:46

Message:
Logged In: YES 
user_id=220554
Originator: YES

Many thanks for the quick answer.

Not sure about how to get kvm head, I've used
http://people.qumranet.com:/avi/snapshots/kvm-snapshot-20080906.tar.gz
The result is the same: fs corruption using qcow2 image.
as suggested, I've tried with ide emulation and it seems that I'm unable
to reproduce the bug, with this command line:

kvm -m 1G test-hda.qcow2 -localtime -net
nic,macaddr=DE:AD:BE:EF:15:5,model=rtl8139 -net tap

all goes fine: 3 kernel compilation on the same image, rebooting each
time, without glitches.

maybe it's related to virtio disk driver?

Of course I'm available for other info and tests, if needed.

Thanks. 


--

Comment By: Dor Laor (thekozmo)
Date: 2008-09-07 20:39

Message:
Logged In: YES 
user_id=2124464
Originator: NO

Hi there,

Can you first check against kvm head, there was a missing tlb flush that
was fixed.
Second, can you replace virtio with scsi/ide and check if the bug still
exist?
10x

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2099075&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-1906189 ] All SMP guests often halt

2009-08-24 Thread SourceForge.net

Bugs item #1906189, was opened at 2008-03-03 11:33
Message generated for change (Comment added) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1906189&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: None
Priority: 5
Private: No
Submitted By: Technologov (technologov)
Assigned to: Nobody/Anonymous (nobody)
Summary: All SMP guests often halt

Initial Comment:
All SMP configurations are very unstable - both on Intel and AMD. KVM-62.

Symptoms: guests often soft-lock up, or more precisely, they slow down to 
unacceptable speeds.
Guests may hard-lockup totally, or even BSOD in some cases.

I have tried:
 Windows 2000
 Windows XP
 Windows Server 2003
 Windows Server 2008

The KVM acts, but it looks like a loop.

=
[alex...@pink-intel win2000-Pro]$ dmesg | tail -n40
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x10
apic write: bad size=1 fee00030
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x10
apic write: bad size=1 fee00030
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x21
SIPI to vcpu 1 vector 0x21
SIPI to vcpu 1 vector 0x21
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x21
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x10
apic write: bad size=1 fee00030
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x21
SIPI to vcpu 1 vector 0x21
SIPI to vcpu 1 vector 0x21
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
SIPI to vcpu 1 vector 0x21
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1

=
(gdb) bt
#0  0x003a016c9aa7 in ioctl () from /lib64/libc.so.6
#1  0x0051bb29 in kvm_run (kvm=0x2a9b040, vcpu=0) at libkvm.c:850
#2  0x004fda86 in kvm_cpu_exec (env=<value optimized out>)
at /root/Linstall/kvm-62rc2/qemu/qemu-kvm.c:127
#3  0x004fe5d5 in kvm_main_loop_cpu (env=0x2b56490)
at /root/Linstall/kvm-62rc2/qemu/qemu-kvm.c:307
#4  0x004110fd in main (argc=44675488, argv=<value optimized out>)
at /root/Linstall/kvm-62rc2/qemu/vl.c:7862
=

kvm statistics

 efer_reload 103701   0
 exits512480997   20642
 fpu_reload24781662 799
 halt_exits 1824249 170
 halt_wakeup 828699  68
 host_state_reload 495932451617
 hypercalls   0   0
 insn_emulation   389188282   14239
 insn_emulation_fail   1110   0
 invlpg   0   0
 io_exits  28855411 928
 irq_exits 191313613248
 irq_window   0   0
 largepages   0   0
 mmio_exits16078802   0
 mmu_cache_miss 4219404 415
 mmu_flooded4110773 410
 mmu_pde_zapped  499335   6
 mmu_pte_updated   103816391327
 mmu_pte_write 145679441737
 mmu_recycled 17419   0
 mmu_shadow_zapped  4372079 410  

=

-Alexey, 03.03.2008.

--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:29

Message:
Does this still happen?

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1906189&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-1941302 ] Cannot boot guests with hugetlbfs

2009-08-24 Thread SourceForge.net

Bugs item #1941302, was opened at 2008-04-13 11:21
Message generated for change (Settings changed) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1941302&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: yunfeng (yunfeng)
Assigned to: Nobody/Anonymous (nobody)
Summary: Cannot boot guests with hugetlbfs

Initial Comment:
I have a problem getting hugetlbfs to work on my test box.
If i added --mem-path to qemu, the guest will always fail to boot with the 
error messages below printed on qemu console:
###
ata0 master: ATA-0 Hard-Disk (  0 MBytes)
ata1 master: ATAPI-0 Device

Booting from Hard Disk .
Booting from Hard Disk failed: could not read the boot disk
##

And I checked my system, hugepages has been enabled.
[r...@vt-dp8 ~]# grep -i huge /proc/meminfo
HugePages_Total:   500
HugePages_Free:496
HugePages_Rsvd:  0
Hugepagesize: 2048 kB

[r...@vt-dp8 ~]# mount
/dev/sda1 on / type ext3 (rw)
none on /dev/shm type tmpfs (rw,size=4G)
nodev on /hugepages type hugetlbfs (rw)
/dev/sda3 on /share type ext2 (rw)
/dev/sda2 on /mnt/sda2 type ext3 (rw)

The command i am using:
qemu -m 256 -monitor pty -net nic,macaddr=00:16:3e:48:d4:aa,model=rtl8139 -net 
tap,script=/etc/kvm/qemu-ifup -hda /dev/sda --mem-path /hugepages/

If I remove --mem-path, the guest can be booted up.


--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:24

Message:
Should be fixed in kvm-88.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1941302&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-1984384 ] soft lockup - CPU#5 stuck for 11s! [qemu-system-x86:4966]

2009-08-24 Thread SourceForge.net

Bugs item #1984384, was opened at 2008-06-04 11:49
Message generated for change (Comment added) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1984384&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: Rafal Wijata (ravpl)
Assigned to: Nobody/Anonymous (nobody)
Summary: soft lockup - CPU#5 stuck for 11s! [qemu-system-x86:4966]

Initial Comment:
I'm using kvm-69 running on
Linux 2.6.24.7-92.fc8 #1 SMP Wed May 7 16:26:02 EDT 2008 x86_64 x86_64 x86_64 
GNU/Linux
kvm modules loaded from kvm-69 rather than kernel provided

My system almost froze after I killed the qemu process.
I saw many, many tasks in 'D' state, along with [reiserfs/?] tasks.
Normally I would consider it reiserfs bug(and maybe it is), but two things
- it happened after qemu process was killed(running with 6cpus, 6G memory, 16G 
hdd placed on reiserfs placed on 200M/s hdd)

- dmesg showed the following messages (2 total), which suggest it got stuck in kvm

BUG: soft lockup - CPU#5 stuck for 11s! [qemu-system-x86:4966]
CPU 5:
Modules linked in: ipt_REJECT nf_conntrack_ipv4 iptable_filter ip_tables 
kvm_intel(U) kvm(U) tun nfs lockd nfs_acl autofs4 coretemp hwmon fuse sunrpc 
bridge xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6t_REJECT 
ip6table_filter ip6_tables x_tables ipv6 cpufreq_ondemand acpi_cpufreq reiserfs 
ext2 dm_mirror dm_multipath dm_mod i5000_edac iTCO_wdt serio_raw pcspkr 
iTCO_vendor_support e1000 button edac_core i2c_i801 ata_piix i2c_core pata_acpi 
ata_generic sg usb_storage ahci libata shpchp 3w_9xxx sd_mod scsi_mod ext3 jbd 
mbcache uhci_hcd ohci_hcd ehci_hcd
Pid: 4966, comm: qemu-system-x86 Not tainted 2.6.24.7-92.fc8 #1
RIP: 0010:[8834b29e]  [8834b29e] 
:kvm:rmap_remove+0x170/0x198
RSP: 0018:8101f4df5bd8  EFLAGS: 0246
RAX: 0002 RBX: 81004294af60 RCX: 
RDX:  RSI: 0106 RDI: 8101770448c0
RBP: 8101ce0454d0 R08: c20001b86030 R09: 8101d3587118
R10: 0019e7ea R11: 8101394dd9c0 R12: 8100240cece0
R13:  R14: 0019e7ea R15: 0018
FS:  () GS:81021f049580() knlGS:
CS:  0010 DS: 002b ES: 002b CR0: 8005003b
CR2: f7ff6000 CR3: 00021b5e5000 CR4: 26e0
DR0:  DR1:  DR2: 
DR3:  DR6: 0ff0 DR7: 0400

Call Trace:
 [8834b1dd] :kvm:rmap_remove+0xaf/0x198
 [8834b372] :kvm:kvm_mmu_zap_page+0x8a/0x25e
 [8834b9f3] :kvm:free_mmu_pages+0x12/0x34
 [8834bac9] :kvm:kvm_mmu_destroy+0x1d/0x5e
 [88346979] :kvm:kvm_arch_vcpu_uninit+0x1d/0x38
 [8834555b] :kvm:kvm_vcpu_uninit+0x9/0x15
 [88163aa8] :kvm_intel:vmx_free_vcpu+0x74/0x84
 [8834657b] :kvm:kvm_arch_destroy_vm+0x69/0xb4
 [88345538] :kvm:kvm_vcpu_release+0x13/0x18
 [810a35d4] __fput+0xc2/0x18f
 [810a0de7] filp_close+0x5d/0x65
 [8103b3df] put_files_struct+0x66/0xc4
 [8103c6f7] do_exit+0x28c/0x76b
 [8103cc55] sys_exit_group+0x0/0xe
 [81044163] get_signal_to_deliver+0x3aa/0x3d8
 [8100b359] do_notify_resume+0xa8/0x732
 [8126b7f6] unlock_kernel+0x32/0x33
 [881c01db] :reiserfs:reiserfs_setattr+0x26e/0x27d
 [810a1866] do_truncate+0x70/0x79
 [8100bf17] sysret_signal+0x1c/0x27
 [8100c1a7] ptregscall_common+0x67/0xb0


--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:27

Message:
Should be fixed in git.

--

Comment By: Avi Kivity (avik)
Date: 2008-06-04 15:45

Message:
Logged In: YES 
user_id=539971
Originator: NO

Okay,  I added a cond_resched() in free_mmu_pages().  That should avoid
the softlockup tick.
File Added: prevent-softlockup-on-kvm-destroy.patch

--

Comment By: david ahern (dsahern)
Date: 2008-06-04 15:08

Message:
Logged In: YES 
user_id=1755596
Originator: NO

My host did not crash, only the guest. I actually was not aware it had
gone down until I went to login. At that point I went digging through
syslog to find out when it died (my control scripts log startup and
shutdown). The

[ kvm-Bugs-2024740 ] Win2003 SMP installation crash

2009-08-24 Thread SourceForge.net

Bugs item #2024740, was opened at 2008-07-22 14:36
Message generated for change (Settings changed) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2024740&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: None
Priority: 5
Private: No
Submitted By: Marcelo Tosatti (mtosatti)
Assigned to: Nobody/Anonymous (nobody)
Summary: Win2003 SMP installation crash

Initial Comment:

Crash during first stage of Win2003 enterprise installation (text-mode), 4 
vcpu's:

Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 2
Ignoring de-assert INIT to vcpu 3
Ignoring de-assert INIT to vcpu 0
Ignoring de-assert INIT to vcpu 1
Ignoring de-assert INIT to vcpu 2
Ignoring de-assert INIT to vcpu 3
handle_exception: unexpected, vectoring info 0x8406 intr info 0x8b0c

With today's git tree (commit ea8b7f0542e0420240d057f7954808c65c4d13fc).

Flexpriority enabled host. 

qemu/x86_64-softmmu/qemu-system-x86_64 -hda /root/images/win2003.img \
-cdrom /root/isos/en_windows_server_2003_with_sp1_enterprise.iso \
 -m 2000 -usbdevice tablet -vnc :2 -smp 4   -boot d 


--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:30

Message:
Still happens?

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2024740&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2490866 ] repeatable corruption with qcow2 on kvm-79

2009-08-24 Thread SourceForge.net

Bugs item #2490866, was opened at 2009-01-06 21:10
Message generated for change (Comment added) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2490866&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: qemu
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: Adrian Bridgett (abridgett)
Assigned to: Nobody/Anonymous (nobody)
Summary: repeatable corruption with qcow2 on kvm-79

Initial Comment:
Creating a qcow2 image, mkfs.ext3, sometimes mounting it would fail 
immediately, but in all cases it would corrupt (overwritten with zeros) after 
starting up backuppc on it.   This is KVM-79 on a Debian lenny host and guest.

This occurred using virtio or not.  Swapping to a raw file or LV worked 
flawlessly.  I've tested the box with memtest and I don't have issues elsewhere 
but I've seen corruptions on other images.  host and guest are both 
2.6.26-1-amd64 kernel (debian lenny) I'm running 32-bit userspace everywhere.  
Dual core Intel Core2 E6300.

I see KVM-81 has improved qcow2 data integrity with cache=writethrough which 
_might_ be what I'm hitting - but I can't find more details about this to check 
(and backport patch to debian package or wait for newer debian package).

thanks.

--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:18

Message:
Should be fixed in kvm-88.

--

Comment By: Adrian Bridgett (abridgett)
Date: 2009-01-24 14:17

Message:
I've tested with new debian KVM-83 package - which also removes that fedora
patch.  I can't reproduce the corruption I had anymore.  Feel free to close
this bug. Thanks.

--

Comment By: Daniel van Vugt (danv)
Date: 2009-01-21 06:47

Message:
As the original problem is resolved, its description is now inaccurate, and
my problem is ongoing, please close this bug. I will open a new one.

--

Comment By: Daniel van Vugt (danv)
Date: 2009-01-15 01:47

Message:
Reproduced qcow2 corruption again, using kvm-83 now. Same steps as before.

--

Comment By: Daniel van Vugt (danv)
Date: 2009-01-13 11:50

Message:
My problem is kvm-82 vanilla, compiled with no special options.

It happens consistently with kvm-82, but is not necessarily new to kvm-82
because I haven't used these images extensively with prior kvm releases.

No, I do not do anything dangerous like run multiple guests with the same
image or manage snapshots while the guest is up (only at pre-boot, when
stopped).

To reproduce repeatedly in the two cases I've seen it recently:

Case A: Windows Server 2003 (standard and enterprise) x86:
1. Boot guest (previously created under kvm-81)
2. Let Windows detect the new hardware (kvm-82 changes)
3. Reboot as instructed
4. Windows guest continues to work after multiple reboots and the console
shows snapshots intact
5. Shut down the Windows guest properly
6. Image is now invalid as soon as qemu(kvm) exits (not recognizable as
qcow2, not bootable and no snapshots reported by qemu-img info)

Case B: Windows 7 beta x86
1. Installed under kvm-82
2. Boot guest
3. Shut down guest
4. Image is now invalid as described.

This is a very serious problem. Though it sounds like a new separate bug
may be required...

--

Comment By: Laszlo Dvornik (ldvornik)
Date: 2009-01-13 10:23

Message:
I narrowed down my problem to one Fedora patch, which is included in Debian
too, but not in vanilla kvm, so vanilla kvm isn't affected.
The Fedora patch:
http://cvs.fedoraproject.org/viewvc/rpms/kvm/devel/kvm-62-block-rw-range-check.patch?revision=1.7&view=markup
I removed the CVE-2008-0928-fedora.patch from the debian source package
and recompiled it, the partitioning problem solved.
Fedora bugzilla entry: https://bugzilla.redhat.com/show_bug.cgi?id=433560
A question. Is this vulnerability confirmed by qemu devels or why only
distributions fixed this? Their fix seems to  cause problems with qcow2,
qcow, vmdk formats.

The patch works well last in debian before 79+dfsg-3, when they started to
use Fedora's updated patch. Perhaps the packagers modified the patch for
newer kvms badly.
Debian kvm git repo's commit:

[ kvm-Bugs-1831632 ] Windows XP setup Performance regression

2009-08-24 Thread SourceForge.net

Bugs item #1831632, was opened at 2007-11-14 08:46
Message generated for change (Comment added) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1831632&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: None
Priority: 5
Private: No
Submitted By: Technologov (technologov)
Assigned to: Nobody/Anonymous (nobody)
Summary: Windows XP setup Performance regression

Initial Comment:
Windows XP setup Performance drops slightly with nearly each KVM release. Below 
is the summary table:

KVM-36 - ~14:00
KVM-37 - ~15:00
KVM-40 - ~15:00
KVM-42 - ~15:00
KVM-43 - ~16:00
KVM-44 - ~16:00
KVM-45 - ~16:00
KVM-48 - ~16:00
KVM-51 - ~18:00 
KVM-52 - ~19:00 

As of now, I don't know what causes that.

-Alexey

--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:32

Message:
How does it measure now?

--

Comment By: Technologov (technologov)
Date: 2007-11-14 08:48

Message:
Logged In: YES 
user_id=1839746
Originator: YES

Tests performed on Intel Core 2 - 2.0 GHz (Xeon 5130), Fedora 7 x86-64.

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1831632&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-1929279 ] kernel BUG at kvm-64/kernel/mmu.c:560!

2009-08-24 Thread SourceForge.net

Bugs item #1929279, was opened at 2008-03-30 11:21
Message generated for change (Settings changed) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1929279&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: kernel
Group: None
Status: Closed
Resolution: None
Priority: 5
Private: No
Submitted By: buggaboo (buggaboo)
Assigned to: Nobody/Anonymous (nobody)
Summary: kernel BUG at kvm-64/kernel/mmu.c:560!

Initial Comment:
# what cpu model:   AMD Athlon(tm) 64 X2 Dual Core Processor 4000+
# kvm version: kvm-64
# host kernel version: 2.6.24.4-x86_64-smp-tuxonice, vanilla + tuxonice
# host kernel arch: x86_64
# guest: Windows XP sp2, bitness: 32
# qemu command line:
vdeq qemu-system-x86_64 -m 512 -smp 2 \
-soundhw sb16,es1370 \
-usb -usbdevice tablet \
-net vde,vlan=0,sock=/var/run/vde.ctl \
-net nic,vlan=0,macaddr=52:54:00:00:AA:02 \
-hda vdisk-winxp-tooled-out1.img -smb virtual 
# -no-kvm switch: refuses to boot without kvm

This kernel bug seems to be triggered every time I attempt to install 
http://downloads.sourceforge.net/andlinux/andlinux-beta1rc6-kde.exe

also the guest hangs when I leave it alone after ~2 hours or so without giving 
any input.





--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-08-25 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Avi Kivity (avik)
Date: 2009-08-10 12:39

Message:
How exactly do you install this andlinux?

Note the bug may also be related to a hardware error.  Can you try this on
a different machine?

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1929279&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: vhost net: performance with ping benchmark

2009-08-24 Thread Anthony Liguori


Michael S. Tsirkin wrote:

On Mon, Aug 24, 2009 at 11:12:41AM +0300, Michael S. Tsirkin wrote:
  

At Rusty's suggestion, I tested vhost base performance with ping.
Results below, and seem to be what you'd expect.



Rusty, any chance you could look at the code?  Is it in reasonable
shape? I think it makes sense to merge it through you. What do you
think?  One comment on file placement: I put files under a separate
vhost directory to avoid confusion with virtio-net which runs in guest.
Does this sound sane?  Also, can a minimal version (without TSO, tap or
any other features) be merged upstream first so that features can be
added later? Or do we have to wait until it's more full featured?
Finally, can it reasonably make 2.6.32, or you think it needs more time
out of tree?
  


I think 2.6.32 is pushing it.  I think some time is needed to flush out 
the userspace interface.  In particular, I don't think Mark's comments 
have been adequately addressed.  If a version were merged without GSO 
support, some mechanism to do feature detection would be needed in the 
userspace API.  I think this is likely going to be needed regardless.  I 
also think the tap compatibility suggestion would simplify the 
consumption of this in userspace.


I'd like some time to look at get_state/set_state ioctl()s along with 
dirty tracking support.  It's a much better model for live migration IMHO.


I think some more thorough benchmarking would be good too.  In particular, 
netperf/iperf runs would be nice.


Regards,

Anthony Liguori


Thanks very much,

  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

kvm-76 aborted on ibm server

2009-08-24 Thread Haneef Syed

Using kvm-76 on 2.6.21mv kernel on ibm server x3650(64bit processor) 
target, I got the 

following error. Any suggestions?

unhandled vm exit: 0x8021 vcpu_id 0
rax b101 rbx  rcx  rdx 

rsi  rdi  rsp 0ff8 rbp 

r8   r9   r10  r11 

r12  r13  r14  r15 

rip fe6e rflags 00023002
cs 0100 (/ p 1 dpl 0 db 1 s 1 type b l 0 g 1 avl 0)
ds 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
es  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
ss 0108 (/ p 1 dpl 0 db 1 s 1 type 3 l 0 g 1 avl 0)
fs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gs  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
tr 0148 (c04405c0/2067 p 1 dpl 0 db 0 s 0 type b l 0 g 0 avl 
0)
ldt  (/ p 0 dpl 0 db 0 s 0 type 0 l 0 g 0 avl 0)
gdt 5020/2cf
idt 52f0/7ff
cr0 8001003b cr2 0 cr3 1005000 cr4 2d4 cr8 0 efer 0
Aborted


__
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

KVM pvmmu: do not batch pte updates from interrupt context

2009-08-24 Thread Marcelo Tosatti


Commit b8bcfe997e4 made paravirt pte updates synchronous in interrupt
context.

Unfortunately the KVM pv mmu code caches the lazy/nonlazy mode
internally, so a pte update from interrupt context during a lazy mmu
operation can be batched while it should be performed synchronously.

https://bugzilla.redhat.com/show_bug.cgi?id=518022

Drop the internal mode variable and use paravirt_get_lazy_mode(), which
returns the correct state.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c664d51..63b0ec8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,7 +34,6 @@
 struct kvm_para_state {
u8 mmu_queue[MMU_QUEUE_SIZE];
int mmu_queue_len;
-   enum paravirt_lazy_mode mode;
 };
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
@@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
 {
struct kvm_para_state *state = kvm_para_state();
 
-   if (state->mode != PARAVIRT_LAZY_MMU) {
+   if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
kvm_mmu_op(buffer, len);
return;
}
@@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn)
 
 static void kvm_enter_lazy_mmu(void)
 {
-   struct kvm_para_state *state = kvm_para_state();
-
paravirt_enter_lazy_mmu();
-   state->mode = paravirt_get_lazy_mode();
 }
 
 static void kvm_leave_lazy_mmu(void)
@@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void)
 
mmu_queue_flush(state);
paravirt_leave_lazy_mmu();
-   state->mode = paravirt_get_lazy_mode();
 }
 
 static void __init paravirt_ops_setup(void)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: vhost net: performance with ping benchmark

2009-08-24 Thread Avi Kivity


On 08/25/2009 05:22 AM, Anthony Liguori wrote:


I think 2.6.32 is pushing it. 


2.6.32 is pushing it, but we need to push it.

I think some time is needed to flush out the userspace interface.  In 
particular, I don't think Mark's comments have been adequately 
addressed.  If a version were merged without GSO support, some 
mechanism to do feature detection would be needed in the userspace API. 


I don't see any point  in merging without gso (unless it beats userspace 
with gso, which I don't think will happen).  In any case we'll need 
feature negotiation.


I think this is likely going to be needed regardless.  I also think 
the tap compatibility suggestion would simplify the consumption of 
this in userspace.


What about veth pairs?

I'd like some time to look at get_state/set_state ioctl()s along with 
dirty tracking support.  It's a much better model for live migration 
IMHO.


My preference is ring proxying.  Note we'll need ring proxying (or at 
least event proxying) for non-MSI guests.


I think some more thorough benchmarking would be good too.  In 
particular, netperf/iperf runs would be nice.


Definitely.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/2] eventfd: new EFD_STATE flag

2009-08-24 Thread Avi Kivity


On 08/25/2009 01:08 AM, Davide Libenzi wrote:

Is that really difficult to understand where I'm standing, leaving the KVM
hat off for a moment?
   


I understand it perfectly.  I take the same position with kvm.  I'm 
providing more data in the hope that you'll change your mind, not trying 
to flood you with email so you'll give up.


We can always create our eventfd-lookalike for kvm, but I'd rather not 
do that (other options include a userspace proxy through existing 
interfaces, it might even be better than changing eventfd if we decide 
performance for level-triggered interrupts is not critical).


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

65 matches

Mail list logo