[COMMIT master] KVM: Expand on help info to specify kvm intel and amd module names

2009-05-14 Thread Avi Kivity
From: Robert P. J. Day rpj...@crashcourse.ca

Signed-off-by: Robert P. J. Day rpj...@crashcourse.ca
Cc: Avi Kivity a...@redhat.com
Signed-off-by: Andrew Morton a...@linux-foundation.org
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a58504e..8600a09 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -50,6 +50,9 @@ config KVM_INTEL
  Provides support for KVM on Intel processors equipped with the VT
  extensions.
 
+ To compile this as a module, choose M here: the module
+ will be called kvm-intel.
+
 config KVM_AMD
	tristate "KVM for AMD processors support"
depends on KVM
@@ -57,6 +60,9 @@ config KVM_AMD
  Provides support for KVM on AMD processors equipped with the AMD-V
  (SVM) extensions.
 
+ To compile this as a module, choose M here: the module
+ will be called kvm-amd.
+
 config KVM_TRACE
	bool "KVM trace support"
	depends on KVM && SYSFS
--


[COMMIT master] KVM: x86: check for cr3 validity in mmu_alloc_roots

2009-05-14 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Verify that the cr3 address stored in vcpu->arch.cr3 points to an existent
memslot. If not, inject a triple fault.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 80c76f4..479e748 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1912,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 }
 
-static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
+{
+   int ret = 0;
+
+	if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
+		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+   ret = 1;
+   }
+
+   return ret;
+}
+
+static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
int i;
gfn_t root_gfn;
@@ -1927,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
if (tdp_enabled)
direct = 1;
+   if (mmu_check_root(vcpu, root_gfn))
+   return 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
  PT64_ROOT_LEVEL, direct,
  ACC_ALL, NULL);
		root = __pa(sp->spt);
		++sp->root_count;
		vcpu->arch.mmu.root_hpa = root;
-   return;
+   return 0;
}
direct = !is_paging(vcpu);
if (tdp_enabled)
@@ -1950,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
			root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT;
		} else if (vcpu->arch.mmu.root_level == 0)
root_gfn = 0;
+   if (mmu_check_root(vcpu, root_gfn))
+   return 1;
		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
  PT32_ROOT_LEVEL, direct,
  ACC_ALL, NULL);
@@ -1958,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
		vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
	}
	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+   return 0;
 }
 
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
@@ -1976,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
-		if (root) {
+		if (root && VALID_PAGE(root)) {
			root &= PT64_BASE_ADDR_MASK;
sp = page_header(root);
mmu_sync_children(vcpu, sp);
@@ -2311,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
goto out;
	spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
-   mmu_alloc_roots(vcpu);
+   r = mmu_alloc_roots(vcpu);
mmu_sync_roots(vcpu);
	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (r)
+		goto out;
	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
kvm_mmu_flush_tlb(vcpu);
 out:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ecc35c6..33f850b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4564,6 +4564,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
kvm_mmu_zap_all(kvm);
+   kvm_reload_remote_mmus(kvm);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
--


[COMMIT master] KVM: s390: Unlink vcpu on destroy - v2

2009-05-14 Thread Avi Kivity
From: Carsten Otte co...@de.ibm.com

This patch makes sure we unlink a vcpu's sie control block
from the system control area in kvm_arch_vcpu_destroy. This
prevents illegal accesses to the sie control block from other
virtual cpus after it has been freed.

Reported-by: Mijo Safradin m...@linux.vnet.ibm.com
Signed-off-by: Carsten Otte co...@de.ibm.com
Signed-off-by: Christian Ehrhardt ehrha...@de.ibm.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 36c654d..628494a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -196,6 +196,10 @@ out_nokvm:
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+	if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+		(__u64) vcpu->arch.sie_block)
+		vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
+	smp_mb();
	free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
kfree(vcpu);
@@ -310,8 +314,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
	vcpu->arch.sie_block->icpua = id;
	BUG_ON(!kvm->arch.sca);
-	BUG_ON(kvm->arch.sca->cpu[id].sda);
-	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	if (!kvm->arch.sca->cpu[id].sda)
+		kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	else
+		BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
 
--


[COMMIT master] KVM: Unprotect a page if #PF happens during NMI injection.

2009-05-14 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

This is already done for exceptions and interrupts.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8b5ffbd..ac3d5ba 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1122,8 +1122,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
	if (npt_enabled)
		svm_flush_tlb(&svm->vcpu);
	else {
-		if (svm->vcpu.arch.interrupt.pending ||
-		    svm->vcpu.arch.exception.pending)
+		if (kvm_event_needs_reinjection(&svm->vcpu))
			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
	}
	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f3ab27b..8981654 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2615,7 +2615,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
		cr2 = vmcs_readl(EXIT_QUALIFICATION);
		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
			    (u32)((u64)cr2 >> 32), handler);
-		if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
+		if (kvm_event_needs_reinjection(vcpu))
			kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 39350b2..21203d4 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -30,4 +30,10 @@ static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
	clear_bit(word_index, &vcpu->arch.irq_summary);
	return irq;
 }
+
+static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
+		vcpu->arch.nmi_injected;
+}
 #endif
--


[COMMIT master] KVM: s390: use hrtimer for clock wakeup from idle - v2

2009-05-14 Thread Avi Kivity
From: Christian Borntraeger borntrae...@de.ibm.com

This patch reworks the s390 clock comparator wakeup to hrtimer. The clock
comparator is a per-cpu value that is compared against the TOD clock. If
ckc <= TOD an external interrupt 1004 is triggered. Since the clock comparator
and the TOD clock have a much higher resolution than jiffies we should use
hrtimers to trigger the wakeup. This speeds up guest nanosleep for small
values.

Since hrtimer callbacks run in hard-irq context, I added a tasklet to do
the actual work with enabled interrupts.
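As a side note (not part of the patch itself), the magic constants in the new
sltime computation below come from the s390 TOD clock format: one TOD clock
unit is 2^-12 microseconds, i.e. 1000/4096 ns, which reduces to the
(delta * 125) >> 9 expression used in kvm_s390_handle_wait. A purely
illustrative helper that performs the same conversion:

/* Illustrative only: convert a clock-comparator delta (TOD units) to ns.
 * 1 TOD unit = 1000/4096 ns and 1000/4096 == 125/512, hence the shift by 9. */
static inline u64 tod_delta_to_ns(u64 delta)
{
	return (delta * 125) >> 9;	/* == delta * 125 / 512 */
}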

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Signed-off-by: Carsten Otte co...@de.ibm.com
Signed-off-by: Christian Ehrhardt ehrha...@de.ibm.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 54ea39f..a27d0d5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,6 +13,8 @@
 
 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <asm/debug.h>
 #include <asm/cpuid.h>
@@ -210,7 +212,8 @@ struct kvm_vcpu_arch {
s390_fp_regs  guest_fpregs;
unsigned int  guest_acrs[NUM_ACRS];
struct kvm_s390_local_interrupt local_int;
-	struct timer_list ckc_timer;
+	struct hrtimer    ckc_timer;
+	struct tasklet_struct tasklet;
union  {
cpuid_t   cpu_id;
u64   stidp_data;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4ed4c3a..a48830f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -12,6 +12,8 @@
 
 #include <asm/lowcore.h>
 #include <asm/uaccess.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
 #include <linux/kvm_host.h>
 #include <linux/signal.h>
 #include "kvm-s390.h"
@@ -361,12 +363,10 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
return 0;
}
 
-	sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+	sltime = ((vcpu->arch.sie_block->ckc - now) * 125) >> 9;
 
-	vcpu->arch.ckc_timer.expires = jiffies + sltime;
-
-	add_timer(&vcpu->arch.ckc_timer);
-	VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime);
+	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set(0, sltime), HRTIMER_MODE_REL);
+	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
	spin_lock_bh(&vcpu->arch.local_int.lock);
@@ -389,21 +389,34 @@ no_timer:
	remove_wait_queue(&vcpu->wq, &wait);
	spin_unlock_bh(&vcpu->arch.local_int.lock);
	spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
-	del_timer(&vcpu->arch.ckc_timer);
+	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
 }
 
-void kvm_s390_idle_wakeup(unsigned long data)
+void kvm_s390_tasklet(unsigned long parm)
 {
-   struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+   struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
 
-	spin_lock_bh(&vcpu->arch.local_int.lock);
+	spin_lock(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.timer_due = 1;
	if (waitqueue_active(&vcpu->arch.local_int.wq))
		wake_up_interruptible(&vcpu->arch.local_int.wq);
-	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	spin_unlock(&vcpu->arch.local_int.lock);
 }
 
+/*
+ * low level hrtimer wake routine. Because this runs in hardirq context
+ * we schedule a tasklet to do the real work.
+ */
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+   struct kvm_vcpu *vcpu;
+
+   vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+	tasklet_schedule(&vcpu->arch.tasklet);
+
+   return HRTIMER_NORESTART;
+}
 
 void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 86567e1..dc3d068 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/hrtimer.h>
 #include <linux/init.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
@@ -283,8 +284,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
	vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
	vcpu->arch.sie_block->ecb   = 2;
	vcpu->arch.sie_block->eca   = 0xC1002001U;
-	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
-		    (unsigned long) vcpu);
+	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+	tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
+		     (unsigned long) vcpu);
+	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
	get_cpu_id(&vcpu->arch.cpu_id);
	vcpu->arch.cpu_id.version = 0xff;
return 0;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 00bbe69..748fee8 100644
--- 

[COMMIT master] KVM: s390: optimize float int lock: spin_lock_bh --> spin_lock

2009-05-14 Thread Avi Kivity
From: Christian Borntraeger borntrae...@de.ibm.com

The floating interrupt lock is only taken in process context. We can
replace all spin_lock_bh with standard spin_lock calls.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Signed-off-by: Christian Ehrhardt ehrha...@de.ibm.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a48830f..f04f530 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -301,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
}
 
	if ((!rc) && atomic_read(&fi->active)) {
-		spin_lock_bh(&fi->lock);
+		spin_lock(&fi->lock);
		list_for_each_entry(inti, &fi->list, list)
			if (__interrupt_is_deliverable(vcpu, inti)) {
				rc = 1;
				break;
			}
-		spin_unlock_bh(&fi->lock);
+		spin_unlock(&fi->lock);
}
 
if ((!rc)  (vcpu-arch.sie_block-ckc 
@@ -368,7 +368,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
	hrtimer_start(&vcpu->arch.ckc_timer, ktime_set(0, sltime), HRTIMER_MODE_REL);
	VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
 no_timer:
-	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+	spin_lock(&vcpu->arch.local_int.float_int->lock);
	spin_lock_bh(&vcpu->arch.local_int.lock);
	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
	while (list_empty(&vcpu->arch.local_int.list) &&
@@ -377,18 +377,18 @@ no_timer:
!signal_pending(current)) {
set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock_bh(&vcpu->arch.local_int.lock);
-		spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_unlock(&vcpu->arch.local_int.float_int->lock);
		vcpu_put(vcpu);
		schedule();
		vcpu_load(vcpu);
-		spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_lock(&vcpu->arch.local_int.float_int->lock);
		spin_lock_bh(&vcpu->arch.local_int.lock);
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
	remove_wait_queue(&vcpu->wq, &wait);
	spin_unlock_bh(&vcpu->arch.local_int.lock);
-	spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+	spin_unlock(&vcpu->arch.local_int.float_int->lock);
	hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
 }
@@ -455,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
	if (atomic_read(&fi->active)) {
		do {
			deliver = 0;
-			spin_lock_bh(&fi->lock);
+			spin_lock(&fi->lock);
			list_for_each_entry_safe(inti, n, &fi->list, list) {
				if (__interrupt_is_deliverable(vcpu, inti)) {
					list_del(&inti->list);
@@ -466,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
			}
			if (list_empty(&fi->list))
				atomic_set(&fi->active, 0);
-			spin_unlock_bh(&fi->lock);
+			spin_unlock(&fi->lock);
if (deliver) {
__do_deliver_interrupt(vcpu, inti);
kfree(inti);
@@ -531,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 
	mutex_lock(&kvm->lock);
	fi = &kvm->arch.float_int;
-	spin_lock_bh(&fi->lock);
+	spin_lock(&fi->lock);
	list_add_tail(&inti->list, &fi->list);
	atomic_set(&fi->active, 1);
	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
@@ -548,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
		if (waitqueue_active(&li->wq))
			wake_up_interruptible(&li->wq);
		spin_unlock_bh(&li->lock);
-	spin_unlock_bh(&fi->lock);
+	spin_unlock(&fi->lock);
	mutex_unlock(&kvm->lock);
return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index dc3d068..36c654d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -318,11 +318,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
	spin_lock_init(&vcpu->arch.local_int.lock);
	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
-	spin_lock_bh(&kvm->arch.float_int.lock);
+	spin_lock(&kvm->arch.float_int.lock);
	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
	init_waitqueue_head(&vcpu->arch.local_int.wq);
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
-	spin_unlock_bh(&kvm->arch.float_int.lock);
+	spin_unlock(&kvm->arch.float_int.lock);
 
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
diff --git a/arch/s390/kvm/priv.c 

[COMMIT master] KVM: s390: Verify memory in kvm run

2009-05-14 Thread Avi Kivity
From: Carsten Otte co...@de.ibm.com

This check verifies that the guest we're trying to run in KVM_RUN
has some memory assigned to it. Without it, the guest enters an
endless exception loop.

Reported-by: Mijo Safradin m...@linux.vnet.ibm.com
Signed-off-by: Carsten Otte co...@de.ibm.com
Signed-off-by: Christian Ehrhardt ehrha...@de.ibm.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 628494a..10bccd1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -487,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
	vcpu_load(vcpu);
 
+	/* verify, that memory has been registered */
+	if (!vcpu->kvm->arch.guest_memsize) {
+		vcpu_put(vcpu);
+		return -EINVAL;
+	}
+
	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
--


[COMMIT master] KVM: Do not allow interrupt injection from userspace if there is a pending event.

2009-05-14 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

The exception will immediately close the interrupt window.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 33f850b..d9396a7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3101,8 +3101,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
		kvm_run->ready_for_interrupt_injection = 1;
	else
		kvm_run->ready_for_interrupt_injection =
-			(kvm_arch_interrupt_allowed(vcpu) &&
-			 !kvm_cpu_has_interrupt(vcpu));
+			kvm_arch_interrupt_allowed(vcpu) &&
+			!kvm_cpu_has_interrupt(vcpu) &&
+			!kvm_event_needs_reinjection(vcpu);
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
--


[COMMIT master] KVM: skip_emulated_instruction() decode instruction if size is not known

2009-05-14 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ac3d5ba..1315ce0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -228,7 +228,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
 
	if (!svm->next_rip) {
-		printk(KERN_DEBUG "%s: NOP\n", __func__);
+		if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+				EMULATE_DONE)
+			printk(KERN_DEBUG "%s: NOP\n", __func__);
		return;
	}
	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
@@ -1868,11 +1870,8 @@ static int task_switch_interception(struct vcpu_svm *svm,
	if (reason != TASK_SWITCH_GATE ||
	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
-	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
-		if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0,
-					EMULTYPE_SKIP) != EMULATE_DONE)
-			return 0;
-	}
+	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
+		skip_emulated_instruction(&svm->vcpu);
 
	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
--


[COMMIT master] KVM: Disable CR8 intercept if tpr patching is active

2009-05-14 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7037afa..44e87a5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3138,7 +3138,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
	if (!kvm_x86_ops->update_cr8_intercept)
		return;
 
-	max_irr = kvm_lapic_find_highest_irr(vcpu);
+	if (!vcpu->arch.apic->vapic_addr)
+		max_irr = kvm_lapic_find_highest_irr(vcpu);
+	else
+		max_irr = -1;
 
	if (max_irr != -1)
		max_irr >>= 4;
@@ -3245,10 +3248,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
		kvm_x86_ops->enable_irq_window(vcpu);
 
	if (kvm_lapic_enabled(vcpu)) {
-		if (!vcpu->arch.apic->vapic_addr)
-			update_cr8_intercept(vcpu);
-		else
-			kvm_lapic_sync_to_vapic(vcpu);
+		update_cr8_intercept(vcpu);
+		kvm_lapic_sync_to_vapic(vcpu);
	}
 
	up_read(&vcpu->kvm->slots_lock);
--


[COMMIT master] KVM: Do not re-execute INTn instruction.

2009-05-14 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

Re-inject the event instead. This is what Intel suggests. Also use the correct
instruction length when re-injecting a soft fault/interrupt.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4347cc3..b5b3a72 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
struct kvm_pio_request pio;
void *pio_data;
 
+   u8 event_exit_inst_len;
+
struct kvm_queued_exception {
bool pending;
bool has_error_code;
@@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
 
struct kvm_queued_interrupt {
bool pending;
+   bool soft;
u8 nr;
} interrupt;
 
@@ -511,7 +514,7 @@ struct kvm_x86_ops {
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
-   void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+   void (*set_irq)(struct kvm_vcpu *vcpu);
void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1315ce0..377c4f1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2310,13 +2310,13 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu, 
unsigned nr)
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
-static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
+static void svm_set_irq(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
nested_svm_intr(svm);
 
-   svm_queue_irq(vcpu, irq);
+	svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -2418,7 +2418,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
case SVM_EXITINTINFO_TYPE_EXEPT:
		/* In case of software exception do not reinject an exception
		   vector, but re-execute the instruction instead */
-		if (vector == BP_VECTOR || vector == OF_VECTOR)
+		if (kvm_exception_is_soft(vector))
			break;
		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
			u32 err = svm->vmcb->control.exit_int_info_err;
@@ -2428,7 +2428,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
			kvm_queue_exception(&svm->vcpu, vector);
		break;
	case SVM_EXITINTINFO_TYPE_INTR:
-		kvm_queue_interrupt(&svm->vcpu, vector);
+		kvm_queue_interrupt(&svm->vcpu, vector, false);
break;
default:
break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8981654..29b49f0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -801,8 +801,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, 
unsigned nr,
return;
}
 
-   if (nr == BP_VECTOR || nr == OF_VECTOR) {
-   vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+   if (kvm_exception_is_soft(nr)) {
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
} else
intr_info |= INTR_TYPE_HARD_EXCEPTION;
@@ -2445,9 +2446,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
-static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
+static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
+	uint32_t intr;
+	int irq = vcpu->arch.interrupt.nr;
 
KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
 
@@ -2462,8 +2465,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int 
irq)
kvm_rip_write(vcpu, vmx-rmode.irq.rip - 1);
return;
}
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+	intr = irq | INTR_INFO_VALID_MASK;
+	if (vcpu->arch.interrupt.soft) {
+		intr |= INTR_TYPE_SOFT_INTR;
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
+	} else
+		intr |= INTR_TYPE_EXT_INTR;
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -3024,6 +3033,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
  GUEST_INTR_STATE_NMI);
break;
case 

[COMMIT master] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

2009-05-14 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Conflicts:
arch/x86/kvm/x86.c

Signed-off-by: Avi Kivity a...@redhat.com
--


[COMMIT master] KVM: Always request IRQ/NMI window if an interrupt is pending

2009-05-14 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

Currently they are not requested if there is a pending exception.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e395ca4..efba9bc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3148,8 +3148,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
kvm_x86_ops-update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
-static void inject_irq(struct kvm_vcpu *vcpu)
+static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
+
	/* try to reinject previous events if any */
	if (vcpu->arch.nmi_injected) {
		kvm_x86_ops->set_nmi(vcpu);
@@ -3177,26 +3180,11 @@ static void inject_irq(struct kvm_vcpu *vcpu)
}
 }
 
-static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-
-	inject_irq(vcpu);
-
-	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
-		kvm_x86_ops->enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-		kvm_x86_ops->enable_irq_window(vcpu);
-}
-
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
int r;
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
 
	if (vcpu->requests)
		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -3250,6 +3238,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
else
inject_pending_irq(vcpu, kvm_run);
 
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		kvm_x86_ops->enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		kvm_x86_ops->enable_irq_window(vcpu);
+
	if (kvm_lapic_enabled(vcpu)) {
		if (!vcpu->arch.apic->vapic_addr)
			update_cr8_intercept(vcpu);
--


[COMMIT master] KVM: Move exit due to NMI handling into vmx_complete_interrupts()

2009-05-14 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

To save us one reading of VM_EXIT_INTR_INFO.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 29b49f0..fe2ce2b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3261,8 +3261,17 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
int type;
bool idtv_info_valid;
 
-	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	/* We need to handle NMIs before interrupts are enabled */
+	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
+	    (exit_intr_info & INTR_INFO_VALID_MASK)) {
+		KVMTRACE_0D(NMI, &vmx->vcpu, handler);
+		asm("int $2");
+	}
+
+	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+
	if (cpu_has_virtual_nmis()) {
		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
@@ -3363,7 +3372,6 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
-   u32 intr_info;
 
/* Record the guest's net vcpu time for enforced NMI injections. */
	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
@@ -3490,15 +3498,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
	vmx->launched = 1;
 
-	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
-	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-	    (intr_info & INTR_INFO_VALID_MASK)) {
-		KVMTRACE_0D(NMI, vcpu, handler);
-		asm("int $2");
-	}
-
vmx_complete_interrupts(vmx);
 }
 
--


[COMMIT master] KVM: Do not migrate pending software interrupts.

2009-05-14 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

INTn will be re-executed after migration. If we wanted to migrate a
pending software interrupt we would need to migrate the interrupt type
and instruction length too, but we do not have all the required info on
SVM, so an SVM->VMX migration would need to re-execute INTn anyway. To
keep it simple, never migrate a pending soft interrupt.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efba9bc..7037afa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3575,7 +3575,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
 
-	if (vcpu->arch.interrupt.pending)
+	if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
		set_bit(vcpu->arch.interrupt.nr,
			(unsigned long *)sregs->interrupt_bitmap);
 
--


[PATCH -tip] x86: kvm replace MSR_IA32_TIME_STAMP_COUNTER with MSR_IA32_TSC of msr-index.h

2009-05-14 Thread Jaswinder Singh Rajput

Use standard msr-index.h's MSR declaration.

MSR_IA32_TSC is better than MSR_IA32_TIME_STAMP_COUNTER as it also solves
the 80-column issue.

Signed-off-by: Jaswinder Singh Rajput jaswinderraj...@gmail.com
---
 arch/x86/include/asm/kvm_host.h |2 --
 arch/x86/kvm/svm.c  |4 ++--
 arch/x86/kvm/vmx.c  |4 ++--
 arch/x86/kvm/x86.c  |5 ++---
 4 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f0faf58..824f5e6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -750,8 +750,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 
error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }
 
-#define MSR_IA32_TIME_STAMP_COUNTER0x010
-
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1821c20..f149d17 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1889,7 +1889,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned 
ecx, u64 *data)
struct vcpu_svm *svm = to_svm(vcpu);
 
switch (ecx) {
-   case MSR_IA32_TIME_STAMP_COUNTER: {
+   case MSR_IA32_TSC: {
u64 tsc;
 
rdtscll(tsc);
@@ -1979,7 +1979,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned 
ecx, u64 data)
struct vcpu_svm *svm = to_svm(vcpu);
 
switch (ecx) {
-   case MSR_IA32_TIME_STAMP_COUNTER: {
+   case MSR_IA32_TSC: {
u64 tsc;
 
rdtscll(tsc);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bb48133..c8d3234 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -897,7 +897,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 
msr_index, u64 *pdata)
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_index, pdata);
 #endif
-   case MSR_IA32_TIME_STAMP_COUNTER:
+   case MSR_IA32_TSC:
data = guest_read_tsc();
break;
case MSR_IA32_SYSENTER_CS:
@@ -957,7 +957,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 
msr_index, u64 data)
case MSR_IA32_SYSENTER_ESP:
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
-   case MSR_IA32_TIME_STAMP_COUNTER:
+   case MSR_IA32_TSC:
rdtscll(host_tsc);
guest_write_tsc(data, host_tsc);
break;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8e4a0ef..db0fd7d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -465,7 +465,7 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-   MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+   MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
 };
 
@@ -637,8 +637,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
-	kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
-		    &vcpu->hv_clock.tsc_timestamp);
+	kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
	ktime_get_ts(&ts);
local_irq_restore(flags);
 
-- 
1.6.0.6



--


Re: [PATCHv5 1/3] virtio: find_vqs/del_vqs virtio operations

2009-05-14 Thread Christian Borntraeger
On Wednesday 13 May 2009 21:08:58, Michael S. Tsirkin wrote:
 This replaces find_vq/del_vq with find_vqs/del_vqs virtio operations,
 and updates all drivers. This is needed for MSI support, because MSI
 needs to know the total number of vectors upfront.

[...]

 --- a/drivers/s390/kvm/kvm_virtio.c
 +++ b/drivers/s390/kvm/kvm_virtio.c
 @@ -227,6 +227,38 @@ static void kvm_del_vq(struct virtqueue *vq)
  KVM_S390_VIRTIO_RING_ALIGN));
  }
 
 +static void vp_del_vqs(struct virtio_device *vdev)
s/vp_del_vqs/kvm_del_vqs/ :

drivers/s390/kvm/kvm_virtio.c: In function 'kvm_find_vqs':
drivers/s390/kvm/kvm_virtio.c:258: error: implicit declaration of function 
'kvm_del_vqs'
drivers/s390/kvm/kvm_virtio.c: At top level:
drivers/s390/kvm/kvm_virtio.c:274: error: 'kvm_del_vqs' undeclared here (not in 
a function)
make[2]: *** [drivers/s390/kvm/kvm_virtio.o] Error 1

 +{
 + struct virtqueue *vq, *n;
 +
 + list_for_each_entry_safe(vq, n, vdev-vqs, list)
 + kvm_del_vq(vq);
 +}
 +
 +static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 + struct virtqueue *vqs[],
 + vq_callback_t *callbacks[],
 + const char *names[])
 +{
 + struct kvm_device *kdev = to_kvmdev(vdev);
 + int i;
 +
 + /* We must have this many virtqueues. */
 + if (nvqs > kdev->desc->num_vq)
 + return -ENOENT;
 +
 + for (i = 0; i < nvqs; ++i) {
 + vqs[i] = kvm_find_vq(vdev, i, callbacks[i], names[i]);
 + if (IS_ERR(vqs[i]))
 + goto error;
 + }
 + return 0;
 +
 +error:
 + kvm_del_vqs(vdev);
 + return PTR_ERR(vqs[i]);
 +}
 +
  /*
   * The config ops structure as defined by virtio config
   */
 @@ -238,8 +270,8 @@ static struct virtio_config_ops kvm_vq_configspace_ops = {
   .get_status = kvm_get_status,
   .set_status = kvm_set_status,
   .reset = kvm_reset,
 - .find_vq = kvm_find_vq,
 - .del_vq = kvm_del_vq,
 + .find_vqs = kvm_find_vqs,
 + .del_vqs = kvm_del_vqs,
  };
 
  /*


 --- a/include/linux/virtio_config.h
 +++ b/include/linux/virtio_config.h
[...]
needs an 

 #ifdef __KERNEL__
+#include <linux/err.h>
 #include <linux/virtio.h>

[...]
 + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names);
 + if (err < 0)
 + return ERR_PTR(err);

Otherwise ERR_PTR does not compile.

--


Re: kvm-autotest: The automation plans?

2009-05-14 Thread Michael Goldish

- jason wang jasow...@redhat.com wrote:

 sudhir kumar wrote:
  Hi Uri/Lucas,
 
  Do you have any plans for enhancing kvm-autotest?
  I was looking mainly on the following 2 aspects:
 
  (1).
  we have standalone migration only. Is there any plans of enhancing
  kvm-autotest so that we can trigger migration while a workload is
  running?
  Something like this:
  Start a workload(may be n instances of it).
  let the test execute for some time.
  Trigger migration.
  Log into the target.
  Check if the migration is succesful
  Check if the test results are consistent.

 We have some patches of ping pong migration and workload adding. The 
 migration is based on public bridge and workload adding is based on 
 running benchmark in the background of guest.
  (2).
  How can we run N parallel instances of a test? Will the current
  configuration  be easily able to support it?
 
  Please provide your thoughts on the above features.
 

 The parallelized instances could be easily achieved through 
 job.parallel() of autotest framework, and that is what we have used in
 our tests. We have make some helper routines such as get_free_port to
 be reentrant through file lock.

We'll probably have to use file locks anyway when we work with TAP, but in
VM.create(), not in get_free_port(), because we also want to prevent parallel
qemu instances from choosing the same TAP device. I'm not sure how qemu
handles this internally, and I'd rather be on the safe side.

Do you release the file lock inside get_free_port or only after running qemu?

 We've implemented following test cases: timedrift(already sent here),
 savevm/loadvm, suspend/resume, jumboframe, migration between two 
 machines and others. We will sent it here for review in the following
 weeks.
 There are some other things could be improved:
 1) Current kvm_test.cfg.sample/kvm_test.cfg is transparent to the autotest
 server UI. This would make it hard to configure the tests on the server
 side. During our tests, we have merged it into the control file so that it
 can be configured by editing the control file from the autotest server-side
 web UI.

Would it not suffice to just modify the configuration, instead of completely
defining it, inside the control file? This is possible using parse_string().
For example:

cfg = kvm_config.config("kvm_tests.cfg")
cfg.parse_string("only weekly")
cfg.parse_string("only Fedora RHEL Windows")
cfg.parse_string("""
variants:
    - 1:
        only ide
    - 2:
        Fedora:
            no rtl8139
""")
list = cfg.get_list()

(get_list() returns the test dictionaries.)

The advantage here is that we can have a standard kvm_tests.cfg that we all
agree on and only rather small environment-specific modifications are made
in the control file.
--


Re: Best choice for copy/clone/snapshot

2009-05-14 Thread Avi Kivity

Ross Boylan wrote:

Thanks for all the info.  I have one follow up.
On Wed, 2009-05-13 at 10:07 +0300, Avi Kivity wrote:
  

As I install software onto a system I want to preserve its state--just
the disk state---at various points so I can go back.  What is the best
way to do this?

LVM snapshots.  Read up on the 'lvcreate -s' command and option.


I may have been unclear.  I meant as I install software on the VM.
Since some of them are running Windows, they can't do LVM.  I am running
LVM on my host Linux system.

Or are you suggesting that I put the image files on a snapshottable
partition?  Over time the snapshot seems likely to accumulate a lot of
original sectors that don't involve the disk image I care about.

Or do you mean I should back each virtual disk with an LVM volume?  That
does seem cleaner; I've just been following the docs and they use
regular files.  They say I can't just use a raw partition, but maybe
kvm-img -f qcow2 /dev/MyVolumeGroup/Volume10 ?
  


You can certainly use a raw partition, for example

 qemu-system-x86_64 -drive file=/dev/vg0/guest1,cache=none

Does that give better performance?  


That is the highest performing option, especially with cache=none.


The one drawback I see is that I'd
have to really take the space I wanted, rather than having it only
notionally reserved for a file.  


Yes, that's a drawback, and there's currently no way around it.


I'm not sure how growing the logical
volume would interact with qcow...
  


It should work, but I wouldn't recommend it.

--
error compiling committee.c: too many arguments to function

--


Re: event injection MACROs

2009-05-14 Thread Avi Kivity

Dong, Eddie wrote:

OK.
Also back to Gleb's question, the reason I want to do that is to simplify the
event generation mechanism in current KVM.

Today KVM uses an additional layer of exception/nmi/interrupt state such as
vcpu->arch.exception.pending, vcpu->arch.interrupt.pending and
vcpu->arch.nmi_injected.
All those additional layers exist because they compete for the single
VM_ENTRY_INTR_INFO_FIELD write used to inject the event. Both SVM and VMX have
only one resource to inject the virtual event, but KVM generates 3 categories
of events in parallel, which further requires additional logic to decide among
them.


I thought of using a queue to hold all pending events (in a common 
format), sort it by priority, and inject the head.
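
Purely as an illustration of that idea (this is not existing KVM code and the
names are made up), such a common, priority-sorted queue could look like:

#include <linux/list.h>
#include <linux/types.h>

/* Illustrative only -- one queue for all pending events, head injected first. */
struct pending_event {
	struct list_head link;
	int  type;       /* exception, NMI or external interrupt */
	u8   vector;
	int  priority;   /* larger value == injected first */
};

static void queue_pending_event(struct list_head *queue,
				struct pending_event *ev)
{
	struct pending_event *p;

	/* keep the list sorted so the head is always the next event to inject */
	list_for_each_entry(p, queue, link)
		if (ev->priority > p->priority)
			break;
	list_add_tail(&ev->link, &p->link);
}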



One example is that an exception has higher priority
than NMI/IRQ injection in the current code, which is not true in reality.


I don't think it matters in practice, since the guest will see it as a 
timing issue.  NMIs and IRQs are asynchronous (even those generated by 
the guest through the local APIC).


Another issue is that a failed event from a previous injection, say an IRQ or
NMI, may be discarded if a virtual exception happens in the EXIT handling now.
With the patch for generic double fault handling, this case should be handled
normally.
  


Discarding an exception is usually okay as it will be regenerated.  I 
don't think we discard interrupts or NMIs.


--
error compiling committee.c: too many arguments to function

--


Re: [ANNOUNCE] qemu-kvm-0.10.4

2009-05-14 Thread Avi Kivity

Mark McLoughlin wrote:
  - There will be no stable releases, as such, of the kernel module. 
You should use upstream linux releases instead - e.g. the latest

stable release is 2.6.29.2
  


Actually, I do plan to release kvm-kmod-2.6.30 (and kvm-kmod-2.6.30.x).


--
error compiling committee.c: too many arguments to function

--


Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface

2009-05-14 Thread Avi Kivity

Gregory Haskins wrote:

KVM provides a complete virtual system environment for guests, including
support for injecting interrupts modeled after the real exception/interrupt
facilities present on the native platform (such as the IDT on x86).
Virtual interrupts can come from a variety of sources (emulated devices,
pass-through devices, etc) but all must be injected to the guest via
the KVM infrastructure.  This patch adds a new mechanism to inject a specific
interrupt to a guest using a decoupled eventfd mechanism:  Any legal signal
on the irqfd (using eventfd semantics from either userspace or kernel) will
translate into an injected interrupt in the guest at the next available
interrupt window.
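
For readers unfamiliar with eventfd semantics, here is a minimal, purely
illustrative userspace sketch. The ioctl that actually binds the eventfd to a
guest gsi is defined by this patch series and is not shown here; only the
standard eventfd part is illustrated:

#include <stdint.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
	uint64_t one = 1;
	int fd = eventfd(0, 0);	/* counter-style eventfd, initial value 0 */

	/* ... hand fd to KVM so it is associated with a guest gsi ... */

	/* Each write signals the eventfd; with irqfd this becomes an interrupt
	 * injected at the guest's next available interrupt window. */
	write(fd, &one, sizeof(one));
	return 0;
}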
  



r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 3db5d8d..dfc4bcc 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -415,6 +415,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#define KVM_CAP_EVENTFD 31
  


Let's keep a fine granularity and call it IRQFD.


+
+int
+kvm_deassign_irqfd(struct kvm *kvm, int fd)
+{
+   struct _irqfd *irqfd, *tmp;
+
+	mutex_lock(&kvm->lock);
+
+	/*
+	 * linear search isn't brilliant, but this should be an infrequent
+	 * operation and the list should not grow very large
+	 */
+	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds, list) {
+		if (irqfd->fd != fd)
+			continue;
  


Please fget() the new fd and compare the filps; fds aren't meaningful in 
the kernel.  You can also drop _irqfd::fd.


It may also be useful to compare the gsi, this allows a 
make-before-break switchover:


- guest reroutes irq to a different gsi
- associate irqfd with new gsi
- disassociate irqfd from old gsi


+
+   irqfd_release(irqfd);
+   mutex_unlock(kvm-lock);
+   return 0;
  


Don't return, userspace may have multiple associations?


--
error compiling committee.c: too many arguments to function

--


Re: kvm-autotest: The automation plans?

2009-05-14 Thread sudhir kumar
On Wed, May 13, 2009 at 11:30 PM, Michael Goldish mgold...@redhat.com wrote:

 - sudhir kumar smalik...@gmail.com wrote:

 Hi Uri/Lucas,

 Do you have any plans for enhancing kvm-autotest?
 I was looking mainly on the following 2 aspects:

 (1).
 we have standalone migration only. Is there any plans of enhancing
 kvm-autotest so that we can trigger migration while a workload is
 running?
 Something like this:
 Start a workload(may be n instances of it).
 let the test execute for some time.
 Trigger migration.
 Log into the target.
 Check if the migration is succesful
 Check if the test results are consistent.

 Yes, we have plans to implement such functionality. It shouldn't be
 hard, but we need to give it some thought in order to implement it as
 elegantly as possible.
I completely agree here.

 (2).
 How can we run N parallel instances of a test? Will the current
 configuration  be easily able to support it?

 I currently have some experimental patches that allow running of
 several parallel queues of tests. But what exactly do you mean by
Please post them.
 N parallel instances of a test? Do you mean N queues? Please provide
 an example so I can get a better idea.
I wanted a parallelism in 2 degrees. Let me try with an example.
The following test
 only raw.*ide.*default.*smp2.*RHEL5.3.i386.*migrate.dbench
is just one instance and will create one VM with given specifications
and execute migrate and dbench. So I am thinking how we can trigger n
similar test executions in parallel. I feel job.parallel() is meant
for that, but is kvm_tests.cfg good enough to be used under such a
scenario? Most of the stuff is non-static (e.g. getting a
free vnc port), but we still have some variables which are static,
for example the vm name, migration port etc. So what are your thoughts on it?
In this scenario my system will be having N VMs, all running the same
set of testcases.

On the other hand I was looking for something like this as well.
 only 
raw.*ide.*default.*smp2.*RHEL5.3.i386.*migrate.dbench.dbench_instancesN.bonnie
Thus all the tests will be executed in the normal way except dbench: there
should be N instances of dbench running, and when they are over, simply run
bonnie and exit.

I hope my demands on kvm-autotest are not too much, but for effective
and rigorous testing of kvm such a framework is necessary. I am a bit
new to the autotest framework and have very little knowledge of the server
side. I will start spending some time looking at the available
features.

Hope I was clear this time.



-- 
Sudhir Kumar
--


XP smp using a lot of CPU

2009-05-14 Thread Johannes Schlatow
I had a similar problem some weeks ago. Finally I found out that my VM
running WinXP was working on a non-acpi system (maybe I started kvm
with the -no-acpi option during the installation). In the Device Manager
there has to be the entry Computer -> "ACPI Multiprocessor PC".
Otherwise the VM produced 100% real cpu load on my machines (the fans
were running on highest speed level).
I just started the WinXP installation in repair mode and this did fix
the problem.

I hope this helps!

regards
  Johannes

On Wed, May 13, 2009 at 2:41 AM, Ross Boylan r...@biostat.ucsf.edu wrote:

 I just installed XP into a new VM, specifying -smp 2 for the machine.
 According to top, it's using nearly 200% of a cpu even when I'm not
 doing anything.

 Is this real CPU useage, or just a reporting problem (just as my disk
 image is big according to ls, but isn't really)?

 If it's real, is there anything I can do about it?

 kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64.  Xeon chips; 32
 bit version of XP pro installed, now fully patched (including the
 Windows Genuine Advantage stuff, though I cancelled it when it wanted to
 run).

 Task manager in XP shows virtually no CPU useage.

 Please cc me on responses.

 Thanks for any assistance.
 --
 Ross Boylan                                      wk:  (415) 514-8146
 185 Berry St #5700                               r...@biostat.ucsf.edu
 Dept of Epidemiology and Biostatistics           fax: (415) 514-8150
 University of California, San Francisco
 San Francisco, CA 94107-1739                     hm:  (415) 550-1062

 --

--


Re: kvm-autotest: The automation plans?

2009-05-14 Thread sudhir kumar
On Thu, May 14, 2009 at 12:22 PM, jason wang jasow...@redhat.com wrote:
 sudhir kumar wrote:

 Hi Uri/Lucas,

 Do you have any plans for enhancing kvm-autotest?
 I was looking mainly on the following 2 aspects:

 (1).
 we have standalone migration only. Is there any plans of enhancing
 kvm-autotest so that we can trigger migration while a workload is
 running?
 Something like this:
 Start a workload(may be n instances of it).
 let the test execute for some time.
 Trigger migration.
 Log into the target.
 Check if the migration is succesful
 Check if the test results are consistent.


 We have some patches of ping pong migration and workload adding. The
 migration is based on public bridge and workload adding is based on running
 benchmark in the background of guest.
Cool. I would like to have a look at them. So how do you manage the
background process/thread?


 (2).
 How can we run N parallel instances of a test? Will the current
 configuration  be easily able to support it?

 Please provide your thoughts on the above features.



 The parallelized instances could be easily achieved through job.parallel()
 of autotest framework, and that is what we have used in our tests. We have
 make some helper routines such as get_free_port to be reentrant through file
 lock.
 We've implemented following test cases: timedrift(already sent here),
 savevm/loadvm, suspend/resume, jumboframe, migration between two machines
 and others. We will sent it here for review in the following weeks.
 There are some other things could be improved:
 1) Current kvm_test.cfg.sample/kvm_test.cfg is transparent to autotest
 server UI. This would make it hard to configure the tests in the server
 side. During our test, we have merged it into control and make it could be
 configured by editing control file function of autotest server side web
 UI.
Not much clue here. But I would like to keep the control file as
simple as possible and as independent of test scenarios as
possible. kvm_tests.cfg should be the right file unless it
is impossible to do by using it.
 2) Public bridge support: I've sent a patch(TAP network support in
 kvm-autotest), this patch needs external DHCP server and requires nmap
 support. I don't know whether the method of original kvm_runtes_old(DHCP
 server of private bridge) is preferable.
The old approach is better. Not everyone will be able to run an external
DHCP server for running the test. I do not see any issue with the old
approach.





-- 
Sudhir Kumar
--


Re: [PATCH][KVM-AUTOTEST] TAP network support in kvm-autotest

2009-05-14 Thread Michael Goldish
Hi Jason,

We already have patches that implement similar functionality here in
TLV, as mentioned in the to-do list (item #4 under 'Framework').
They're not yet committed upstream because they're still quite fresh.

Still, your patch looks good and is quite similar to mine. The main
difference is that I use MAC/IP address pools specified by the user,
instead of random MACs with arp/nmap to detect the matching IP
addresses.

I will post my patch to the mailing list soon, but it will come
together with quite a few other patches that I haven't posted yet, so
please be patient.

Comments/questions:

Why do you use nmap in addition to arp? In what cases will arp not
suffice? I'm a little put off by the fact that nmap imposes an
additional requirement on the host. Three hosts I've tried don't come
with nmap installed by default.

Please see additional comments below.

- Jason Wang jasow...@redhat.com wrote:

 Hi All:
 This patch tries to add tap network support in kvm-autotest. Multiple
 nics connected to different bridges could be achieved through this
 script. Public bridge is important for testing real network traffic
 and migration. The patch gives each nic with randomly generated mac
 address. The ip address required in the test could be dynamically
 probed through nmap/arp. Only the ip address of first NIC is used
 through the test.
 
 Example:
 nics = nic1 nic2
 network = bridge
 bridge = switch
 ifup =/etc/qemu-ifup-switch
 ifdown =/etc/qemu-ifdown-switch
 
 This would make the virtual machine have two nics both of which are
 connected to a bridge with the name of 'switch'. Ifup/ifdown scripts
 are also specified.
 
 Another Example:
 nics = nic1 nic2
 network = bridge
 bridge = switch
 bridge_nic2 = virbr0
 ifup =/etc/qemu-ifup-switch
 ifup_nic2 = /etc/qemu-ifup-virbr0
 
 This would make the virtual machine have two nics: nic1 is connected
 to bridge 'switch' and nic2 is connected to bridge 'virbr0'.
 
 Public bridge mode and user mode nics can also be mixed:
 nics = nic1 nic2
 network = bridge
 network_nic2 = user
 
 Looking forward to comments and suggestions.
 
 From: jason jasow...@redhat.com
 Date: Wed, 13 May 2009 16:15:28 +0800
 Subject: [PATCH] Add tap networking support.
 
 ---
  client/tests/kvm_runtest_2/kvm_utils.py |7 +++
  client/tests/kvm_runtest_2/kvm_vm.py|   74
 ++-
  2 files changed, 69 insertions(+), 12 deletions(-)
 
 diff --git a/client/tests/kvm_runtest_2/kvm_utils.py
 b/client/tests/kvm_runtest_2/kvm_utils.py
 index be8ad95..0d1f7f8 100644
 --- a/client/tests/kvm_runtest_2/kvm_utils.py
 +++ b/client/tests/kvm_runtest_2/kvm_utils.py
 @@ -773,3 +773,10 @@ def md5sum_file(filename, size=None):
  size -= len(data)
  f.close()
  return o.hexdigest()
 +
 +def random_mac():
 +    mac = [0x00, 0x16, 0x30,
 +           random.randint(0x00, 0x09),
 +           random.randint(0x00, 0x09),
 +           random.randint(0x00, 0x09)]
 +    return ':'.join(map(lambda x: "%02x" % x, mac))

Random MAC addresses will not necessarily work everywhere, as far as
I know. That's why I prefer user specified MAC/IP address ranges.
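
For comparison, a minimal sketch of the pool-based approach (the data layout and the names here are invented for illustration; the actual TLV patches may differ):

def allocate_nic_address(pool, allocated):
    # pool is a user-specified list of (mac, ip) pairs; allocated tracks
    # addresses already handed out to running VMs.
    for mac, ip in pool:
        if mac not in allocated:
            allocated.add(mac)
            return mac, ip
    raise Exception("MAC/IP address pool exhausted")

pool = [("00:16:30:00:00:%02x" % i, "192.168.100.%d" % (100 + i)) for i in range(10)]
allocated = set()
mac, ip = allocate_nic_address(pool, allocated)
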

 diff --git a/client/tests/kvm_runtest_2/kvm_vm.py
 b/client/tests/kvm_runtest_2/kvm_vm.py
 index fab839f..ea7dab6 100644
 --- a/client/tests/kvm_runtest_2/kvm_vm.py
 +++ b/client/tests/kvm_runtest_2/kvm_vm.py
 @@ -105,6 +105,10 @@ class VM:
          self.qemu_path = qemu_path
          self.image_dir = image_dir
          self.iso_dir = iso_dir
 +        self.macaddr = []
 +        for nic_name in kvm_utils.get_sub_dict_names(params, "nics"):
 +            macaddr = kvm_utils.random_mac()
 +            self.macaddr.append(macaddr)

  def verify_process_identity(self):
  Make sure .pid really points to the original qemu
 process.
 @@ -189,9 +193,25 @@ class VM:
          for nic_name in kvm_utils.get_sub_dict_names(params, "nics"):
              nic_params = kvm_utils.get_sub_dict(params, nic_name)
              qemu_cmd += " -net nic,vlan=%d" % vlan
 +            net = nic_params.get("network")
 +            if net == "bridge":
 +                qemu_cmd += ",macaddr=%s" % self.macaddr[vlan]
              if nic_params.get("nic_model"):
                  qemu_cmd += ",model=%s" % nic_params.get("nic_model")
 -            qemu_cmd += " -net user,vlan=%d" % vlan
 +            if net == "bridge":
 +                qemu_cmd += " -net tap,vlan=%d" % vlan
 +                ifup = nic_params.get("ifup")
 +                if ifup:
 +                    qemu_cmd += ",script=%s" % ifup
 +                else:
 +                    qemu_cmd += ",script=/etc/qemu-ifup"

Why not just leave 'script' out if the user doesn't specify 'ifup'?
There's no good reason to prefer /etc/qemu-ifup to /etc/kvm-ifup or
anything else, so I think it's best to leave it up to qemu if the
user has no preference. It's also slightly shorter.
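
In other words, something along these lines (a sketch of the suggestion, not the committed code):

def tap_net_arg(vlan, ifup=None, ifdown=None):
    # Build the "-net tap" argument, adding script/downscript only when the
    # user actually specified them; otherwise qemu falls back to its default.
    arg = " -net tap,vlan=%d" % vlan
    if ifup:
        arg += ",script=%s" % ifup
    if ifdown:
        arg += ",downscript=%s" % ifdown
    return arg

# tap_net_arg(0)                            ->  " -net tap,vlan=0"
# tap_net_arg(0, "/etc/qemu-ifup-switch")   ->  adds ",script=/etc/qemu-ifup-switch"
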

 +                ifdown = nic_params.get("ifdown")
 +                if ifdown:
 +                    qemu_cmd += ",downscript=%s" % ifdown
 +

Re: kvm-autotest: The automation plans?

2009-05-14 Thread Michael Goldish

- sudhir kumar smalik...@gmail.com wrote:

 On Thu, May 14, 2009 at 12:22 PM, jason wang jasow...@redhat.com
 wrote:
  sudhir kumar wrote:
 
  Hi Uri/Lucas,
 
  Do you have any plans for enhancing kvm-autotest?
  I was looking mainly on the following 2 aspects:
 
  (1).
  we have standalone migration only. Is there any plans of enhancing
  kvm-autotest so that we can trigger migration while a workload is
  running?
  Something like this:
  Start a workload(may be n instances of it).
  let the test execute for some time.
  Trigger migration.
  Log into the target.
  Check if the migration is successful
  Check if the test results are consistent.
 
 
  We have some patches for ping-pong migration and workload adding. The
  migration is based on a public bridge, and workload adding is based on
  running a benchmark in the background of the guest.
 Cool. I would like to have a look at them. So how do you manage the
 background process/thread?
 
 
  (2).
  How can we run N parallel instances of a test? Will the current
  configuration  be easily able to support it?
 
  Please provide your thoughts on the above features.
 
 
 
  The parallelized instances could be easily achieved through job.parallel()
  of the autotest framework, and that is what we have used in our tests. We
  have made some helper routines, such as get_free_port, reentrant through a
  file lock.
  We've implemented the following test cases: timedrift (already sent here),
  savevm/loadvm, suspend/resume, jumboframe, migration between two machines,
  and others. We will send them here for review in the following weeks.
  There are some other things that could be improved:
  1) The current kvm_test.cfg.sample/kvm_test.cfg is not visible to the
  autotest server UI. This makes it hard to configure the tests on the server
  side. During our testing we merged it into the control file so that it can
  be configured through the control-file editing function of the autotest
  server-side web UI.
 Not much clue here. But I would like to keep the control file as
 simple as possible and as independent of test scenarios as
 possible. kvm_tests.cfg should be the right file until and unless it
 becomes impossible to do this with it.
  2) Public bridge support: I've sent a patch (TAP network support in
  kvm-autotest); this patch needs an external DHCP server and requires nmap
  support. I don't know whether the method of the original kvm_runtest_old
  (DHCP server on a private bridge) is preferable.
 The old approach is better. Not everyone may be able to run an external
 DHCP server for running the test. I do not see any issue with the old
 approach.

We're taking more of a minimalist approach in kvm_runtest_2: the
framework should handle only the things directly related to testing.
Configuring and running a DHCP server is and should be beyond the scope
of the KVM-Autotest framework. To emulate the old behavior, you can just
start the DHCP server yourself locally. If you wish, maybe we can
bundle example scripts with the framework that will do this for the user,
but they should not be an integral part of the framework in my opinion.

 
 
 
 -- 
 Sudhir Kumar
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv6 0/4] virtio: MSI-X support

2009-05-14 Thread Michael S. Tsirkin
Here's the latest draft of virtio patches.
This is on top of Rusty's recent virtqueue list + name patch,
which is included in series for completeness.

Changelog:
changes since v5: fix build on s390 (only patch 2/4 modified)

Michael S. Tsirkin (3):
  virtio: find_vqs/del_vqs virtio operations
  virtio_pci: split up vp_interrupt
  virtio_pci: optional MSI-X support

Rusty Russell (1):
  virtio: add names to virtqueue struct, mapping from devices to
queues.

 drivers/block/virtio_blk.c  |6 +-
 drivers/char/hw_random/virtio-rng.c |6 +-
 drivers/char/virtio_console.c   |   26 ++--
 drivers/lguest/lguest_device.c  |   41 +-
 drivers/net/virtio_net.c|   45 ++---
 drivers/s390/kvm/kvm_virtio.c   |   43 +-
 drivers/virtio/virtio.c |2 +
 drivers/virtio/virtio_balloon.c |   27 ++--
 drivers/virtio/virtio_pci.c |  306 ++-
 drivers/virtio/virtio_ring.c|   25 +++-
 include/linux/virtio.h  |   12 +-
 include/linux/virtio_config.h   |   45 -
 include/linux/virtio_pci.h  |   10 +-
 include/linux/virtio_ring.h |3 +-
 net/9p/trans_virtio.c   |2 +-
 15 files changed, 465 insertions(+), 134 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv6 1/4] virtio: add names to virtqueue struct, mapping from devices to queues.

2009-05-14 Thread Michael S. Tsirkin
From: Rusty Russell ru...@rustcorp.com.au

Add a linked list of all virtqueues for a virtio device: this helps for
debugging and is also needed for an upcoming interface change.

Also, add a name field for clearer debug messages.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---

Including Rusty's patch here for completeness.

 drivers/block/virtio_blk.c  |2 +-
 drivers/char/hw_random/virtio-rng.c |2 +-
 drivers/char/virtio_console.c   |4 ++--
 drivers/lguest/lguest_device.c  |5 +++--
 drivers/net/virtio_net.c|6 +++---
 drivers/s390/kvm/kvm_virtio.c   |7 ---
 drivers/virtio/virtio.c |2 ++
 drivers/virtio/virtio_balloon.c |4 ++--
 drivers/virtio/virtio_pci.c |5 +++--
 drivers/virtio/virtio_ring.c|   25 +++--
 include/linux/virtio.h  |   12 
 include/linux/virtio_config.h   |6 --
 include/linux/virtio_ring.h |3 ++-
 net/9p/trans_virtio.c   |2 +-
 14 files changed, 55 insertions(+), 30 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5d34764..8f7c956 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -224,7 +224,7 @@ static int virtblk_probe(struct virtio_device *vdev)
sg_init_table(vblk-sg, vblk-sg_elems);
 
/* We expect one virtqueue, for output. */
-   vblk-vq = vdev-config-find_vq(vdev, 0, blk_done);
+   vblk-vq = vdev-config-find_vq(vdev, 0, blk_done, requests);
if (IS_ERR(vblk-vq)) {
err = PTR_ERR(vblk-vq);
goto out_free_vblk;
diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index 86e83f8..2aeafce 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -94,7 +94,7 @@ static int virtrng_probe(struct virtio_device *vdev)
int err;
 
/* We expect a single virtqueue. */
-   vq = vdev-config-find_vq(vdev, 0, random_recv_done);
+   vq = vdev-config-find_vq(vdev, 0, random_recv_done, input);
if (IS_ERR(vq))
return PTR_ERR(vq);
 
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index ff6f5a4..58684e4 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -202,13 +202,13 @@ static int __devinit virtcons_probe(struct virtio_device 
*dev)
/* Find the input queue. */
/* FIXME: This is why we want to wean off hvc: we do nothing
 * when input comes in. */
-   in_vq = vdev-config-find_vq(vdev, 0, hvc_handle_input);
+   in_vq = vdev-config-find_vq(vdev, 0, hvc_handle_input, input);
if (IS_ERR(in_vq)) {
err = PTR_ERR(in_vq);
goto free;
}
 
-   out_vq = vdev-config-find_vq(vdev, 1, NULL);
+   out_vq = vdev-config-find_vq(vdev, 1, NULL, output);
if (IS_ERR(out_vq)) {
err = PTR_ERR(out_vq);
goto free_in_vq;
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index df44d96..4babed8 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -228,7 +228,8 @@ extern void lguest_setup_irq(unsigned int irq);
  * function. */
 static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
unsigned index,
-   void (*callback)(struct virtqueue *vq))
+   void (*callback)(struct virtqueue *vq),
+   const char *name)
 {
struct lguest_device *ldev = to_lgdev(vdev);
struct lguest_vq_info *lvq;
@@ -263,7 +264,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device 
*vdev,
/* OK, tell virtio_ring.c to set up a virtqueue now we know its size
 * and we've got a pointer to its pages. */
vq = vring_new_virtqueue(lvq-config.num, LGUEST_VRING_ALIGN,
-vdev, lvq-pages, lg_notify, callback);
+vdev, lvq-pages, lg_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto unmap;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4d1d479..be3b734 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -906,20 +906,20 @@ static int virtnet_probe(struct virtio_device *vdev)
vi-mergeable_rx_bufs = true;
 
/* We expect two virtqueues, receive then send. */
-   vi-rvq = vdev-config-find_vq(vdev, 0, skb_recv_done);
+   vi-rvq = vdev-config-find_vq(vdev, 0, skb_recv_done, input);
if (IS_ERR(vi-rvq)) {
err = PTR_ERR(vi-rvq);
goto free;
}
 
-   vi-svq = vdev-config-find_vq(vdev, 1, skb_xmit_done);
+   vi-svq = vdev-config-find_vq(vdev, 1, skb_xmit_done, output);
if (IS_ERR(vi-svq)) {

[PATCHv6 4/4] virtio_pci: optional MSI-X support

2009-05-14 Thread Michael S. Tsirkin
This implements optional MSI-X support in virtio_pci.
MSI-X is used whenever the host supports at least 2 MSI-X
vectors: 1 for configuration changes and 1 for virtqueues.
Per-virtqueue vectors are allocated if enough vectors are
available.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 drivers/virtio/virtio_pci.c |  227 +++
 include/linux/virtio_pci.h  |   10 ++-
 2 files changed, 217 insertions(+), 20 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 951e673..65627a4 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -42,6 +42,26 @@ struct virtio_pci_device
/* a list of queues so we can dispatch IRQs */
spinlock_t lock;
struct list_head virtqueues;
+
+   /* MSI-X support */
+   int msix_enabled;
+   int intx_enabled;
+   struct msix_entry *msix_entries;
+   /* Name strings for interrupts. This size should be enough,
+* and I'm too lazy to allocate each name separately. */
+   char (*msix_names)[256];
+   /* Number of available vectors */
+   unsigned msix_vectors;
+   /* Vectors allocated */
+   unsigned msix_used_vectors;
+};
+
+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues Thus, we need at least 2 vectors for MSI. */
+enum {
+   VP_MSIX_CONFIG_VECTOR = 0,
+   VP_MSIX_VQ_VECTOR = 1,
 };
 
 struct virtio_pci_vq_info
@@ -60,6 +80,9 @@ struct virtio_pci_vq_info
 
/* the list node for the virtqueues list */
struct list_head node;
+
+   /* MSI-X vector (or none) */
+   unsigned vector;
 };
 
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
@@ -109,7 +132,8 @@ static void vp_get(struct virtio_device *vdev, unsigned 
offset,
   void *buf, unsigned len)
 {
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-   void __iomem *ioaddr = vp_dev-ioaddr + VIRTIO_PCI_CONFIG + offset;
+   void __iomem *ioaddr = vp_dev-ioaddr +
+   VIRTIO_PCI_CONFIG(vp_dev) + offset;
u8 *ptr = buf;
int i;
 
@@ -123,7 +147,8 @@ static void vp_set(struct virtio_device *vdev, unsigned 
offset,
   const void *buf, unsigned len)
 {
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-   void __iomem *ioaddr = vp_dev-ioaddr + VIRTIO_PCI_CONFIG + offset;
+   void __iomem *ioaddr = vp_dev-ioaddr +
+  VIRTIO_PCI_CONFIG(vp_dev) + offset;
const u8 *ptr = buf;
int i;
 
@@ -221,7 +246,121 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
return vp_vring_interrupt(irq, opaque);
 }
 
-/* the config-find_vq() implementation */
+static void vp_free_vectors(struct virtio_device *vdev) {
+   struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+   int i;
+
+   if (vp_dev-intx_enabled) {
+   free_irq(vp_dev-pci_dev-irq, vp_dev);
+   vp_dev-intx_enabled = 0;
+   }
+
+   for (i = 0; i  vp_dev-msix_used_vectors; ++i)
+   free_irq(vp_dev-msix_entries[i].vector, vp_dev);
+   vp_dev-msix_used_vectors = 0;
+
+   if (vp_dev-msix_enabled) {
+   /* Disable the vector used for configuration */
+   iowrite16(VIRTIO_MSI_NO_VECTOR,
+ vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+   /* Flush the write out to device */
+   ioread16(vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+
+   vp_dev-msix_enabled = 0;
+   pci_disable_msix(vp_dev-pci_dev);
+   }
+}
+
+static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+ int *options, int noptions)
+{
+   int i;
+   for (i = 0; i  noptions; ++i)
+   if (!pci_enable_msix(dev, entries, options[i]))
+   return options[i];
+   return -EBUSY;
+}
+
+static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs)
+{
+   struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+   const char *name = dev_name(vp_dev-vdev.dev);
+   unsigned i, v;
+   int err = -ENOMEM;
+   /* We want at most one vector per queue and one for config changes.
+* Fallback to separate vectors for config and a shared for queues.
+* Finally fall back to regular interrupts. */
+   int options[] = { max_vqs + 1, 2 };
+   int nvectors = max(options[0], options[1]);
+
+   vp_dev-msix_entries = kmalloc(nvectors * sizeof *vp_dev-msix_entries,
+  GFP_KERNEL);
+   if (!vp_dev-msix_entries)
+   goto error_entries;
+   vp_dev-msix_names = kmalloc(nvectors * sizeof *vp_dev-msix_names,
+GFP_KERNEL);
+   if (!vp_dev-msix_names)
+   goto error_names;
+
+   for (i = 0; i  nvectors; ++i)
+ 

Re: [KVM PATCH v7 3/3] kvm: add iofd support

2009-05-14 Thread Avi Kivity

Gregory Haskins wrote:

iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd
signal when written to.  Userspace can register any arbitrary address
with a corresponding eventfd.

  


Please start a separate patchset for this so I can merge irqfd.



diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index dfc4bcc..99b6e45 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -292,6 +292,17 @@ struct kvm_guest_debug {
struct kvm_guest_debug_arch arch;
 };
 
+#define KVM_IOFD_FLAG_DEASSIGN  (1 << 0)

+#define KVM_IOFD_FLAG_PIO   (1 << 1)
+
+struct kvm_iofd {
+   __u64 addr;
+   __u32 len;
+   __u32 fd;
+   __u32 flags;
+   __u8  pad[12];
+};
+
  
Please add a data match capability.  virtio uses a write with the data 
containing the queue ID, and we want a separate event for each queue.




  * kvm trace categories
@@ -508,6 +519,7 @@ struct kvm_irqfd {
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
 #define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
 #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
+#define KVM_IOFD   _IOW(KVMIO, 0x78, struct kvm_iofd)
  


Too general a name.  It's not doing IO, just sending out notifications.

Why have assign/deassign for irqfd and a single ioctl for iofd?

The rest looks good.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm-autotest: The automation plans?

2009-05-14 Thread Michael Goldish

- sudhir kumar smalik...@gmail.com wrote:

 On Wed, May 13, 2009 at 11:30 PM, Michael Goldish
 mgold...@redhat.com wrote:
 
  - sudhir kumar smalik...@gmail.com wrote:
 
  Hi Uri/Lucas,
 
  Do you have any plans for enhancing kvm-autotest?
  I was looking mainly on the following 2 aspects:
 
  (1).
  we have standalone migration only. Is there any plans of enhancing
  kvm-autotest so that we can trigger migration while a workload is
  running?
  Something like this:
  Start a workload(may be n instances of it).
  let the test execute for some time.
  Trigger migration.
  Log into the target.
  Check if the migration is successful
  Check if the test results are consistent.
 
  Yes, we have plans to implement such functionality. It shouldn't be
  hard, but we need to give it some thought in order to implement it
 as
  elegantly as possible.
 I completely agree here.
 
  (2).
  How can we run N parallel instances of a test? Will the current
  configuration  be easily able to support it?
 
  I currently have some experimental patches that allow running of
  several parallel queues of tests. But what exactly do you mean by
 Please post them.
  N parallel instances of a test? Do you mean N queues? Please
 provide
  an example so I can get a better idea.
 I wanted parallelism in 2 degrees. Let me try with an example.
 The following test
  only raw.*ide.*default.*smp2.*RHEL5.3.i386.*migrate.dbench
 is just one instance and will create one VM with the given specifications
 and execute migrate and dbench. So I am thinking how we can trigger n
 similar test executions in parallel. I feel job.parallel() is meant
 for that, but is kvm_tests.cfg good enough to be used under such a
 scenario? However, most of the stuff is non-static (e.g. getting the
 free vnc port), but we still have some variables which are static,
 for example the vm name, migration port etc. So what are your thoughts on it.

I think generally kvm_tests.cfg is flexible enough, and can easily be
modified to define whatever you like.

Note, however, that the config file parser module is only responsible
for producing a list of dictionaries which define the tests to run.
It doesn't care much about parallelism -- this is up to the control file
and the rest of the framework. If you're not familiar with the format
of config files, please refer to
http://www.linux-kvm.org/page/KVM-Autotest/Test_Config_File
and
http://www.linux-kvm.org/page/KVM-Autotest/Parameters

 In this scenario my system will have N VMs, all running the same
 set of testcases.

I thought you said one VM running migrate and dbench in parallel. I'm
not sure I follow.

 On the other hand I was looking for something like this as well:
  only raw.*ide.*default.*smp2.*RHEL5.3.i386.*migrate.dbench.dbench_instancesN.bonnie
 Thus all the tests will be executed in the normal way except dbench. There
 should be N instances of dbench running, and when they are over, simply run
 bonnie and exit.

This seems like two tests to me: dbench with dbench (several instances),
and then another unrelated bonnie test.

Also note that the variants you select with 'only' must be defined before
they can be selected. Look at the examples in the wiki as well as real
config files.

 I hope my demands on kvm-autotest are not too much, but for effective
 and rigorous testing of kvm such a framework is necessary. I am a bit
 new to the autotest framework and have very little knowledge of the
 server side. I will start spending some time looking at the available
 features.
 
 Hope I was clear this time.

Regarding parallelism:
Generally two types can be implemented.

1. Several independent test execution queues: in this case there are several
queues that don't interfere with each other. Each queue works with its own
VMs. This is useful for saving time by running tests in parallel on capable
hosts. This can be implemented using job.parallel() and is already running
in TLV. I will try to post the patches soon.

This can probably also be implemented from the server, if it can treat a
single physical host as if it were several, thus running several independent
copies of the Autotest client on it.
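
As a rough illustration of option 1, a control file could spawn the queues with job.parallel() along these lines (this assumes the usual autotest control-file environment where job is predefined; the test parameters below are placeholders, not the TLV patches):

def run_queue(tag, test_list):
    # Run one independent queue of kvm_runtest_2 tests, each with its own VM.
    for params in test_list:
        job.run_test("kvm_runtest_2", params=params,
                     tag="%s.%s" % (tag, params["name"]))

queue_a = [{"name": "migrate", "vm_name": "vm_a", "vnc_port": 5901}]
queue_b = [{"name": "dbench",  "vm_name": "vm_b", "vnc_port": 5902}]

job.parallel([run_queue, "a", queue_a],
             [run_queue, "b", queue_b])
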

2. Several tests on a single VM, which is what you were referring to, if I
understood correctly: in this case several threads work with the same VMs
and abuse them in parallel -- one thread can run dbench while the other
runs migration on the same VM. This is possible using threads, and the
syntax in the config file can be something like 'types = dbench migration'
instead of what we currently use -- 'type = dbench'.
However, we have to think whether we really just want to run tests in
parallel. In the migration-dbench case, for example, we'd like to make sure
dbench starts running before we migrate. So maybe it's wiser to just run some
load inside the migration test, instead of the dbench test. We should
carefully consider all options.

Thanks,
Michael
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a 

Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface

2009-05-14 Thread Avi Kivity

Gregory Haskins wrote:

KVM provides a complete virtual system environment for guests, including
support for injecting interrupts modeled after the real exception/interrupt
facilities present on the native platform (such as the IDT on x86).
Virtual interrupts can come from a variety of sources (emulated devices,
pass-through devices, etc) but all must be injected to the guest via
the KVM infrastructure.  This patch adds a new mechanism to inject a specific
interrupt to a guest using a decoupled eventfd mechanism:  Any legal signal
on the irqfd (using eventfd semantics from either userspace or kernel) will
translate into an injected interrupt in the guest at the next available
interrupt window.

+
+static void
+irqfd_inject(struct work_struct *work)
+{
+   struct _irqfd *irqfd = container_of(work, struct _irqfd, work);
+   struct kvm *kvm = irqfd-kvm;
+
  



I think you need to ->read() from the irqfd, otherwise the count will
never clear.
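
(For reference, the counter semantics are easy to see from userspace. A tiny illustration in Python 3.10+ using os.eventfd, given only as an analogy for why the count must be read to clear; it is not the kernel-side fix itself:)

import os

fd = os.eventfd(0, os.EFD_NONBLOCK)
os.eventfd_write(fd, 1)
os.eventfd_write(fd, 1)
print(os.eventfd_read(fd))    # prints 2 -- reading returns and clears the count
try:
    os.eventfd_read(fd)       # counter is now 0, so this raises
except BlockingIOError:
    print("count cleared")
os.close(fd)
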



+   mutex_lock(kvm-lock);
+   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
+   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
+   mutex_unlock(kvm-lock);
+}
  


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface

2009-05-14 Thread Gregory Haskins
Avi Kivity wrote:
 Gregory Haskins wrote:
 KVM provides a complete virtual system environment for guests, including
 support for injecting interrupts modeled after the real
 exception/interrupt
 facilities present on the native platform (such as the IDT on x86).
 Virtual interrupts can come from a variety of sources (emulated devices,
 pass-through devices, etc) but all must be injected to the guest via
 the KVM infrastructure.  This patch adds a new mechanism to inject a
 specific
  interrupt to a guest using a decoupled eventfd mechanism:  Any legal
 signal
 on the irqfd (using eventfd semantics from either userspace or
 kernel) will
 translate into an injected interrupt in the guest at the next available
 interrupt window.
   

  r = 1;
  break;
  case KVM_CAP_COALESCED_MMIO:
 diff --git a/include/linux/kvm.h b/include/linux/kvm.h
 index 3db5d8d..dfc4bcc 100644
 --- a/include/linux/kvm.h
 +++ b/include/linux/kvm.h
 @@ -415,6 +415,7 @@ struct kvm_trace_rec {
  #define KVM_CAP_ASSIGN_DEV_IRQ 29
  /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
  #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
 +#define KVM_CAP_EVENTFD 31
   

 Let's keep a fine granularity and call it IRQFD.

Yeah, the iofd stuff is still immature and is not likely to be ready at
the same time anyway.  The CAP bits are cheap enough as it is, so not
sure what I was thinking.  Will fix.


 +
 +int
 +kvm_deassign_irqfd(struct kvm *kvm, int fd)
 +{
 +struct _irqfd *irqfd, *tmp;
 +
 +mutex_lock(kvm-lock);
 +
 +/*
 + * linear search isn't brilliant, but this should be a infrequent
 + * operation and the list should not grow very large
 + */
 +list_for_each_entry_safe(irqfd, tmp, kvm-irqfds, list) {
 +if (irqfd-fd != fd)
 +continue;
   

 Please fget() the new fd and compare the filps; fds aren't meaningful
 in the kernel.  You can also drop _irqfd::fd.

I like this as a second option...


 It may also be useful to compare the gsi, this allows a
 make-before-break switchover:

...but I like this best.  Good idea.


 - guest reroutes irq to a different gsi
 - associate irqfd with new gsi
 - disassociate irqfd from old gsi

 +
 +irqfd_release(irqfd);
 +mutex_unlock(kvm-lock);
 +return 0;
   

 Don't return, userspace may have multiple associations?

Parse error.  Can you elaborate?

-Greg







signature.asc
Description: OpenPGP digital signature


Re: [KVM PATCH v7 3/3] kvm: add iofd support

2009-05-14 Thread Gregory Haskins
Avi Kivity wrote:
 Gregory Haskins wrote:
 iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd
 signal when written to.  Userspace can register any arbitrary address
 with a corresponding eventfd.

   

 Please start a separate patchset for this so I can merge irqfd.

Ack.  Will spin a new split series with your irqfd review changes


 diff --git a/include/linux/kvm.h b/include/linux/kvm.h
 index dfc4bcc..99b6e45 100644
 --- a/include/linux/kvm.h
 +++ b/include/linux/kvm.h
 @@ -292,6 +292,17 @@ struct kvm_guest_debug {
  struct kvm_guest_debug_arch arch;
  };
  
 +#define KVM_IOFD_FLAG_DEASSIGN  (1  0)
 +#define KVM_IOFD_FLAG_PIO   (1  1)
 +
 +struct kvm_iofd {
 +__u64 addr;
 +__u32 len;
 +__u32 fd;
 +__u32 flags;
 +__u8  pad[12];
 +};
 +
   
 Please add a data match capability.  virtio uses a write with the data
 containing the queue ID, and we want a separate event for each queue.

How about u64 cookie ?


   * kvm trace categories
 @@ -508,6 +519,7 @@ struct kvm_irqfd {
  #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct
 kvm_assigned_irq)
  #define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
  #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
 +#define KVM_IOFD   _IOW(KVMIO, 0x78, struct kvm_iofd)
   

 Too general a name.  It's not doing IO, just sending out notifications.

Hmm...good point.  I was trying to reflect [MM/P]IO-FD.  How about
IOSIGNALFD


 Why have assign/deassign for irqfd and a single ioctl for iofd?
Heh..  irqfd liked two because the deassign only needed a u32.  iofd
needed more or less the same structure for both so I guess I thought I
would be slick and condense the vectors.  Will fix so they are
symmetrical.


 The rest looks good.


I will also submit a patch to fix the io_bus stuff so that registrations
can gracefully fail instead of BUG_ON(), and to provide an unregister
function.

Thanks Avi,
-Greg



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] don't use a 32-bit bit type as offset argument.

2009-05-14 Thread Avi Kivity

Glauber Costa wrote:

In the call path of kvm_get_dirty_pages_log_range(),
its caller kvm_get_dirty_bitmap_cb() passes the
target_phys_addr_t both as start_addr and the offset.
So, using int will make dirty tracking over 4G fail
completely.

  


Applied, thanks.


Of course we should be using qemu types in
here, so please don't get me started on this. The whole
file is wrong already ;)
  


These callbacks are called from libkvm, which doesn't know much about qemu.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface

2009-05-14 Thread Avi Kivity

Gregory Haskins wrote:

Please fget() the new fd and compare the filps; fds aren't meaningful
in the kernel.  You can also drop _irqfd::fd.



I like this as a second option...

  

It may also be useful to compare the gsi, this allows a
make-before-break switchover:



...but I like this best.  Good idea.
  


I thought of comparing both.


- guest reroutes irq to a different gsi
- associate irqfd with new gsi
- disassociate irqfd from old gsi



+
+irqfd_release(irqfd);
+mutex_unlock(kvm-lock);
+return 0;
  
  

Don't return, userspace may have multiple associations?



Parse error.  Can you elaborate?

  


You break out of the loop when you match your irqfd.  But there may be
multiple matches.


Granted, it doesn't make much sense to hook the same fd to the same gsi 
multiple times (it may make sense to hook multiple fds to a single gsi, 
or maybe a single fd to multiple gsis), but it pays to have a consistent 
do-what-I-said-even-if-it-doesn't-make-sense interface.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v7 3/3] kvm: add iofd support

2009-05-14 Thread Avi Kivity

Gregory Haskins wrote:

+#define KVM_IOFD_FLAG_PIO   (1  1)
+
+struct kvm_iofd {
+__u64 addr;
+__u32 len;
+__u32 fd;
+__u32 flags;
+__u8  pad[12];
+};
+
  
  

Please add a data match capability.  virtio uses a write with the data
containing the queue ID, and we want a separate event for each queue.



How about u64 cookie ?
  


Sure, and a bit in flags to enable it.


  * kvm trace categories
@@ -508,6 +519,7 @@ struct kvm_irqfd {
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct
kvm_assigned_irq)
 #define KVM_ASSIGN_IRQFD   _IOW(KVMIO, 0x76, struct kvm_irqfd)
 #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32)
+#define KVM_IOFD   _IOW(KVMIO, 0x78, struct kvm_iofd)
  
  

Too general a name.  It's not doing IO, just sending out notifications.



Hmm...good point.  I was trying to reflect [MM/P]IO-FD.  How about
IOSIGNALFD
  


Okay.


Why have assign/deassign for irqfd and a single ioctl for iofd?


Heh..  irqfd liked two because the deassign only needed a u32.  iofd
needed more or less the same structure for both so I guess I thought I
would be slick and condense the vectors.  Will fix so they are
symmetrical.
  


Yeah.  You could have both use just one, or both use two.  Not sure 
which is better.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: i8042.c: No controller found - no keyboard when I type in BIOS

2009-05-14 Thread Tomasz Chmielewski

Tomasz Chmielewski schrieb:

The keyboard is not present after I reboot the guest and usually type
before Linux is started. It does not always happen.


Observed with kvm-83, kvm-84, kvm-85 on multiple KVM hosts (different 
hardware).


Anyone else seeing this? If you're not sure, do something like:


Looks like I'm not alone here with this issue:

http://osdir.com/ml/fedora-virt/2009-04/msg00066.html


--
Tomasz Chmielewski
http://wpkg.org
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface

2009-05-14 Thread Gregory Haskins
Avi Kivity wrote:
 Gregory Haskins wrote:
 Please fget() the new fd and compare the filps; fds aren't meaningful
 in the kernel.  You can also drop _irqfd::fd.
 

 I like this as a second option...

  
 It may also be useful to compare the gsi, this allows a
 make-before-break switchover:
 

 ...but I like this best.  Good idea.
   

 I thought of comparing both.

Ah, ok.  I misunderstood.  We can do that.

 - guest reroutes irq to a different gsi
 - associate irqfd with new gsi
 - disassociate irqfd from old gsi


 +
 +irqfd_release(irqfd);
 +mutex_unlock(kvm-lock);
 +return 0;
 
 Don't return, userspace may have multiple associations?
 

 Parse error.  Can you elaborate?

   

  You break out of the loop when you match your irqfd.  But there may be
  multiple matches.

 Granted, it doesn't make much sense to hook the same fd to the same
 gsi multiple times (it may make sense to hook multiple fds to a single
 gsi, or maybe a single fd to multiple gsis), but it pays to have a
 consistent do-what-I-said-even-if-it-doesn't-make-sense interface.

Ack, will do.

-Greg




signature.asc
Description: OpenPGP digital signature


Re: i8042.c: No controller found - no keyboard when I type in BIOS

2009-05-14 Thread Tomasz Chmielewski

Tomasz Chmielewski schrieb:

Tomasz Chmielewski schrieb:

The keyboard is not present after I reboot the guest and usually type
before Linux is started. It does not always happen.


Observed with kvm-83, kvm-84, kvm-85 on multiple KVM hosts (different 
hardware).


Anyone else seeing this? If you're not sure, do something like:


Looks like I'm not alone here with this issue:

http://osdir.com/ml/fedora-virt/2009-04/msg00066.html


Seems to be a qemu-related problem (I found more confirmations on the
internet); reposting the question to the qemu-devel list.



--
Tomasz Chmielewski
http://wpkg.org

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: event injection MACROs

2009-05-14 Thread Dong, Eddie
Avi Kivity wrote:
 Dong, Eddie wrote:
 OK.
 Also back to Gleb's question, the reason I want to do that is to
 simplify the event generation mechanism in current KVM.
 
 Today KVM uses an additional layer of exception/nmi/interrupt state such as
 vcpu.arch.exception.pending, vcpu->arch.interrupt.pending and
 vcpu->arch.nmi_injected. All this additional layering is due to competition for
 the VM_ENTRY_INTR_INFO_FIELD write to inject the event. Both SVM & VMX have
 only one resource to inject the virtual event, but KVM generates 3 categories
 of events in parallel, which further requires additional logic to decide among
 them.
 
 I thought of using a queue to hold all pending events (in a common
 format), sort it by priority, and inject the head.

The SDM Table 5-4 requires merging 2 events together, i.e. converting to #DF/
triple fault, or injecting them serially when 2 events happen, no matter whether
they are NMI, IRQ or exception.

If we consider the above event-merging activity, that is a single-element queue.
We could have either: 1) a pure SW queue that will be flushed to the HW
register (VM_ENTRY_INTR_INFO_FIELD) later, or 2) direct use of the HW register.


A potential benefit is that it can avoid duplicated code and potential bugs
in the current code, as the following patch shows, if I understand correctly:

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
cr2 = vmcs_readl(EXIT_QUALIFICATION);
KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
(u32)((u64)cr2  32), handler);
-   if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending
)
+   if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending
 || vcpu-arch.nmi_injected)
kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
}


If using the above merged SW queue or the HW register directly, we can do the
following:

--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
cr2 = vmcs_readl(EXIT_QUALIFICATION);
KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
(u32)((u64)cr2  32), handler);
-   if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending
)
+   if (vmcs_read(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)
kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
}

Either way is OK and up to you. BTW Xen uses the HW register directly to
represent a pending event.

 
 One example is that exception has higher priority
 than NMI/IRQ injection in current code which is not true in reality.
 
 
 I don't think it matters in practice, since the guest will see it as a
 timing issue.  NMIs and IRQs are asynchronous (even those generated by
 the guest through the local APIC).

Yes. But it also causes IRQ injection to be delayed, which may have side effects.
For example if the guest exception handler is very long, or if a guest VCPU falls
into a recursive #GP. Within the current logic, a guest IRQ event from KDB (an
IPI) sent from VCPU0, as an example, can't force the dead-looping VCPU1 into KDB
since it is recursively taking #GP.

 
 Another issue is that an failed event from previous injection say
 IRQ or NMI may be discarded if an virtual exception happens in the
 EXIT handling now. With the patch of generic double fault handling,
 this case should be handled as normally. 
 
 
 Discarding an exception is usually okay as it will be regenerated.  I
 don't think we discard interrupts or NMIs.
In reality (running an OS in the guest), it hasn't happened so far. But
architecturally, it could. For example KVM injects an IRQ, but the VM resume gets
a #PF and comes back to KVM with IDT_VECTORING valid. Then KVM will put the
failed IRQ back into the interrupt queue. But if #PF handling generates another
exception, then the interrupt queue won't be injected, since KVM injects the
exception first. And the interrupt queue is discarded at the next VM exit.

Overall, I think this is mostly a simplification but may benefit the future
a lot. Especially with Gleb's recent cleanup, it sounds much easier to
do than before.

I may be making a mistake here; I would like to see more comments.
thx, eddie
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: event injection MACROs

2009-05-14 Thread Gleb Natapov
On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote:
 Avi Kivity wrote:
  Dong, Eddie wrote:
  OK.
  Also back to Gleb's question, the reason I want to do that is to
  simplify event 
  generation mechanism in current KVM.
  
  Today KVM use additional layer of exception/nmi/interrupt such as
  vcpu.arch.exception.pending, vcpu-arch.interrupt.pending 
  vcpu-arch.nmi_injected. 
  All those additional layer is due to compete of
  VM_ENTRY_INTR_INFO_FIELD 
  write to inject the event. Both SVM  VMX has only one resource to
  inject the virtual event but KVM generates 3 catagory of events in
  parallel which further requires additional 
  logic to dictate among them.
  
  I thought of using a queue to hold all pending events (in a common
  format), sort it by priority, and inject the head.
 
 The SDM Table 5-4 requires to merge 2 events together, i.e. convert to #DF/
 Triple fault or inject serially when 2 events happens no matter NMI, IRQ or 
 exception.
 
 As if considering above events merging activity, that is a single element 
 queue.
I don't know how you got to this conclusion from your previous statement.
See the explanation for table 5-2, for instance, where it is stated that an
interrupt should be held pending if there is an exception with higher
priority. Held pending where? In the queue, like we do. Note
that low-prio exceptions are just dropped since they will be regenerated.

  We could have either:  1) A pure SW queue that will be flush to HW 
 register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register.
 
We have three event sources: 1) exceptions 2) IRQ 3) NMI. We should have a
queue of three elements sorted by priority. On each entry we should
inject the event with the highest priority, and remove it from the queue on exit.
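
To make the ordering concrete, here is a toy model of that idea (plain Python, not kernel code; the priority values are illustrative only, since the correct ordering is exactly what is being discussed):

PRIORITY = {"exception": 0, "nmi": 1, "irq": 2}   # lower value = higher priority

class EventQueue:
    def __init__(self):
        self.pending = {}                 # source -> vector/payload, one slot per source

    def queue(self, source, payload):
        self.pending[source] = payload

    def pick_for_injection(self):
        # called on VM entry: pick the highest-priority pending event
        if not self.pending:
            return None
        source = min(self.pending, key=lambda s: PRIORITY[s])
        return source, self.pending[source]

    def complete(self, source):
        # called on VM exit once the event was actually delivered
        self.pending.pop(source, None)

q = EventQueue()
q.queue("irq", 0x30)
q.queue("exception", 14)
print(q.pick_for_injection())   # ('exception', 14) under these toy priorities
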

 
 A potential benefit is that it can avoid duplicated code and potential bugs
 in current code as following patch shows if I understand correctly:
 
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, 
 struct
 kvm_run *kvm_run)
 cr2 = vmcs_readl(EXIT_QUALIFICATION);
 KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 (u32)((u64)cr2  32), handler);
 -   if (vcpu-arch.interrupt.pending || 
 vcpu-arch.exception.pending
 )
 +   if (vcpu-arch.interrupt.pending || 
 vcpu-arch.exception.pending
  || vcpu-arch.nmi_injected)
 kvm_mmu_unprotect_page_virt(vcpu, cr2);
 return kvm_mmu_page_fault(vcpu, cr2, error_code);
 }
This fix is already in Avi's tree (not yet pushed).

 Either way are OK and up to you. BTW Xen uses HW register directly to 
 representing
 an pending event.
 
In this particular case I don't mind using the HW register either, but I
don't see any advantage.

  
  One example is that exception has higher priority
  than NMI/IRQ injection in current code which is not true in reality.
  
  
  I don't think it matters in practice, since the guest will see it as a
  timing issue.  NMIs and IRQs are asynchronous (even those generated by
  the guest through the local APIC).
 
 Yes. But also cause IRQ injection be delayed which may have side effect.
 For example if guest exception handler is very longer or if guest VCPU fall 
 into
 recursive #GP. Within current logic, a guest IRQ event from KDB (IPI) running
 on VCPU0, as an example, can't force the dead loop VCPU1 into KDB since it
 is recursively #GP.
If one #GP causes another #GP this is a #DF. If the CPU has a chance to execute
something in between, KVM will have a chance to inject the NMI.

 
  
  Another issue is that an failed event from previous injection say
  IRQ or NMI may be discarded if an virtual exception happens in the
  EXIT handling now. With the patch of generic double fault handling,
  this case should be handled as normally. 
  
  
  Discarding an exception is usually okay as it will be regenerated.  I
  don't think we discard interrupts or NMIs.
 In reality (Running OS in guest), it doesn't happen so far. But 
 architecturally, 
 it could. For example KVM injects an IRQ, but VM Resume get #PF and 
 back to KVM with IDT_VECTORING valid. Then KVM will put back the failed 
 IRQ to interrupt queue. But if #PF handling generates another exception,
 then the interrupt queue won't be able to be injected, since KVM inject 
 exception first. And the interrupt queue is discarded at next VM Exit.
 
I acknowledge the presence of the bug although I was not able to write a test
case to cause it yet, but it is easy to fix this without changing the code too
much. A unified event queue and clearing only the injected event on exit
should do the trick.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Unicode Error

2009-05-14 Thread Gilberto Nunes
Hi all

I'm a newbie on the list.
I have deployed a system here, with an Ubuntu Server running KVM.
Well, when I run virt-clone command, I get this error:

CMD: virt-clone -o vm01 -n VMUbuntu-2 -f /virt/ubuntu-2.img

RESULT:
Traceback (most recent call last):
  File /usr/lib/python2.6/logging/__init__.py, line 773, in emit
stream.write(fs % msg.encode(UTF-8))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 14:
ordinal not in range(128)

I don't know if this is an issue of Ubuntu, libvirt (!)...

Can someone point me to a way to fix this issue...

Thanks...

Gilberto Nunes Ferreira
TI
Selbetti Gestão de Documentos
Telefone: +55 (47) 3441-6004
Celular: +55 (47) 8861-6672





--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Unicode Error

2009-05-14 Thread Javier Guerra
On Thu, May 14, 2009 at 9:16 AM, Gilberto Nunes
gilberto.nu...@selbetti.com.br wrote:
 Hi all

 I'm newbie on list.
 I have deploy a system here, with a Ubuntu Server running KVM.
 Well, when I run virt-clone command, I get this error:

 CMD: virt-clone -o vm01 -n VMUbuntu-2 -f /virt/ubuntu-2.img

 RESULT:
 Traceback (most recent call last):
  File /usr/lib/python2.6/logging/__init__.py, line 773, in emit
    stream.write(fs % msg.encode(UTF-8))
 UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 14:
 ordinal not in range(128)

 I don't know if this is a issue of Ubuntu, libvirt (!)...

 Someone can point a way to fix this issue...

It seems that at some point in libvirt (which is mostly written in
Python) it transcodes some info between ASCII and UTF-8. Some of that
info isn't valid 7-bit ASCII, probably some name. It's safer to use
only ASCII-valid strings, both in names and paths.
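
One plausible way to reproduce the same exception (Python 2, as in the traceback; the name below is just taken from the signature for illustration):

# -*- coding: utf-8 -*-
name = u"Selbetti Gestão de Documentos"      # contains non-ASCII characters
try:
    out = u"%s\n" % name.encode("UTF-8")     # utf-8 bytes in a unicode format string force an implicit ascii decode
except UnicodeDecodeError as e:
    print(e)   # 'ascii' codec can't decode byte 0xc3 ...
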

of course, it should be reported as a bug to the libvirt people
(http://libvirt.org/bugs.html)

-- 
Javier
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: XP smp using a lot of CPU

2009-05-14 Thread Kevin Shanahan
On Wed, May 13, 2009 at 09:56:18AM +0300, Avi Kivity wrote:
 Ross Boylan wrote:
 I just installed XP into a new VM, specifying -smp 2 for the machine.
 According to top, it's using nearly 200% of a cpu even when I'm not
 doing anything.

  Is this real CPU usage, or just a reporting problem (just as my disk
  image is big according to ls, but isn't really)?

 If it's real, is there anything I can do about it?

 kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64.  Xeon chips; 32
 bit version of XP pro installed, now fully patched (including the
 Windows Genuine Advantage stuff, though I cancelled it when it wanted to
 run).  

  Task manager in XP shows virtually no CPU usage.

 Please cc me on responses.

   

 I'm guessing Windows uses a pio port to sleep, which kvm doesn't  
 support.  Can you provide kvm_stat output?

Could this be what has happened to Windows 2000 as well? (kvm-Bugs-2314737)

Task manager in the guest shows both CPUs idle, but on the host it
shows 200% CPU almost constantly.

ucwb-0119:/home/kmshanah/kvm/kvm-85# ./kvm_stat -1
efer_reload0 0
exits 5454894602  4839
fpu_reload   5311150 5
halt_exits200719 1
halt_wakeup   200218 1
host_state_reload 1951410204  1869
hypercalls 0 0
insn_emulation1391377570  1393
insn_emulation_fail   52 0
invlpg  92034019 1
io_exits  1042421930   694
irq_exits 1545221935  1874
irq_injections 450100320   448
irq_window 225291775   245
kvm_request_irq0 0
largepages 0 0
mmio_exits484805 0
mmu_cache_miss  26346459 4
mmu_flooded 21532314 4
mmu_pde_zapped886970 0
mmu_pte_updated 52882039 4
mmu_pte_write   70044961 6
mmu_recycled1102 0
mmu_shadow_zapped   26384127 4
mmu_unsync  7671 0
mmu_unsync_global  0 0
nmi_injections 0 0
nmi_window 0 0
pf_fixed   421444565   167
pf_guest34545643 1
remote_tlb_flush   125039581 9
request_nmi0 0
signal_exits   1 0
tlb_flush  749126829   284

Regards,
Kevin.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: event injection MACROs

2009-05-14 Thread Dong, Eddie
Gleb Natapov wrote:
 On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote:
 Avi Kivity wrote:
 Dong, Eddie wrote:
 OK.
 Also back to Gleb's question, the reason I want to do that is to
 simplify event generation mechanism in current KVM.
 
 Today KVM use additional layer of exception/nmi/interrupt such as
 vcpu.arch.exception.pending, vcpu-arch.interrupt.pending 
 vcpu-arch.nmi_injected. All those additional layer is due to
 compete of VM_ENTRY_INTR_INFO_FIELD
 write to inject the event. Both SVM  VMX has only one resource to
 inject the virtual event but KVM generates 3 catagory of events in
 parallel which further requires additional
 logic to dictate among them.
 
 I thought of using a queue to hold all pending events (in a common
 format), sort it by priority, and inject the head.
 
 The SDM Table 5-4 requires to merge 2 events together, i.e. convert
 to #DF/ 
 Triple fault or inject serially when 2 events happens no matter NMI,
 IRQ or exception. 
 
 As if considering above events merging activity, that is a single
 element queue. 
 I don't know how you got to this conclusion from you previous
 statement. 
 See explanation to table 5-2 for instate where it is stated that
 interrupt should be held pending if there is exception with higher
 priority. Should be held pending where? In the queue, like we do. Note
 that low prio exceptions are just dropped since they will be
 regenerated. 

I have a different understanding here.
My understanding is that held means NO INTA in HW, i.e. the LAPIC still holds
this IRQ.

 
  We could have either:  1) A pure SW queue that will be flush to HW
 register later (VM_ENTRY_INTR_INFO_FIELD), 2) Direct use HW register.
 
 We have three event sources 1) exceptions 2) IRQ 3) NMI. We should
 have 
 queue of three elements sorted by priority. On each entry we should

Table 5-4 already says NMI/IRQ is BENIGN.

 inject an event with highest priority. And remove it from queue on
 exit. 

The problem is that we have to decide to inject only one of the above 3, and
discard the rest.
Whether to prioritize them or merge them (into one event as in Table 5-4) is
another story.

 
 
 A potential benefit is that it can avoid duplicated code and
 potential bugs 
 in current code as following patch shows if I understand correctly:
 
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu
 *vcpu, struct kvm_run *kvm_run) cr2 =
 vmcs_readl(EXIT_QUALIFICATION);
 KVMTRACE_3D(PAGE_FAULT, vcpu,
 error_code, (u32)cr2, (u32)((u64)cr2  32), handler); -
 if (vcpu-arch.interrupt.pending || vcpu-arch.exception.pending ) +
 if (vcpu-arch.interrupt.pending ||
 vcpu-arch.exception.pending  ||
 vcpu-arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu,
 cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } 
 This fix is already in Avi's tree (not yet pushed).
 
 Either way are OK and up to you. BTW Xen uses HW register directly
 to representing 
 an pending event.
 
 In this particular case I don't mind to use HW register either, but I
 don't see any advantage.
 
 
 One example is that exception has higher priority
 than NMI/IRQ injection in current code which is not true in
 reality. 
 
 
 I don't think it matters in practice, since the guest will see it
 as a timing issue.  NMIs and IRQs are asynchronous (even those
 generated by the guest through the local APIC).
 
 Yes. But also cause IRQ injection be delayed which may have side
 effect. 
 For example if guest exception handler is very longer or if guest
 VCPU fall into recursive #GP. Within current logic, a guest IRQ
 event from KDB (IPI) running 
 on VCPU0, as an example, can't force the dead loop VCPU1 into KDB
 since it 
 is recursively #GP.
 If one #GP causes another #GP this is a #DF. If CPU has a chance to

I mean another #GP on the next instruction, i.e. at the beginning of the #GP
handler in the guest. No #DF here.

 executes 
 something in between KVM will have a chance to inject NMI.

It could have no chance in some cases, though that is not very common.

 
 
 
 Another issue is that an failed event from previous injection say
 IRQ or NMI may be discarded if an virtual exception happens in the
 EXIT handling now. With the patch of generic double fault handling,
 this case should be handled as normally.
 
 
 Discarding an exception is usually okay as it will be regenerated. 
 I don't think we discard interrupts or NMIs.
 In reality (Running OS in guest), it doesn't happen so far. But
 architecturally, 
 it could. For example KVM injects an IRQ, but VM Resume get #PF and
 back to KVM with IDT_VECTORING valid. Then KVM will put back the
 failed 
 IRQ to interrupt queue. But if #PF handling generates another
 exception, 
 then the interrupt queue won't be able to be injected, since KVM
 inject 
 exception first. And the interrupt queue is discarded at next VM
 Exit. 
 
 I acknowledge the presence of the bug 

Re: [PATCH v4 resend 5/6] VT-d: cleanup iommu_flush_iotlb_psi and flush_unmaps

2009-05-14 Thread David Woodhouse
On Thu, 2009-05-14 at 10:32 +0800, Yu Zhao wrote:
 Make iommu_flush_iotlb_psi() and flush_unmaps() more readable.

This doesn't apply any more.

-- 
David WoodhouseOpen Source Technology Centre
david.woodho...@intel.com  Intel Corporation

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: XP smp using a lot of CPU

2009-05-14 Thread Ross Boylan
On Thu, 2009-05-14 at 12:19 +0200, Johannes Schlatow wrote:
 I had a similar problem some weeks ago. Finally I found out that my VM
 running WinXP was working on a non-acpi system (maybe I started kvm
 with -no-acpi option during the installation). In the Device Manager
  there has to be the entry Computer -> ACPI Multiprocessor PC.
 Otherwise the VM produced 100% real cpu load on my machines (the fans
 were running on highest speed level).
 I just started the WinXP installation in repair mode and this did fix
 the problem.
 
 I hope this helps!
 
 regards
   Johannes
That may be it: I was running with -no-acpi.  Various docs recommended
this for Windows performance, but your comment reminded me that acpi is
(I think) required for multiprocessors.

I'll be in where I can check on this later today.

Thanks.
Ross
 
 On Wed, May 13, 2009 at 2:41 AM, Ross Boylan r...@biostat.ucsf.edu
 wrote:
 I just installed XP into a new VM, specifying -smp 2 for the
 machine.
 According to top, it's using nearly 200% of a cpu even when
 I'm not
 doing anything.
 
 Is this real CPU useage, or just a reporting problem (just as
 my disk
 image is big according to ls, but isn't really)?
 
 If it's real, is there anything I can do about it?
 
 kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64.  Xeon
 chips; 32
 bit version of XP pro installed, now fully patched (including
 the
 Windows Genuine Advantage stuff, though I cancelled it when it
 wanted to
 run).
 
 Task manager in XP shows virtually no CPU useage.
 
 Please cc me on responses.
 
 Thanks for any assistance.
 --
 Ross Boylan  wk:  (415)
 514-8146
 185 Berry St #5700
 r...@biostat.ucsf.edu
 Dept of Epidemiology and Biostatistics   fax: (415)
 514-8150
 University of California, San Francisco
 San Francisco, CA 94107-1739 hm:  (415)
 550-1062
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm
 in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at
  http://vger.kernel.org/majordomo-info.html
 
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: event injection MACROs

2009-05-14 Thread Gleb Natapov
On Thu, May 14, 2009 at 10:34:11PM +0800, Dong, Eddie wrote:
 Gleb Natapov wrote:
  On Thu, May 14, 2009 at 09:43:33PM +0800, Dong, Eddie wrote:
  Avi Kivity wrote:
  Dong, Eddie wrote:
  OK.
   Also back to Gleb's question, the reason I want to do that is to
   simplify the event generation mechanism in current KVM.
   
   Today KVM uses an additional layer of exception/nmi/interrupt state, such as
   vcpu->arch.exception.pending, vcpu->arch.interrupt.pending and
   vcpu->arch.nmi_injected. All those additional layers exist because the three
   sources compete for the single VM_ENTRY_INTR_INFO_FIELD
   write used to inject the event. Both SVM & VMX have only one resource to
   inject the virtual event, but KVM generates 3 categories of events in
   parallel, which further requires additional
   logic to arbitrate among them.
  
  I thought of using a queue to hold all pending events (in a common
  format), sort it by priority, and inject the head.
  
   The SDM Table 5-4 requires merging 2 events together, i.e. converting them
   to #DF / triple fault, or injecting them serially, when 2 events happen,
   no matter whether they are NMI, IRQ or exception.
   
   If you take that event-merging activity into account, it is effectively a
   single-element queue.
   I don't know how you got to this conclusion from your previous
   statement.
   See the explanation of table 5-2, for instance, where it is stated that an
   interrupt should be held pending if there is an exception with higher
   priority. Held pending where? In the queue, like we do. Note
   that low-priority exceptions are just dropped since they will be
   regenerated.
 
  I have a different understanding here.
  My understanding is that held means NO INTA in HW, i.e. the LAPIC still holds
  this IRQ.
  
 And what if INTA has already happened, the CPU is ready to fetch the IDT
 entry for the interrupt vector, and at this very moment the CPU faults?

  
    We could have either:  1) a pure SW queue that will be flushed to the HW
   register later (VM_ENTRY_INTR_INFO_FIELD), or 2) direct use of the HW register.
   
   We have three event sources: 1) exceptions 2) IRQ 3) NMI. We should
   have a queue of three elements sorted by priority. On each entry we should
  
  Table 5-4 already says NMI/IRQ is BENIGN.
 Table 5-2 applies here, not table 5-4, I think.

 
   inject the event with the highest priority, and remove it from the queue on
   exit.
  
  The problem is that we have to decide to inject only one of the above 3, and
  discard the rest.
  Whether we prioritize them or merge them (into one event as in Table 5-4) is
  another story.
 Only a small number of events are merged into #DF. Most are handled serially
 (the SDM does not define what serially means, unfortunately), so I don't
 understand where discard the rest comes from. We can discard an
 exception since it will be regenerated anyway, but IRQ and NMI are
 another story. The SDM says that an IRQ should be held pending (once again not
 much explanation here), and nothing about NMI.
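
(Purely for illustration, a minimal user-space C sketch of the "common
format" pending-event queue idea discussed above; the type names, priority
values and three-slot layout are assumptions of this sketch, not KVM code:)

#include <stdbool.h>
#include <stdio.h>

/* Illustrative event classes; the priority values are made up. */
enum pending_type { PENDING_EXCEPTION, PENDING_NMI, PENDING_IRQ };

struct pending_event {
	enum pending_type type;
	int vector;
	int prio;		/* smaller value = higher priority */
	bool valid;
};

/* One slot per source, as discussed: exception, NMI, IRQ. */
static struct pending_event queue[3];

static void queue_event(enum pending_type type, int vector, int prio)
{
	queue[type] = (struct pending_event){ type, vector, prio, true };
}

/* Pick the highest-priority valid entry; this is what would be flushed
 * into VM_ENTRY_INTR_INFO_FIELD (or the SVM equivalent) on VM entry. */
static struct pending_event *pick_head(void)
{
	struct pending_event *head = NULL;
	for (int i = 0; i < 3; i++)
		if (queue[i].valid && (!head || queue[i].prio < head->prio))
			head = &queue[i];
	return head;
}

int main(void)
{
	queue_event(PENDING_IRQ, 0x30, 2);
	queue_event(PENDING_EXCEPTION, 14, 1);	/* #PF outranks the IRQ here */

	struct pending_event *head = pick_head();
	printf("inject type=%d vector=%d\n", head->type, head->vector);
	head->valid = false;			/* removed from the queue on exit */
	return 0;
}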

  
  
  A potential benefit is that it can avoid duplicated code and
  potential bugs 
  in current code as following patch shows if I understand correctly:
  
  --- a/arch/x86/kvm/vmx.c
  +++ b/arch/x86/kvm/vmx.c
   @@ -2599,7 +2599,7 @@ static int handle_exception(struct kvm_vcpu
   *vcpu, struct kvm_run *kvm_run) cr2 =
   vmcs_readl(EXIT_QUALIFICATION);
   KVMTRACE_3D(PAGE_FAULT, vcpu,
   error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); -
   if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending ) +
   if (vcpu->arch.interrupt.pending ||
   vcpu->arch.exception.pending ||
   vcpu->arch.nmi_injected) kvm_mmu_unprotect_page_virt(vcpu,
   cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); }
  This fix is already in Avi's tree (not yet pushed).
  
   Either way is OK and up to you. BTW Xen uses the HW register directly
   to represent
   a pending event.
  
  In this particular case I don't mind to use HW register either, but I
  don't see any advantage.
  
  
   One example is that an exception has higher priority
   than NMI/IRQ injection in the current code, which is not true in
   reality.
  
  
  I don't think it matters in practice, since the guest will see it
  as a timing issue.  NMIs and IRQs are asynchronous (even those
  generated by the guest through the local APIC).
  
   Yes. But it also causes IRQ injection to be delayed, which may have side
   effects.
   For example, if the guest exception handler is very long, or if a guest
   VCPU falls into a recursive #GP. Within the current logic, a guest IRQ
   event from KDB (an IPI) running
   on VCPU0, as an example, can't force the dead-looping VCPU1 into KDB
   since it
   is recursively taking #GP.
   If one #GP causes another #GP this is a #DF. If the CPU has a chance to
  
  I mean another #GP on the next instruction, i.e. at the beginning of the #GP
  handler in the guest. No #DF here.
  
 In this case we will enter the guest with an NMI window open request and
 should exit immediately, before the first instruction of the #GP handler. At
 this moment KVM will be able to inject the NMI.
 
   execute
   something in between, KVM will have a chance to inject the NMI.
  
  It could have no chance 

Re: [PATCH] kvm: user: include arch specific headers from $(KERNELDIR)

2009-05-14 Thread Arnd Bergmann
On Thursday 14 May 2009, Avi Kivity wrote:
 
 These aren't the real kernel headers, just cheap copies carried in 
 qemu-kvm.git which have been appropriately postprocessed.  We do this 
 since the kvm external module can run on a much older kernel, so there 
 is no natural place to find its headers.
 

Sorry for the confusion on my part. I was aware of the sanitized
kernel headers, but was misled by the line

kerneldir=/lib/modules/$(uname -r)/build

in kvm/user/configure. What I didn't realize is that this
always gets overridden by kvm/configure.
Maybe we can change the default in kvm/user/configure to
something more sensible:
---
[PATCH] kvm: user: fix default kerneldir

calling ./configure in kvm/user sets the kerneldir to the
currently running kernel, which is incorrect for user code.
This changes the default to the sanitized header files from
the kvm/kernel directory.

Signed-off-by: Arnd Bergmann a...@arndb.de

diff --git a/kvm/user/configure b/kvm/user/configure
index efb8705..858a519 100755
--- a/kvm/user/configure
+++ b/kvm/user/configure
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 prefix=/usr/local
-kerneldir=/lib/modules/$(uname -r)/build
+kerneldir=$(dirname $0)/../kernel
 cc=gcc
 ld=ld
 objcopy=objcopy
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface

2009-05-14 Thread Gregory Haskins
Avi Kivity wrote:
 Gregory Haskins wrote:
 KVM provides a complete virtual system environment for guests, including
 support for injecting interrupts modeled after the real
 exception/interrupt
 facilities present on the native platform (such as the IDT on x86).
 Virtual interrupts can come from a variety of sources (emulated devices,
 pass-through devices, etc) but all must be injected to the guest via
 the KVM infrastructure.  This patch adds a new mechanism to inject a
 specific
 interrupt to a guest using a decoupled eventfd mechanism:  Any legal
 signal
 on the irqfd (using eventfd semantics from either userspace or
 kernel) will
 translate into an injected interrupt in the guest at the next available
 interrupt window.

 +
 +static void
 +irqfd_inject(struct work_struct *work)
 +{
 +struct _irqfd *irqfd = container_of(work, struct _irqfd, work);
 +struct kvm *kvm = irqfd-kvm;
 +
   


 I think you need to ->read() from the irqfd, otherwise the count will
 never clear.

Yeah, and this is a disadvantage of using eventfd vs a custom anon-fd
implementation.

However, the count is really only there for deciding whether to put a
traditional eventfd recipient to sleep, which doesn't really apply in this
application.  I suppose we could try to invoke the read method (or add a
new method to eventfd to allow it to be cleared independently of
f_ops->read(), a la eventfd_signal() vs f_ops->write()).  I'm not
convinced we really need to worry about it, though.  IMO we can just let
the count accumulate.
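
(For reference, standard eventfd semantics: an 8-byte read() returns the
accumulated counter and resets it to zero. A minimal, purely illustrative
user-space sketch:)

#include <sys/eventfd.h>
#include <stdint.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	int fd = eventfd(0, 0);
	uint64_t val = 1;

	/* Each write adds to the counter -- this is what signaling the irqfd does. */
	write(fd, &val, sizeof(val));
	write(fd, &val, sizeof(val));

	/* A read returns the accumulated count (2 here) and clears it. */
	uint64_t count;
	read(fd, &count, sizeof(count));
	printf("drained count = %llu\n", (unsigned long long)count);

	close(fd);
	return 0;
}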

But if you insist this loose end should be addressed, perhaps Davide has
some thoughts on how to best do this?

-Greg


 +mutex_lock(kvm-lock);
 +kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
 +kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
 +mutex_unlock(kvm-lock);
 +}
   





signature.asc
Description: OpenPGP digital signature


Re: [PATCH] kvm: user: include arch specific headers from $(KERNELDIR)

2009-05-14 Thread Avi Kivity

Arnd Bergmann wrote:

On Thursday 14 May 2009, Avi Kivity wrote:
  
These aren't the real kernel headers, just cheap copies carried in 
qemu-kvm.git which have been appropriately postprocessed.  We do this 
since the kvm external module can run on a much older kernel, so there 
is no natural place to find its headers.



Sorry for the confusion on my part. I was aware of the sanitized
kernel headers, but was misled by the line

kerneldir=/lib/modules/$(uname -r)/build

in kvm/user/configure. What I didn't realize is that this
always gets overridden by kvm/configure.
Maybe we can change the default in kvm/user/configure to
something more sensible:
---
[PATCH] kvm: user: fix default kerneldir

calling ./configure in kvm/user sets the kerneldir to the
currently running kernel, which is incorrect for user code.
This changes the default to the sanitized header files from
the kvm/kernel directory.

Signed-off-by: Arnd Bergmann a...@arndb.de

diff --git a/kvm/user/configure b/kvm/user/configure
index efb8705..858a519 100755
--- a/kvm/user/configure
+++ b/kvm/user/configure
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 prefix=/usr/local

-kerneldir=/lib/modules/$(uname -r)/build
+kerneldir=$(dirname $0)/../kernel
 cc=gcc
 ld=ld
 objcopy=objcopy
  


I usually add a readlink -f in there due to my innate fear of relative 
directories and cd.


btw, these are my plans for kvm/user:

- convert the tests to be loadable with qemu -kernel; we lose the 
simplicity of kvmctl so I'm not 100% sure it's a good idea.  On the 
other hand some of the tests are useful for tcg.
- kill kvmtrace (replaced by the standard ftrace tools, whatever they 
are; maybe create a new repo if kvm specific tools are needed)


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: user: include arch specific headers from $(KERNELDIR)

2009-05-14 Thread Arnd Bergmann
On Thursday 14 May 2009, Avi Kivity wrote:
 I usually add a readlink -f in there due to my innate fear of relative 
 directories and cd.

There is one already in the only place where this gets used:

KERNELDIR=$(readlink -f $kerneldir)

It also gets shown in the configure --help output, but I suppose
showing the relative path there may be helpful because of its
brevity.

Arnd 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


profiling virtio (blk in this case)

2009-05-14 Thread Eran Rom
The Host<->Guest calls in virtio_blk (using the generic virtio kick/notify)
are as follows:

Guest->Host
---
do_virtblk_request calls kick on the guest side, causing handle_output to be
called on the host side.

Host->Guest
---
virtio_blk_rw_complete calls notify on the host side, causing block_done to be
called on the guest side.

My question has to do with the timing of the calls.
Which would be the correct drawing?
1. Overlapping:
kick          |--------|
handle_output      |--------|
2. Disjoint:
kick          |--------|
handle_output           |--------|

In other words:
if I do
g1 = get_cpu_cycles
kick
g2 = get_cpu_cycles
and 
h1 = get_cpu_cycles
handle_output
h2 = get_cpu_cycles
would (g2-g1) + (h2-h1) count some cycles twice?

Same question for notify and block_done.
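
(A minimal sketch of how those timestamps might be taken with the TSC; the
helper below is hypothetical and not from the virtio code, and whether the
two intervals overlap is exactly the open question:)

#include <stdint.h>

/* Hypothetical cycle-counter helper (x86 rdtsc). */
static inline uint64_t get_cpu_cycles(void)
{
	uint32_t lo, hi;
	__asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
	return ((uint64_t)hi << 32) | lo;
}

/* Guest side: bracket the kick with two TSC reads. */
void measure_kick(void (*kick)(void))
{
	uint64_t g1 = get_cpu_cycles();
	kick();			/* may trap to the host synchronously */
	uint64_t g2 = get_cpu_cycles();
	/* If handle_output runs synchronously inside that trap, g2 - g1
	 * already includes the host-side work, so adding h2 - h1 on top
	 * would count those cycles twice. */
	(void)(g2 - g1);
}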

Thanks very much,
Eran


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] bios: Fix MADT corruption and RSDT size when using -acpitable

2009-05-14 Thread Beth Kon

Anthony Liguori wrote:

Vincent Minet wrote:

External ACPI tables are counted twice for the RSDT size and the load
address for the first external table is in the MADT (interrupt override
entries are overwritten).

Signed-off-by: Vincent Minet vinc...@vincent-minet.net
  


Beth,

I think you had a patch attempting to address the same issue.  It was 
a bit more involved though.


Which is the proper fix and are they both to the same problem?
They are for 2 different bases. My patch was for qemu's bochs bios and 
this is for qemu-kvm/kvm/bios/rombios32.c. They are pretty divergent in 
this area of setting up the ACPI tables. My patch is still needed for 
the qemu base. I hope we'll be getting to one base soon :-)


Assuming the intent of the code was for MAX_RSDT_ENTRIES to include 
external_tables, this patch looks correct. I think one additional check 
would be needed (in my patch) to make sure that the code doesn't exceed 
MAX_RSDT_ENTRIES when the external tables are being loaded.


My patch also puts all the code that calculates madt_size in the same 
place, at the beginning of the table layout. I believe this is neater 
and will avoid problems like this one in the future. As much as 
possible, I think it best to get all the tables laid out, then fill 
them in. If for some reason this is not acceptable, we need to add a big 
note that no tables should be laid out after the madt because the madt 
may grow further down in the code and overwrite the other table.
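
(A schematic sketch, not rombios32.c code, of the "lay everything out first,
then fill in" pattern argued for above; the sizes and names are placeholders:)

#include <stdint.h>

/* Placeholder sizes; in the real BIOS these come from the table structs. */
enum { FADT_SIZE = 244, MADT_MAX_SIZE = 1024, SRAT_SIZE = 256 };

struct layout { uint32_t fadt, madt, srat, end; };

/* Pass 1: compute every address before writing a single byte, so a table
 * that grows later (like the MADT) can never overwrite its neighbour. */
static struct layout lay_out_tables(uint32_t base)
{
	struct layout l;

	l.fadt = base;
	l.madt = l.fadt + FADT_SIZE;
	l.srat = l.madt + MADT_MAX_SIZE;	/* worst-case MADT size reserved */
	l.end  = l.srat + SRAT_SIZE;
	return l;
}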







Regards,

Anthony Liguori


---
 kvm/bios/rombios32.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/kvm/bios/rombios32.c b/kvm/bios/rombios32.c
index cbd5f15..289361b 100755
--- a/kvm/bios/rombios32.c
+++ b/kvm/bios/rombios32.c
@@ -1626,7 +1626,7 @@ void acpi_bios_init(void)
 addr = base_addr = ram_size - ACPI_DATA_SIZE;
 rsdt_addr = addr;
 rsdt = (void *)(addr);
-rsdt_size = sizeof(*rsdt) + external_tables * 4;
+rsdt_size = sizeof(*rsdt);
 addr += rsdt_size;
 
 fadt_addr = addr;

@@ -1787,6 +1787,7 @@ void acpi_bios_init(void)
 }
 int_override++;
 madt_size += sizeof(struct madt_int_override);
+addr += sizeof(struct madt_int_override);
 }
 acpi_build_table_header((struct acpi_table_header *)madt,
 "APIC", madt_size, 1);
  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


diff --git a/kvm/bios/rombios32.c b/kvm/bios/rombios32.c
index cbd5f15..23835b6 100755
--- a/kvm/bios/rombios32.c
+++ b/kvm/bios/rombios32.c
@@ -1626,7 +1626,7 @@ void acpi_bios_init(void)
 addr = base_addr = ram_size - ACPI_DATA_SIZE;
 rsdt_addr = addr;
 rsdt = (void *)(addr);
-rsdt_size = sizeof(*rsdt) + external_tables * 4;
+rsdt_size = sizeof(*rsdt);
 addr += rsdt_size;
 
 fadt_addr = addr;
@@ -1665,6 +1665,7 @@ void acpi_bios_init(void)
 
 addr = (addr + 7) & ~7;
 madt_addr = addr;
+madt = (void *)(addr);
 madt_size = sizeof(*madt) +
 sizeof(struct madt_processor_apic) * MAX_CPUS +
 #ifdef BX_QEMU
@@ -1672,7 +1673,11 @@ void acpi_bios_init(void)
 #else
 sizeof(struct madt_io_apic);
 #endif
-madt = (void *)(addr);
+for ( i = 0; i < 16; i++ ) {
+if ( PCI_ISA_IRQ_MASK & (1U << i) ) {
+madt_size += sizeof(struct madt_int_override);
+}
+}
 addr += madt_size;
 
 #ifdef BX_QEMU
@@ -1786,7 +1791,6 @@ void acpi_bios_init(void)
 continue;
 }
 int_override++;
-madt_size += sizeof(struct madt_int_override);
 }
 acpi_build_table_header((struct acpi_table_header *)madt,
 "APIC", madt_size, 1);
@@ -1868,17 +1872,6 @@ void acpi_bios_init(void)
 acpi_build_table_header((struct  acpi_table_header *)hpet,
  "HPET", sizeof(*hpet), 1);
 #endif
-
-acpi_additional_tables(); /* resets cfg to required entry */
-for(i = 0; i < external_tables; i++) {
-uint16_t len;
-if(acpi_load_table(i, addr, &len) < 0)
-BX_PANIC("Failed to load ACPI table from QEMU\n");
-rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(addr);
-addr += len;
-if(addr >= ram_size)
-BX_PANIC("ACPI table overflow\n");
-}
 #endif
 
 /* RSDT */
@@ -1891,6 +1884,16 @@ void acpi_bios_init(void)
 //  rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(hpet_addr);
 if (nb_numa_nodes > 0)
 rsdt->table_offset_entry[nb_rsdt_entries++] = cpu_to_le32(srat_addr);
+acpi_additional_tables(); /* resets cfg to required entry */
+for(i = 0; i < external_tables; i++) {
+uint16_t len;
+if(acpi_load_table(i, addr, &len) < 0)
+BX_PANIC("Failed to load ACPI table from QEMU\n");
+

[KVM PATCH v8] kvm: add support for irqfd via eventfd-notification interface

2009-05-14 Thread Gregory Haskins
KVM provides a complete virtual system environment for guests, including
support for injecting interrupts modeled after the real exception/interrupt
facilities present on the native platform (such as the IDT on x86).
Virtual interrupts can come from a variety of sources (emulated devices,
pass-through devices, etc) but all must be injected to the guest via
the KVM infrastructure.  This patch adds a new mechanism to inject a specific
interrupt to a guest using a decoupled eventfd mechanism:  Any legal signal
on the irqfd (using eventfd semantics from either userspace or kernel) will
translate into an injected interrupt in the guest at the next available
interrupt window.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 arch/x86/kvm/Makefile|2 
 arch/x86/kvm/x86.c   |1 
 include/linux/kvm.h  |   11 +++
 include/linux/kvm_host.h |4 +
 virt/kvm/eventfd.c   |  198 ++
 virt/kvm/kvm_main.c  |   11 +++
 6 files changed, 226 insertions(+), 1 deletions(-)
 create mode 100644 virt/kvm/eventfd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b43c4ef..4d50904 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,7 +3,7 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-coalesced_mmio.o irq_comm.o)
+coalesced_mmio.o irq_comm.o eventfd.o)
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c6d3ff3..1d062eb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_ASSIGN_DEV_IRQ:
+   case KVM_CAP_IRQFD:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 3db5d8d..a1ecc6a 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -415,6 +415,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#define KVM_CAP_IRQFD 31
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -454,6 +455,15 @@ struct kvm_irq_routing {
 
 #endif
 
+#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+
+struct kvm_irqfd {
+   __u32 fd;
+   __u32 gsi;
+   __u32 flags;
+   __u8  pad[20];
+};
+
 /*
  * ioctls for VM fds
  */
@@ -498,6 +508,7 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_IRQFD  _IOW(KVMIO, 0x76, struct kvm_irqfd)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2b8df0c..dc91610 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@ struct kvm {
struct list_head vm_list;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
+   struct list_head irqfds;
struct kvm_vm_stat stat;
struct kvm_arch arch;
atomic_t users_count;
@@ -525,4 +526,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #endif
 
+int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
+void kvm_irqfd_release(struct kvm *kvm);
+
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
new file mode 100644
index 000..884df16
--- /dev/null
+++ b/virt/kvm/eventfd.c
@@ -0,0 +1,198 @@
+/*
+ * kvm eventfd support - use eventfd objects to signal various KVM events
+ *
+ * Copyright 2009 Novell.  All Rights Reserved.
+ *
+ * Author:
+ * Gregory Haskins ghask...@novell.com
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/workqueue.h>
+#include <linux/syscalls.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/list.h>
+
+/*
+ * 
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ *
+ * Credit goes to Avi Kivity for the original idea.
+ * 

[PATCH v8] qemu-kvm: add irqfd support

2009-05-14 Thread Gregory Haskins
irqfd lets you create an eventfd based file-desriptor to inject interrupts
to a kvm guest.  We associate one gsi per fd for fine-grained routing.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 kvm/libkvm/libkvm.c |   57 +++
 kvm/libkvm/libkvm.h |   26 +++
 2 files changed, 83 insertions(+), 0 deletions(-)

diff --git a/kvm/libkvm/libkvm.c b/kvm/libkvm/libkvm.c
index ba0a5d1..ccab985 100644
--- a/kvm/libkvm/libkvm.c
+++ b/kvm/libkvm/libkvm.c
@@ -34,6 +34,7 @@
 #include <string.h>
 #include <errno.h>
 #include <sys/ioctl.h>
+#include <sys/eventfd.h>
 #include <inttypes.h>
 #include "libkvm.h"
 
@@ -1444,3 +1445,59 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm,
 return ret;
 }
 #endif
+
+#ifdef KVM_CAP_IRQFD
+static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
+{
+   int r;
+   struct kvm_irqfd data = {
+   .fd= fd,
+   .gsi   = gsi,
+   .flags = flags,
+   };
+
+   r = ioctl(kvm->vm_fd, KVM_IRQFD, &data);
+   if (r == -1)
+   r = -errno;
+   return r;
+}
+
+int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags)
+{
+   int r;
+   int fd;
+
+   if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
+   return -ENOENT;
+
+   fd = eventfd(0, 0);
+   if (fd < 0)
+   return -errno;
+
+   r = _kvm_irqfd(kvm, fd, gsi, 0);
+   if (r < 0) {
+   close(fd);
+   return -errno;
+   }
+
+   return fd;
+}
+
+int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
+{
+   return _kvm_irqfd(kvm, fd, gsi, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+#else /* KVM_CAP_IRQFD */
+
+int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags)
+{
+   return -ENOENT;
+}
+
+int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
+{
+   return -ENOENT;
+}
+
+#endif /* KVM_CAP_IRQFD */
diff --git a/kvm/libkvm/libkvm.h b/kvm/libkvm/libkvm.h
index 4821a1e..3ccbe3d 100644
--- a/kvm/libkvm/libkvm.h
+++ b/kvm/libkvm/libkvm.h
@@ -856,6 +856,32 @@ int kvm_commit_irq_routes(kvm_context_t kvm);
  */
 int kvm_get_irq_route_gsi(kvm_context_t kvm);
 
+/*!
+ * \brief Create a file descriptor for injecting interrupts
+ *
+ * Creates an eventfd based file-descriptor that maps to a specific GSI
+ * in the guest.  eventfd compliant signaling (write() from userspace, or
+ * eventfd_signal() from kernelspace) will cause the GSI to inject
+ * itself into the guest at the next available window.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param gsi GSI to assign to this fd
+ * \param flags reserved, must be zero
+ */
+int kvm_create_irqfd(kvm_context_t kvm, int gsi, int flags);
+
+/*!
+ * \brief Destroy an irqfd file descriptor
+ *
+ * Destroys a file descriptor previously opened with kvm_create_irqfd()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param fd fd to close
+ * \param gsi GSI to close
+ * \param flags reserved, must be zero
+ */
+int kvm_destroy_irqfd(kvm_context_t kvm, int fd, int gsi, int flags);
+
 #ifdef KVM_CAP_DEVICE_MSIX
 int kvm_assign_set_msix_nr(kvm_context_t kvm,
   struct kvm_assigned_msix_nr *msix_nr);
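
(Not part of the patch: a hypothetical caller-side sketch of how this API
could be used, following the semantics described in the comments above;
error handling is elided and the helper name is made up:)

#include <stdint.h>
#include <unistd.h>
#include "libkvm.h"

/* Assumes a kvm_context_t obtained elsewhere and an already-routed GSI. */
void example_irqfd_usage(kvm_context_t kvm, int gsi)
{
	uint64_t one = 1;
	int fd = kvm_create_irqfd(kvm, gsi, 0);

	if (fd < 0)
		return;

	/* Any eventfd signal now injects the GSI at the next interrupt window. */
	write(fd, &one, sizeof(one));

	kvm_destroy_irqfd(kvm, fd, gsi, 0);
	close(fd);
}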

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: Status of pci passthrough work?

2009-05-14 Thread Passera, Pablo R
Amit,
I am trying to use PVDMA. I've downloaded a kernel snapshot from your 
kvm git, but I couldn't download a snapshot or the repo from your kvm-userspace 
tree. I tried to launch the VM using kvm-85 user space but it hangs before 
loading it. Should it work with kvm-85 user space? Do you have the userspace 
patches for PVDMA?

Thanks,
Pablo

-Original Message-
From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
Behalf Of Amit Shah
Sent: Tuesday, December 16, 2008 12:07 PM
To: xming
Cc: Thomas Fjellstrom; kvm@vger.kernel.org
Subject: Re: Status of pci passthrough work?

Hello,

- xming xming...@gmail.com wrote:

 When can we expect pvdma updates? Is it ever going to be merged into
 mainline kvm?

The pvdma tree at

http://git.kernel.org/?p=linux/kernel/git/amit/kvm.git;a=shortlog;h=pvdm
a

is based of an older Linux version.

It's usable, but not ported to newer kernel versions. I can't say when
I'll get around to doing it. In the meanwhile, if someone else is
interested, drop me a line.

Amit.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-14 Thread Sebastian Herbszt

malc wrote:

On Wed, 13 May 2009, Sebastian Herbszt wrote:


Jan Kiszka wrote:
 Moreover, does sound work at all with your qemu?
 The image I tried [1] issues two beeps after loading (obviously via
 direct hw access) - a good way to check general support. Note that one
 reason for broken host sound with qemu can be OSS. For that reason I
 always configure my qemu with --audio-drv-list=alsa.

Thats a good hint :)
Seems i used to compile qemu without --audio-drv-list. Since dsound and
fmod drivers don't compile here (i likely miss some libs in my mingw), i
used sdl.


Don't do that. Here's a nice tutorial Kazu made that will probably help 
you: http://www.h7.dion.ne.jp/~qemu-win/Audio-en.html


So you're saying the use of sdl for audio is not recommended?


Now i can hear those two beeps with the image you suggested. Tho those are
coming
thru my sound card and not the hosts pc speaker (even with -soundhw pcspk,
but maybe
that option means something different).


And it will always come through your soundcard. pcspk is not a passthrough
thing.


Thanks for the clarification.

 With INT 10h AH=0Eh i now can hear a beep too, but it doesn't stop and qemu

somewhat freezes.


Huh?


With this INT 10h function qemu should beep once, but it does loop the beep 
infinitely.
Normally i can exit qemu by clicking on the [x] window close icon, but while 
it does
endlessly beep that doesn't work (vista says process doesn't respond). Using 
quit in the
monitor window doesn't work either.

- Sebastian

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: XP smp using a lot of CPU

2009-05-14 Thread Ross Boylan
On Wed, 2009-05-13 at 09:56 +0300, Avi Kivity wrote:
 Ross Boylan wrote:
  I just installed XP into a new VM, specifying -smp 2 for the machine.
  According to top, it's using nearly 200% of a cpu even when I'm not
  doing anything.
 
  Is this real CPU usage, or just a reporting problem (just as my disk
  image is big according to ls, but isn't really)?
 
  If it's real, is there anything I can do about it?
 
  kvm 0.7.2 on Debian Lenny (but 2.6.29 kernel), amd64.  Xeon chips; 32
  bit version of XP pro installed, now fully patched (including the
  Windows Genuine Advantage stuff, though I cancelled it when it wanted to
  run).  
 
  Task manager in XP shows virtually no CPU usage.
 
  Please cc me on responses.
 

 
 I'm guessing Windows uses a pio port to sleep, which kvm doesn't 
 support.  Can you provide kvm_stat output?
markov:~# kvm_stat -1
efer_reload0 0
exits9921384   566
fpu_reload267970 0
halt_exits 1 0
halt_wakeup3 0
host_state_reload402605017
hypercalls 0 0
insn_emulation   1329455 0
insn_emulation_fail  154 0
invlpg176773 0
io_exits 3818270 0
irq_exits1434046   566
irq_injections326730 0
irq_window164827 0
largepages 0 0
mmio_exits 35892 0
mmu_cache_miss 29760 0
mmu_flooded19908 0
mmu_pde_zapped 15557 0
mmu_pte_updated82088 0
mmu_pte_write  97990 0
mmu_recycled   0 0
mmu_shadow_zapped  43276 0
mmu_unsync   891 0
mmu_unsync_global  0 0
nmi_injections 0 0
nmi_window 0 0
pf_fixed 1231164 0
pf_guest  276083 0
remote_tlb_flush  115606 0
request_irq0 0
request_nmi0 0
signal_exits   5 0
tlb_flush 960198 0

This is with the VM displaying the XP "It is now safe to turn off your
computer" screen.  CPU remains about 200% from kvm.  Invoked with
sudo vdeq kvm -net nic,vlan=1,macaddr=52:54:a0:12:01:00 \
-net vde,vlan=1,sock=/var/run/vde2/tap0.ctl \
-std-vga -hda XP.raw \
-boot c \
-soundhw es1370 -localtime -no-acpi  -m 1G -smp 2

Next I'll trying fiddling with acpi.

-- 
Ross Boylan  wk:  (415) 514-8146
185 Berry St #5700   r...@biostat.ucsf.edu
Dept of Epidemiology and Biostatistics   fax: (415) 514-8150
University of California, San Francisco
San Francisco, CA 94107-1739 hm:  (415) 550-1062

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RFC: convert KVMTRACE to event traces

2009-05-14 Thread Marcelo Tosatti

Convert custom marker based KVMTRACE to event trace.

Applies on top of
git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-x86.git

See Documentation/trace/events.txt and commit
7ce7e4249921d5073e764f7ff7ad83cfa9894bd7 if you're interested in playing
with event traces.

Index: linux-2.6-x86-2/arch/x86/kvm/vmx.c
===
--- linux-2.6-x86-2.orig/arch/x86/kvm/vmx.c
+++ linux-2.6-x86-2/arch/x86/kvm/vmx.c
@@ -25,6 +25,7 @@
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/moduleparam.h>
+#include <trace/events/kvm/x86-arch.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -2406,7 +2407,7 @@ static void vmx_inject_irq(struct kvm_vc
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-   KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
+   trace_kvm_inj_virq(irq);
 
++vcpu-stat.irq_injections;
if (vcpu->arch.rmode.active) {
@@ -2631,8 +2632,8 @@ static int handle_exception(struct kvm_v
if (vm_need_ept())
BUG();
cr2 = vmcs_readl(EXIT_QUALIFICATION);
-   KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
-   (u32)((u64)cr2 >> 32), handler);
+   trace_kvm_page_fault(cr2, error_code);
+
if (vcpu->arch.interrupt.pending || 
vcpu->arch.exception.pending)
kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
@@ -2679,7 +2680,6 @@ static int handle_external_interrupt(str
 struct kvm_run *kvm_run)
 {
++vcpu-stat.irq_exits;
-   KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
return 1;
 }
 
@@ -2727,7 +2727,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcp
 
 static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-   unsigned long exit_qualification;
+   unsigned long exit_qualification, val;
int cr;
int reg;
 
@@ -2736,25 +2736,23 @@ static int handle_cr(struct kvm_vcpu *vc
reg = (exit_qualification >> 8) & 15;
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
-   KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr,
-   (u32)kvm_register_read(vcpu, reg),
-   (u32)((u64)kvm_register_read(vcpu, reg) >> 32),
-   handler);
+   val = kvm_register_read(vcpu, reg);
+   trace_kvm_cr_write(cr, val);
switch (cr) {
case 0:
-   kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg));
+   kvm_set_cr0(vcpu, val);
skip_emulated_instruction(vcpu);
return 1;
case 3:
-   kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg));
+   kvm_set_cr3(vcpu, val);
skip_emulated_instruction(vcpu);
return 1;
case 4:
-   kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
+   kvm_set_cr4(vcpu, val);
skip_emulated_instruction(vcpu);
return 1;
case 8:
-   kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg));
+   kvm_set_cr8(vcpu, val);
skip_emulated_instruction(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
return 1;
@@ -2767,23 +2765,19 @@ static int handle_cr(struct kvm_vcpu *vc
vcpu->arch.cr0 &= ~X86_CR0_TS;
vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
vmx_fpu_activate(vcpu);
-   KVMTRACE_0D(CLTS, vcpu, handler);
skip_emulated_instruction(vcpu);
return 1;
case 1: /*mov from cr*/
switch (cr) {
case 3:
kvm_register_write(vcpu, reg, vcpu->arch.cr3);
-   KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
-   (u32)kvm_register_read(vcpu, reg),
-   (u32)((u64)kvm_register_read(vcpu, reg) >> 
32),
-   handler);
+   trace_kvm_cr_read(cr, vcpu->arch.cr3);
skip_emulated_instruction(vcpu);
return 1;
case 8:
-   kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu));
-   KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
-   (u32)kvm_register_read(vcpu, reg), handler);
+   val = kvm_get_cr8(vcpu);
+   kvm_register_write(vcpu, cr, val);
+   trace_kvm_cr_read(cr, val);
skip_emulated_instruction(vcpu);
return 1;
}
@@ 

[PATCH] Allow to override sync source

2009-05-14 Thread Jan Kiszka
In order to allow syncing the kmod dir against arbitrary kernel trees,
extend the sync script to accept alternative paths and adjust the
Makefile accordingly.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

 Makefile |3 ++-
 sync |   14 +++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 1e0420e..dad5f0b 100644
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,7 @@ ORIGMODDIR = $(patsubst %/build,%/kernel,$(KERNELDIR))
 
 rpmrelease = devel
 
+KVM_VERSION = kvm-devel
 LINUX = ./linux-2.6
 
 ifeq ($(EXT_CONFIG_KVM_TRACE),y)
@@ -38,7 +39,7 @@ include $(MAKEFILE_PRE)
 .PHONY: sync
 
 sync:
-   ./sync $(KVM_VERSION)
+   ./sync -v $(KVM_VERSION) -l $(LINUX)
 
 install:
mkdir -p $(DESTDIR)/$(INSTALLDIR)
diff --git a/sync b/sync
index 4a89296..2e53a31 100755
--- a/sync
+++ b/sync
@@ -1,6 +1,7 @@
 #!/usr/bin/python
 
 import sys, os, glob, os.path, shutil, re
+from optparse import OptionParser
 
 glob = glob.glob
 
@@ -8,12 +9,19 @@ def cmd(c):
 if os.system(c) != 0:
 raise Exception('command execution failed: ' + c)
 
-version = 'kvm-devel'
-if len(sys.argv) >= 2:
-version = sys.argv[1]
+parser = OptionParser(usage='usage: %prog [-v version][-l linuxkernel]')
+parser.add_option('-v', action='store', type='string', dest='version')
+parser.add_option('-l', action='store', type='string', dest='linux')
+(options, args) = parser.parse_args()
 
+version = 'kvm-devel'
 linux = 'linux-2.6'
 
+if options.version:
+version = options.version
+if options.linux:
+linux = options.linux
+
 _re_cache = {}
 
 def re_cache(regexp):



signature.asc
Description: OpenPGP digital signature


[PATCH v3] kvm: x86: Allow PIT emulation without speaker port

2009-05-14 Thread Jan Kiszka
The in-kernel speaker emulation is only a dummy and also unneeded from
the performance point of view. Rather, it takes user space support to
generate sound output on the host, e.g. console beeps.

To allow this, introduce KVM_CREATE_PIT2 which controls in-kernel
speaker port emulation via a flag passed along with the new IOCTL. It also
leaves room for future extensions of the PIT configuration interface.

Changes in v3:
 - increase padding in kvm_pit_config to 64 bytes (as requested by Avi)

 Changes in v2:
 - Use extensible KVM_CREATE_PIT2

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

 arch/x86/kvm/i8254.c |   14 --
 arch/x86/kvm/i8254.h |2 +-
 arch/x86/kvm/x86.c   |   12 +++-
 include/linux/kvm.h  |   10 ++
 4 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4d6f0d2..584e3d3 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -560,7 +560,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier 
*kimn, bool mask)
}
 }
 
-struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
struct kvm_pit *pit;
struct kvm_kpit_state *pit_state;
@@ -586,11 +586,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
pit->dev.private = pit;
kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
 
-   pit->speaker_dev.read = speaker_ioport_read;
-   pit->speaker_dev.write = speaker_ioport_write;
-   pit->speaker_dev.in_range = speaker_in_range;
-   pit->speaker_dev.private = pit;
-   kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+   if (flags & KVM_PIT_SPEAKER_DUMMY) {
+   pit->speaker_dev.read = speaker_ioport_read;
+   pit->speaker_dev.write = speaker_ioport_write;
+   pit->speaker_dev.in_range = speaker_in_range;
+   pit->speaker_dev.private = pit;
+   kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+   }
 
kvm->arch.vpit = pit;
pit->kvm = kvm;
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index bbd863f..b267018 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -50,7 +50,7 @@ struct kvm_pit {
 
 void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
-struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
 void kvm_free_pit(struct kvm *kvm);
 void kvm_pit_reset(struct kvm_pit *pit);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 44e87a5..c6e7896 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1026,6 +1026,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_ASSIGN_DEV_IRQ:
+   case KVM_CAP_PIT2:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -1829,6 +1830,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
union {
struct kvm_pit_state ps;
struct kvm_memory_alias alias;
+   struct kvm_pit_config pit_config;
} u;
 
switch (ioctl) {
@@ -1889,12 +1891,20 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
break;
case KVM_CREATE_PIT:
+   u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
+   goto create_pit;
+   case KVM_CREATE_PIT2:
+   r = -EFAULT;
+   if (copy_from_user(&u.pit_config, argp,
+  sizeof(struct kvm_pit_config)))
+   goto out;
+   create_pit:
mutex_lock(&kvm->lock);
r = -EEXIST;
if (kvm->arch.vpit)
goto create_pit_unlock;
r = -ENOMEM;
-   kvm->arch.vpit = kvm_create_pit(kvm);
+   kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
if (kvm->arch.vpit)
r = 0;
create_pit_unlock:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 3db5d8d..5575409 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -70,6 +70,14 @@ struct kvm_irqchip {
} chip;
 };
 
+/* for KVM_CREATE_PIT2 */
+struct kvm_pit_config {
+   __u32 flags;
+   __u32 pad[15];
+};
+
+#define KVM_PIT_SPEAKER_DUMMY 1
+
 #define KVM_EXIT_UNKNOWN  0
 #define KVM_EXIT_EXCEPTION1
 #define KVM_EXIT_IO   2
@@ -415,6 +423,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#define KVM_CAP_PIT2 31
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -498,6 +507,7 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 
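
(Not part of the patch: a hypothetical user-space sketch of creating the PIT
via the new ioctl, assuming headers with this patch applied and a VM fd from
KVM_CREATE_VM; the helper name is made up:)

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <string.h>

int create_pit_with_speaker(int vm_fd, int want_dummy_speaker)
{
	struct kvm_pit_config config;

	memset(&config, 0, sizeof(config));
	if (want_dummy_speaker)
		config.flags = KVM_PIT_SPEAKER_DUMMY;	/* else user space owns port 0x61 */

	/* Fall back to the old ioctl if the kernel lacks KVM_CREATE_PIT2. */
	if (ioctl(vm_fd, KVM_CREATE_PIT2, &config) == 0)
		return 0;
	return ioctl(vm_fd, KVM_CREATE_PIT);
}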

[PATCH v4] qemu-kvm: Make PC speaker emulation aware of in-kernel PIT

2009-05-14 Thread Jan Kiszka
When using the in-kernel PIT the speaker emulation has to synchronize
the PIT state with KVM. Enhance the existing speaker sound device and
allow it to take over port 0x61 by using KVM_CREATE_PIT2 where
available. This unbreaks -soundhw pcspk in KVM mode.

Changes in v4:
 - preserve full PIT state across read-modify-write
 - update kvm.h

Changes in v3:
 - re-added incorrectly dropped kvm_enabled checks

Changes in v2:
 - rebased over qemu-kvm and KVM_CREATE_PIT2
 - refactored hooks in pcspk

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

 hw/pcspk.c |   48 
 kvm/kernel/include/linux/kvm.h |   10 
 kvm/libkvm/libkvm-x86.c|   26 +++---
 3 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/hw/pcspk.c b/hw/pcspk.c
index ec1d0c6..c0b8347 100644
--- a/hw/pcspk.c
+++ b/hw/pcspk.c
@@ -27,6 +27,8 @@
 #include "isa.h"
 #include "audio/audio.h"
 #include "qemu-timer.h"
+#include "i8254.h"
+#include "qemu-kvm.h"
 
 #define PCSPK_BUF_LEN 1792
 #define PCSPK_SAMPLE_RATE 32000
@@ -48,6 +50,43 @@ typedef struct {
 static const char *s_spk = "pcspk";
 static PCSpkState pcspk_state;
 
+#ifdef USE_KVM_PIT
+static void kvm_get_pit_ch2(PITState *pit,
+struct kvm_pit_state *inkernel_state)
+{
+struct kvm_pit_state pit_state;
+
+if (kvm_enabled() && qemu_kvm_pit_in_kernel()) {
+kvm_get_pit(kvm_context, &pit_state);
+pit->channels[2].mode = pit_state.channels[2].mode;
+pit->channels[2].count = pit_state.channels[2].count;
+pit->channels[2].count_load_time = 
pit_state.channels[2].count_load_time;
+pit->channels[2].gate = pit_state.channels[2].gate;
+if (inkernel_state) {
+memcpy(inkernel_state, &pit_state, sizeof(*inkernel_state));
+}
+}
+}
+
+static void kvm_set_pit_ch2(PITState *pit,
+struct kvm_pit_state *inkernel_state)
+{
+if (kvm_enabled() && qemu_kvm_pit_in_kernel()) {
+inkernel_state->channels[2].mode = pit->channels[2].mode;
+inkernel_state->channels[2].count = pit->channels[2].count;
+inkernel_state->channels[2].count_load_time =
+pit->channels[2].count_load_time;
+inkernel_state->channels[2].gate = pit->channels[2].gate;
+kvm_set_pit(kvm_context, inkernel_state);
+}
+}
+#else
+static inline void kvm_get_pit_ch2(PITState *pit,
+   kvm_pit_state *inkernel_state) { }
+static inline void kvm_set_pit_ch2(PITState *pit,
+   kvm_pit_state *inkernel_state) { }
+#endif
+
 static inline void generate_samples(PCSpkState *s)
 {
 unsigned int i;
@@ -72,6 +111,8 @@ static void pcspk_callback(void *opaque, int free)
 PCSpkState *s = opaque;
 unsigned int n;
 
+kvm_get_pit_ch2(s->pit, NULL);
+
 if (pit_get_mode(s->pit, 2) != 3)
 return;
 
@@ -121,6 +162,8 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t 
addr)
 PCSpkState *s = opaque;
 int out;
 
+kvm_get_pit_ch2(s->pit, NULL);
+
 s->dummy_refresh_clock ^= (1 << 4);
 out = pit_get_out(s->pit, 2, qemu_get_clock(vm_clock)) << 5;
 
@@ -129,9 +172,12 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t 
addr)
 
 static void pcspk_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
+struct kvm_pit_state inkernel_state;
 PCSpkState *s = opaque;
 const int gate = val & 1;
 
+kvm_get_pit_ch2(s->pit, &inkernel_state);
+
 s->data_on = (val >> 1) & 1;
 pit_set_gate(s->pit, 2, gate);
 if (s->voice) {
@@ -139,6 +185,8 @@ static void pcspk_ioport_write(void *opaque, uint32_t addr, 
 uint32_t val)
 s->play_pos = 0;
 AUD_set_active_out(s->voice, gate && s->data_on);
 }
+
+kvm_set_pit_ch2(s->pit, &inkernel_state);
 }
 
 void pcspk_init(PITState *pit)
diff --git a/kvm/kernel/include/linux/kvm.h b/kvm/kernel/include/linux/kvm.h
index f5e9d66..5b4b90c 100644
--- a/kvm/kernel/include/linux/kvm.h
+++ b/kvm/kernel/include/linux/kvm.h
@@ -110,6 +110,14 @@ struct kvm_irqchip {
} chip;
 };
 
+/* for KVM_CREATE_PIT2 */
+struct kvm_pit_config {
+   __u32 flags;
+   __u32 pad[15];
+};
+
+#define KVM_PIT_SPEAKER_DUMMY 1
+
 #define KVM_EXIT_UNKNOWN  0
 #define KVM_EXIT_EXCEPTION1
 #define KVM_EXIT_IO   2
@@ -455,6 +463,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#define KVM_CAP_PIT2 31
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -538,6 +547,7 @@ struct kvm_irq_routing {
 #define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
 #define KVM_DEASSIGN_DEV_IRQ   _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_CREATE_PIT2   _IOW(KVMIO, 0x76, struct 
kvm_pit_config)
 
 /*
  * ioctls for vcpu fds
diff --git a/kvm/libkvm/libkvm-x86.c 

KVM VT-d2?

2009-05-14 Thread Fischer, Anna
Does KVM already take advantage of Intel VT-d2 features, e.g. interrupt 
remapping support? Has anyone verified how it improves interrupt delivery for 
PCI pass-through devices?

Thanks,
Anna


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[KVM-AUTOTEST PATCH] Instead of trying to remove kvm modules with rmmod, use modprobe -r, as it handles module dependencies better

2009-05-14 Thread Lucas Meneghel Rodrigues
In certain situations, trying to do a rmmod on the kvm modules might
generate errors, as there is a chain of dependencies involved. Instead,
let's use modprobe -r, as it handles dependencies.

Signed-off-by: Lucas Meneghel Rodrigues mrodr...@redhat.com

diff --git a/client/tests/kvm_runtest_2/kvm_install.py b/client/tests/kvm_runtes
index dbf8401..c27c6c7 100755
--- a/client/tests/kvm_runtest_2/kvm_install.py
+++ b/client/tests/kvm_runtest_2/kvm_install.py
@@ -192,9 +192,9 @@ def __load_kvm_modules():
 #utils.system("pkill qemu 1>/dev/null 2>&1", ignore_status=True)
 utils.system("pkill qemu", ignore_status=True)
 #if utils.system("grep kvm_%s /proc/modules 1>/dev/null" % vendor, ignore_s
-utils.system("/sbin/rmmod kvm_%s" % vendor, ignore_status=True)
+utils.system("/sbin/modprobe -r kvm_%s" % vendor, ignore_status=True)
 #if utils.system("grep kvm /proc/modules 1>/dev/null", ignore_status=True) 
-utils.system("/sbin/rmmod kvm", ignore_status=True)
+utils.system("/sbin/modprobe -r kvm", ignore_status=True)
 
 if utils.system("grep kvm /proc/modules 1>/dev/null", ignore_status=True) =
 message = Failed to remove old KVM modules

-- 
Lucas Meneghel Rodrigues
Software Engineer (QE)
Red Hat - Emerging Technologies

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: KVM VT-d2?

2009-05-14 Thread Kay, Allen M
We have verified that VT-d2 features work with PCI passthrough on KVM.  To enable 
it, you need to turn on interrupt remapping in the kernel config.

Interrupt remapping is a security/isolation feature where interrupt delivery is 
qualified against the device's bus/device/function in the interrupt remapping table 
entry when source ID checking is turned on.  It does not directly inject the 
interrupt into the guest OS.

-Original Message-
From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of 
Fischer, Anna
Sent: Thursday, May 14, 2009 2:53 PM
To: kvm@vger.kernel.org
Subject: KVM  VT-d2?

Does KVM already take advantage of Intel VT-d2 features, e.g. interrupt 
remapping support? Has anyone verified how it improves interrupt delivery for 
PCI pass-through devices?

Thanks,
Anna


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Question about KVM and PC speaker

2009-05-14 Thread malc
On Thu, 14 May 2009, Sebastian Herbszt wrote:

 malc wrote:
  On Wed, 13 May 2009, Sebastian Herbszt wrote:
  
   Jan Kiszka wrote:
Moreover, does sound work at all with your qemu?
The image I tried [1] issues two beeps after loading (obviously via
direct hw access) - a good way to check general support. Note that one
reason for broken host sound with qemu can be OSS. For that reason I
always configure my qemu with --audio-drv-list=alsa.
   
   Thats a good hint :)
   Seems i used to compile qemu without --audio-drv-list. Since dsound
   and
   fmod drivers don't compile here (i likely miss some libs in my mingw), i
   used sdl.
  
  Don't do that. Here's a nice tutorial Kazu made that will probably help you:
  http://www.h7.dion.ne.jp/~qemu-win/Audio-en.html
 
 So you're saying the use of sdl for audio is not recommended?

Yes.

 
   Now i can hear those two beeps with the image you suggested. Tho those are
   coming
   thru my sound card and not the hosts pc speaker (even with -soundhw
   pcspk,
   but maybe
   that option means something different).
  
  And it will always come through your soundcard. pcspk is not a passthrough
  thing.
 
 Thanks for the clarification.
 
  With INT 10h AH=0Eh i now can hear a beep too, but it doesn't stop and qemu
   somewhat freezes.
  
  Huh?
 
 With this INT 10h function qemu should beep once, but it does loop the beep
 infinitely.
 Normally i can exit qemu by clicking on the [x] window close icon, but while
 it does
 endlessly beep that doesn't work (vista says process doesn't respond). Using
 quit in the
 monitor window doesn't work either.
 

Can you post some .com file sparing me from writing the code for one
myself?

-- 
mailto:av1...@comtv.ru
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: KVM VT-d2?

2009-05-14 Thread Fischer, Anna
I thought that one use case of VT-d2 interrupt remapping was to be able to 
safely and more efficiently deliver interrupts to the CPU that runs the 
particular VCPU of the guest that owns the I/O device that issues the 
interrupt. Shouldn't there at least be some performance (e.g. latency) 
improvement doing the remapping and checking in HW with a predefined table 
rather than multiplexing this in software in the hypervisor layer?

 -Original Message-
 From: Kay, Allen M [mailto:allen.m@intel.com]
 Sent: 14 May 2009 15:02
 To: Fischer, Anna; kvm@vger.kernel.org
 Subject: RE: KVM  VT-d2?
 
 We have verified VT-d2 features works with PCI passthrough on KVM.  To
 enable it, you need to turn on interrupt remapping in kernel config.
 
 Interrupt remapping is a security/isolation feature where interrupt
 delivery is qualified with device's bus/device/function in interrupt
 remapping table entry when source ID checking is turn on.  It does not
 directly inject interrupt to the guest OS.
 
 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
 Behalf Of Fischer, Anna
 Sent: Thursday, May 14, 2009 2:53 PM
 To: kvm@vger.kernel.org
 Subject: KVM  VT-d2?
 
 Does KVM already take advantage of Intel VT-d2 features, e.g. interrupt
 remapping support? Has anyone verified how it improves interrupt
 delivery for PCI pass-through devices?
 
 Thanks,
 Anna
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


virtio_net with RSS?

2009-05-14 Thread Fischer, Anna
Are there any plans to enhance virtio_net with receive-side scaling 
capabilities, so that an SMP guest OS can balance its network processing load 
more equally across multiple CPUs?

Thanks,
Anna
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: KVM VT-d2?

2009-05-14 Thread Kay, Allen M
In both the interrupt remapping and no interrupt remapping cases, interrupts from 
the passthrough device are still delivered to the host kernel.  KVM then 
injects the interrupt into the guest via the vlapic->vioapic->vmcs path.

The value add from interrupt remapping is that a new source ID field in the 
interrupt remapping table entry is checked before the interrupt is delivered to 
the host kernel.  This prevents malicious guests with PCI passthrough devices 
from generating DoS attacks via DMA writes to the APIC area.

-Original Message-
From: Fischer, Anna [mailto:anna.fisc...@hp.com] 
Sent: Thursday, May 14, 2009 4:12 PM
To: Kay, Allen M
Cc: kvm@vger.kernel.org
Subject: RE: KVM  VT-d2?

I thought that one use case of VT-d2 interrupt remapping was to be able to 
safely and more efficiently deliver interrupts to the CPU that runs the 
particular VCPU of the guest that owns the I/O device that issues the 
interrupt. Shouldn't there at least be some performance (e.g. latency) 
improvement doing the remapping and checking in HW with a predefined table 
rather than multiplexing this in software in the hypervisor layer?

 -Original Message-
 From: Kay, Allen M [mailto:allen.m@intel.com]
 Sent: 14 May 2009 15:02
 To: Fischer, Anna; kvm@vger.kernel.org
 Subject: RE: KVM  VT-d2?
 
 We have verified VT-d2 features works with PCI passthrough on KVM.  To
 enable it, you need to turn on interrupt remapping in kernel config.
 
 Interrupt remapping is a security/isolation feature where interrupt
 delivery is qualified with device's bus/device/function in interrupt
 remapping table entry when source ID checking is turn on.  It does not
 directly inject interrupt to the guest OS.
 
 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On
 Behalf Of Fischer, Anna
 Sent: Thursday, May 14, 2009 2:53 PM
 To: kvm@vger.kernel.org
 Subject: KVM  VT-d2?
 
 Does KVM already take advantage of Intel VT-d2 features, e.g. interrupt
 remapping support? Has anyone verified how it improves interrupt
 delivery for PCI pass-through devices?
 
 Thanks,
 Anna
 
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2638990 ] Segfault 284

2009-05-14 Thread SourceForge.net
Bugs item #2638990, was opened at 2009-02-25 23:35
Message generated for change (Settings changed) made by sf-robot
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2638990&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: None
Priority: 6
Private: No
Submitted By: David Rasche (drasche2)
Assigned to: Nobody/Anonymous (nobody)
Summary: Segfault 284

Initial Comment:
Host
(2) Intel Xeon (E5430) Quad Core Processors (2.66GHz)
16G mem
Host OS: Ubuntu 8.10 (64bit)
kvm-72
libvirt 0.4.4

Guest OS Win2k3 Server (32 bit)

After running for 8 to 48 hours, Win2k3 guest system crashes with no warning. 
Syslog shows the following segmentation fault:

Feb 25 16:12:02 host-b kernel: [448190.415857] kvm[25511]: segfault at 284 ip 
0043386f sp 7fff97fa3a70 error 4 in kvm[40+19e000]

this error has been confirmed on 2 different machines with exactly the same 
setup.

We are running KVM through libvirt with the following xml setup.

<domain type='kvm'>
  <name>exchange</name>
  <uuid>e8d93082-c1db-426c-9ad3-ae651095ceb5</uuid>
  <memory>4096000</memory>
  <currentMemory>4096000</currentMemory>
  <vcpu>3</vcpu>
  <os>
    <type>hvm</type>
    <boot dev='hd'/>
  </os>
  <features>
    <acpi/>
  </features>
  <clock offset='localtime'/>
  <on_poweroff>destroy</on_poweroff>
  <on_reboot>restart</on_reboot>
  <on_crash>destroy</on_crash>
  <devices>
    <emulator>/usr/bin/kvm</emulator>
    <disk type='file' device='disk'>
      <source file='/mnt/vg0/lvol3/exchange.qcow2'/>
      <target dev='hda' bus='ide'/>
    </disk>
    <disk type='block' device='disk'>
      <source dev='/dev/vg1/lv_exchdb'/>
      <target dev='hdb' bus='ide'/>
    </disk>
    <disk type='file' device='cdrom'>
      <target dev='hdc' bus='ide'/>
      <readonly/>
    </disk>
    <disk type='block' device='disk'>
      <source dev='/dev/vg2/lv_exchlog'/>
      <target dev='hdd' bus='ide'/>
    </disk>
    <interface type='bridge'>
      <mac address='00:0c:29:cf:71:e4'/>
      <source bridge='br0'/>
    </interface>
    <input type='tablet' bus='usb'/>
    <input type='mouse' bus='ps2'/>
    <graphics type='vnc' port='5900' listen='127.0.0.1'/>
  </devices>
</domain>



--

Comment By: SourceForge Robot (sf-robot)
Date: 2009-05-15 02:20

Message:
This Tracker item was closed automatically by the system. It was
previously set to a Pending status, and the original submitter
did not respond within 14 days (the time period specified by
the administrator of this Tracker).

--

Comment By: Simon Jagoe (ivanvimes)
Date: 2009-04-30 18:47

Message:
I'll start it up in gdb and watch it for the error. I'll post it when (if)
it happens again. My server was up for at least a week before this
occurred, so I may not be able to get it immediately.

--

Comment By: Avi Kivity (avik)
Date: 2009-04-30 18:36

Message:
Please generate a core dump and post a stack trace:

  $ gdb /path/to/qemu core
  (gdb) backtrace

--

Comment By: Simon Jagoe (ivanvimes)
Date: 2009-04-30 18:26

Message:
Sorry about the formatting of the kvm commandline I posted, I have
re-formatted it so that sourceforge does not automagically wrap it:

/usr/bin/kvm -S -M pc -m 1024 -smp 1 -name partridge \
-monitor pty -boot c \
-drive file=/dev/hare/partridge_root,if=ide,index=0,boot=on \
-drive file=/dev/hare/partridge_var,if=ide,index=1 \
-drive file=/dev/hare/partridge_opt,if=ide,index=2 \
-drive file=/dev/hare/partridge_home,if=ide,index=3 \
-net nic,macaddr=00:16:3e:30:99:7c,vlan=0 \
-net tap,fd=17,script=,vlan=0,ifname=vnet3 \
-serial none -parallel none -usb -vnc 127.0.0.1:0

--

Comment By: Simon Jagoe (ivanvimes)
Date: 2009-04-30 18:24

Message:
Thanks for the reply.

The libvirt XML I posted calls kvm as follows:

/usr/bin/kvm -S -M pc -m 1024 -smp 1 -name partridge -monitor pty -boot c
\
  -drive file=/dev/hare/partridge_root,if=ide,index=0,boot=on \
  -drive file=/dev/hare/partridge_var,if=ide,index=1 \
  -drive file=/dev/hare/partridge_opt,if=ide,index=2 \
  -drive file=/dev/hare/partridge_home,if=ide,index=3 \
  -net nic,macaddr=00:16:3e:30:99:7c,vlan=0 -net
tap,fd=17,script=,vlan=0,ifname=vnet3 \
  -serial none -parallel none -usb -vnc 127.0.0.1:0

--

Comment By: Brian Jackson (iggy_cav)
Date: 2009-04-30 15:47

Message:
For those of us that don't use/speak libvirt, what command line is it using?

--

Comment By: Simon Jagoe (ivanvimes)
Date: 2009-04-30 08:09

Message:
I am running an 

Re: [KVM PATCH v7 2/3] kvm: add support for irqfd via eventfd-notification interface

2009-05-14 Thread Davide Libenzi
On Thu, 14 May 2009, Gregory Haskins wrote:

 Avi Kivity wrote:
  Gregory Haskins wrote:
  KVM provides a complete virtual system environment for guests, including
  support for injecting interrupts modeled after the real
  exception/interrupt
  facilities present on the native platform (such as the IDT on x86).
  Virtual interrupts can come from a variety of sources (emulated devices,
  pass-through devices, etc) but all must be injected to the guest via
  the KVM infrastructure.  This patch adds a new mechanism to inject a
  specific
  interrupt to a guest using a decoupled eventfd mechanism:  Any legal
  signal
  on the irqfd (using eventfd semantics from either userspace or
  kernel) will
  translate into an injected interrupt in the guest at the next available
  interrupt window.
 
  +
  +static void
  +irqfd_inject(struct work_struct *work)
  +{
  +	struct _irqfd *irqfd = container_of(work, struct _irqfd, work);
  +	struct kvm *kvm = irqfd->kvm;
  +

 
 
  I think you need to ->read() from the irqfd, otherwise the count will
  never clear.
 
 Yeah, and this is a disadvantage of using eventfd vs a custom anon-fd
 implementation.
 
 However, the count is really only there for deciding whether to sleep a
 traditional eventfd recipient, which doesn't really apply in this
 application.  I suppose we could try to invoke the read method (or add a
 new method to eventfd to allow it to be cleared independently of
 f_ops->read(), ala eventfd_signal() vs f_ops->write()).  I'm not
 convinced we really need to worry about it, though.  IMO we can just let
 the count accumulate.
 
 But if you insist this loose end should be addressed, perhaps Davide has
 some thoughts on how to best do this?

The counter is 64 bits, so at 1M IRQ/s it will take about 585K years to 
saturate (2^64 events at 10^6 per second comes to roughly 5.8e5 years). But 
from a symmetry POV, it may be better to clear it. Maybe with a kernel-side 
eventfd_read()?


- Davide
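
As a minimal userspace sketch of the eventfd counter semantics discussed
above (illustrative only, not KVM or kernel code; the demo values are
assumptions): each write() adds its 8-byte value to the 64-bit counter,
and a single read() drains the accumulated count back to zero, which is
the "clearing" being debated.

  /* eventfd_counter_demo.c -- illustrative sketch, not KVM code. */
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>
  #include <sys/eventfd.h>

  int main(void)
  {
          uint64_t val = 1;
          int fd = eventfd(0, 0);   /* counter starts at 0, no flags */

          if (fd < 0) {
                  perror("eventfd");
                  return 1;
          }

          /* Three "signals": the counter accumulates to 3. */
          for (int i = 0; i < 3; i++)
                  write(fd, &val, sizeof(val));

          /* One read returns the total (3) and resets the counter to 0. */
          read(fd, &val, sizeof(val));
          printf("accumulated count: %llu\n", (unsigned long long)val);

          close(fd);
          return 0;
  }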


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm-autotest: The automation plans?

2009-05-14 Thread jason wang

Michael Goldish wrote:

- jason wang jasow...@redhat.com wrote:

  

sudhir kumar wrote:


Hi Uri/Lucas,

Do you have any plans for enhancing kvm-autotest?
I was looking mainly on the following 2 aspects:

(1).
We have standalone migration only. Are there any plans to enhance
kvm-autotest so that we can trigger migration while a workload is
running?
Something like this:
Start a workload (maybe n instances of it).
Let the test execute for some time.
Trigger migration.
Log into the target.
Check if the migration is successful.
Check if the test results are consistent.
  
  
We have some patches for ping-pong migration and for adding a workload. The 
migration is based on a public bridge, and the workload is added by running 
a benchmark in the background of the guest.


(2).
How can we run N parallel instances of a test? Will the current
configuration be easily able to support it?

Please provide your thoughts on the above features.

  
  
Parallel instances can easily be achieved through job.parallel() of the 
autotest framework, and that is what we have used in our tests. We have 
made some helper routines, such as get_free_port, reentrant by using a 
file lock.



We'll probably have to use file locks anyway when we work with TAP, but in
VM.create(), not in get_free_port(), because we also want to prevent parallel
qemu instances from choosing the same TAP device. I'm not sure how qemu
handles this internally, and I'd rather be on the safe side.

Do you release the file lock inside get_free_port or only after running qemu?
  
We record the port usage and release the file lock inside 
get_free_port(). I agree with you that it's better to take/release the 
file lock in VM.create(), because it is easier and it also eliminates the 
effort of locking in every helper function.
For the TAP devices, maybe we could give each TAP device used by qemu-kvm 
a randomly generated ifname to prevent qemu-kvm from choosing the same 
TAP devices. This method works well in our tests.
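
To illustrate the file-lock idea discussed above (a hypothetical,
language-neutral sketch, not kvm-autotest code; the lock-file path and
port range are made up), free-port selection across parallel instances
can be serialized roughly like this:

  /* port_lock_demo.c -- hypothetical sketch of lock-protected port picking. */
  #include <stdio.h>
  #include <string.h>
  #include <fcntl.h>
  #include <unistd.h>
  #include <sys/file.h>
  #include <sys/socket.h>
  #include <netinet/in.h>

  /* A port counts as free if a TCP socket can bind to it on loopback. */
  static int port_is_free(int port)
  {
          struct sockaddr_in addr;
          int s = socket(AF_INET, SOCK_STREAM, 0);
          int ok;

          memset(&addr, 0, sizeof(addr));
          addr.sin_family = AF_INET;
          addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
          addr.sin_port = htons(port);
          ok = (bind(s, (struct sockaddr *)&addr, sizeof(addr)) == 0);
          close(s);
          return ok;
  }

  int main(void)
  {
          int lockfd = open("/tmp/kvm-test-ports.lock", O_CREAT | O_RDWR, 0644);
          int port;

          flock(lockfd, LOCK_EX);   /* serialize against other test instances */
          for (port = 5900; port < 6000; port++)
                  if (port_is_free(port))
                          break;
          /* Record the allocated port (e.g. in a shared file) before the
           * lock is dropped, so other instances skip it. */
          printf("allocated port %d\n", port);
          flock(lockfd, LOCK_UN);
          close(lockfd);
          return 0;
  }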

We've implemented the following test cases: timedrift (already sent here),
savevm/loadvm, suspend/resume, jumboframe, migration between two 
machines, and others. We will send them here for review in the following
weeks.
There are some other things that could be improved:
1) The current kvm_test.cfg.sample/kvm_test.cfg is transparent to (i.e. not
exposed in) the autotest server UI, which makes it hard to configure the
tests on the server side. During our tests we merged it into the control
file so that it can be configured through the edit-control-file function
of the autotest server-side web UI.



Would it not suffice to just modify the configuration, instead of completely
defining it, inside the control file? This is possible using parse_string().
For example:

cfg = kvm_config.config("kvm_tests.cfg")
cfg.parse_string("only weekly")
cfg.parse_string("only Fedora RHEL Windows")
cfg.parse_string("""
variants:
    - 1:
        only ide
    - 2:
        Fedora:
            no rtl8139
""")
list = cfg.get_list()

(get_list() returns the test dictionaries.)

The advantage here is that we can have a standard kvm_tests.cfg that we all
agree on and only rather small environment-specific modifications are made
in the control file.
  

Thanks, this way makes things easier.



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


PCI pass-through of multi-function device

2009-05-14 Thread Fischer, Anna
Does KVM allow passing through a full multi-function PCI device to a guest 
and making that device appear as a whole multi-function device, rather than 
as multiple single-function PCI devices (e.g. Xen only does the latter, 
where all PCI devices appear in the guest with function ID 0)?

Thanks,
Anna
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


KVM 10/Gb Ethernet PCIe passthrough with Linux/iSCSI and large block sizes

2009-05-14 Thread Nicholas A. Bellinger
Greetings all,

The first test results for Linux/iSCSI Initiators and targets for large
block sizes using 10 Gb/sec Ethernet + PCIe device-passthrough into
Linux/KVM guests have been posted at:

http://linux-iscsi.org/index.php/KVM-LIO-Target

So far, the results have been quite impressive using the Neterion X3100
series hardware with recent KVM-85 stable code (with Marcelo's patches,
see the above link) on v2.6.29.2 KVM guests, and using v2.6.30-rc3 KVM
Hosts.

Using iSCSI RFC defined MC/S to scale a *single* KVM-accessible
Linux/iSCSI Logical Unit to 10 Gb/sec line-rate speeds has been
successful using Core-iSCSI WRITE/READ (bi-directional) traffic using
Linux-Test-Project disktest pthreaded benchmark with O_DIRECT enabled.
Using Core-iSCSI MC/S w/ iSCSI READ (uni-directional) the average is
about 6-7 Gb/sec, and with MC/S iSCSI WRITE (uni-directional) the
average is about 5 Gb/sec to the RAMDISK_DR and FILEIO storage objects
for these same streaming tests.  Please see the link for more
information on the tests and hardware/software setup.

The tests have been run with both upstream Open-iSCSI and Core-iSCSI
Initiators against Target_Core_Mod/LIO-Target v3.0 in KVM guests.  It is
important to note that these tests have been run with tcp_sendpage()
disabled (tcp_sendpage() is enabled by default in LIO-Target and
Open-iSCSI) in 10 Gb/sec KVM guests; it had to be disabled in order
to get up and running with the 10 Gb/sec hardware.  1 Gb/sec e1000e ports
are stable with sendpage() in LIO-Target KVM guests, and these will be
enabled in 10 Gb/sec hardware in subsequent tests.  Also note that
Open-iSCSI WRITEs using tcp_sendpage() have been omitted for this first
run of tests.

It is also important to note that both iSCSI MC/S and dm-multipath are
methods to allow a single Linux/SCSI Logical Unit to scale across
multiple TCP connections using the iSCSI Protocol.  Both of these
methods (iSCSI RFC fabric level multiplexing and OS-level SCSI
Multipath) provide a means of scaling across multiple X3110 Vpaths
(MSI-X TX/RX pairs), and MC/S is a method with a low amount of
overhead.

Some of the future setups for KVM + 10 Gb/sec will be using dm-multipath
block devices, 10 Gb/sec Ethernet PCIe multi-function mode into KVM
guest, as well as PCIe SR-IOV on recent IOMMU capable hardware
platforms.

Many thanks to the Neterion folks and Sheng Yang for answering my
questions!

--nab


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv6 2/4] virtio: find_vqs/del_vqs virtio operations

2009-05-14 Thread Rusty Russell
And here's the fixup patch I applied:

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -318,11 +318,11 @@ static void lg_del_vqs(struct virtio_dev
struct virtqueue *vq, *n;
 
	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
-   kvm_del_vq(vq);
+   lg_del_vq(vq);
 }
 
 static int lg_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-  struct virtqueue *vqs[]
+  struct virtqueue *vqs[],
   vq_callback_t *callbacks[],
   const char *names[])
 {
@@ -331,7 +331,7 @@ static int lg_find_vqs(struct virtio_dev
 
/* We must have this many virtqueues. */
	if (nvqs > ldev->desc->num_vq)
-   return ERR_PTR(-ENOENT);
+   return -ENOENT;
 
	for (i = 0; i < nvqs; ++i) {
vqs[i] = lg_find_vq(vdev, i, callbacks[i], names[i]);
@@ -341,7 +341,7 @@ static int lg_find_vqs(struct virtio_dev
return 0;
 
 error:
-   vp_del_vqs(vdev);
+   lg_del_vqs(vdev);
return PTR_ERR(vqs[i]);
 }
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -260,7 +260,7 @@ static int p9_virtio_probe(struct virtio
return 0;
 
 out_free_vq:
-   vdev->config->del_vq(chan->vq);
+   vdev->config->del_vqs(vdev);
 fail:
	mutex_lock(&virtio_9p_lock);
chan_index--;
@@ -331,7 +331,7 @@ static void p9_virtio_remove(struct virt
	BUG_ON(chan->inuse);
 
	if (chan->initialized) {
-   	vdev->config->del_vq(chan->vq);
+   	vdev->config->del_vqs(vdev);
chan-initialized = false;
}
 }
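
As a reading aid (not part of the patch; the example_probe, rx_done and
tx_done names and the two-queue layout are illustrative assumptions), a
driver using the find_vqs()/del_vqs() operations whose signature appears
in the diff above would look roughly like this:

  /* Illustrative driver fragment: request two virtqueues in one call and
   * tear them all down with del_vqs(); error handling returns a plain int,
   * matching the ERR_PTR -> -ENOENT fix in the hunk above. */
  #include <linux/virtio.h>
  #include <linux/virtio_config.h>

  static void rx_done(struct virtqueue *vq) { /* handle received buffers */ }
  static void tx_done(struct virtqueue *vq) { /* reclaim sent buffers */ }

  static int example_probe(struct virtio_device *vdev)
  {
          vq_callback_t *callbacks[] = { rx_done, tx_done };
          const char *names[] = { "rx", "tx" };
          struct virtqueue *vqs[2];
          int err;

          /* One call sets up all queues for the device. */
          err = vdev->config->find_vqs(vdev, 2, vqs, callbacks, names);
          if (err)
                  return err;

          /* ... use vqs[0] / vqs[1] ... */

          /* Teardown no longer names individual queues. */
          vdev->config->del_vqs(vdev);
          return 0;
  }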

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html