[PATCH] kvm: qemu: allow kvm.h to include config.h

2008-12-25 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/qemu/kvm.h b/qemu/kvm.h
index eeed3dc..05a13ee 100644
--- a/qemu/kvm.h
+++ b/qemu/kvm.h
@@ -14,10 +14,10 @@
 #ifndef QEMU_KVM_H
 #define QEMU_KVM_H
 
-#ifdef KVM_UPSTREAM
-
 #include "config.h"
 
+#ifdef KVM_UPSTREAM
+
 #ifdef CONFIG_KVM
 extern int kvm_allowed;
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: Fix KVM includes in helper.c

2008-12-25 Thread Avi Kivity
From: Alexander Graf ag...@suse.de

By default target-i386/helper.c does not include config.h, so
no code in there knows if we're enabling KVM or not.
This breaks the nested activation, as that's depending on the
config options.

This patch fixes compilation broken thanks to my nested SVM
patches.

Signed-off-by: Alexander Graf ag...@suse.de
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index bd8a9e8..aec5286 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -161,6 +161,7 @@ int qemu_kvm_has_sync_mmu(void);
 void kvm_init_vcpu(CPUState *env);
 #else
 #define kvm_enabled() (0)
+#define kvm_nested 0
 #define qemu_kvm_irqchip_in_kernel() (0)
 #define qemu_kvm_pit_in_kernel() (0)
 #define kvm_has_sync_mmu() (0)
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: qemu: regenerate bios for smp boot hang fix

2008-12-25 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/qemu/pc-bios/bios.bin b/qemu/pc-bios/bios.bin
index 35fffec..768d8f0 100644
Binary files a/qemu/pc-bios/bios.bin and b/qemu/pc-bios/bios.bin differ
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: bios: prevent compiler from caching cpu count while starting up smp

2008-12-25 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Other cpus are updating the count in parallel; if we get bad timing we might
not notice them starting up and hang.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/bios/rombios32.c b/bios/rombios32.c
index cceaede..321563d 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -633,7 +633,7 @@ void smp_probe(void)
 #ifndef BX_QEMU
 delay_ms(10);
 #else
-   while (cmos_readb(0x5f) + 1 != smp_cpus)
+   while (cmos_readb(0x5f) + 1 != readw(&smp_cpus))
;
 #endif
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Revert KVM: SVM: Accelerate nested SVM by emulating parts of GIF=0

2008-12-25 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

This reverts commit 037b6e2531d844994cf79bd4190853427c6af2ac (and
7b8052aecd9c533661493d1140cbec0e1ab311d3 as well).  It causes hangs.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6577934..f53be7e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -78,11 +78,6 @@ static int nested_svm_vmsave(struct vcpu_svm *svm, void 
*nested_vmcb,
 void *arg2, void *opaque);
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
  bool has_error_code, u32 error_code);
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run);
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run);
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run);
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run);
-
 
 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 {
@@ -1496,50 +1491,6 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm, 
bool kvm_override)
 nested_svm_exit_handled_real);
 }
 
-static int nested_svm_emulate(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
-   int er;
-   u32 opcode = 0;
-   unsigned long rip;
-   unsigned long rip_linear;
-
-   svm->vmcb->save.rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
-   svm->vmcb->save.rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
-   svm->vmcb->save.rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
-   rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
-   rip_linear = rip + svm_seg(&svm->vcpu, VCPU_SREG_CS)->base;
-
-   er = emulator_read_std(rip_linear, (void *)&opcode, 3, &svm->vcpu);
-   if (er != X86EMUL_CONTINUE)
-   return er;
-   er = EMULATE_FAIL;
-
-   switch (opcode) {
-   case 0xda010f:
-   vmload_interception(svm, kvm_run);
-   er = EMULATE_DONE;
-   break;
-   case 0xd8010f:
-   vmrun_interception(svm, kvm_run);
-   er = EMULATE_DONE;
-   break;
-   case 0xdb010f:
-   vmsave_interception(svm, kvm_run);
-   er = EMULATE_DONE;
-   break;
-   case 0xdc010f:
-   stgi_interception(svm, kvm_run);
-   er = EMULATE_DONE;
-   break;
-   default:
-   nsvm_printk("NSVM: Opcode %x unknown\n", opcode);
-   }
-
-   nsvm_printk("NSVM: svm emul at 0x%lx -> %d\n", rip, er);
-
-   return er;
-}
-
 static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
  void *arg2, void *opaque)
 {
@@ -1635,9 +1586,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_mmu_reset_context(svm-vcpu);
kvm_mmu_load(svm-vcpu);
 
-   /* KVM calls vmsave after vmrun, so let's run it now if we can */
-   nested_svm_emulate(svm, NULL);
-
return 0;
 }
 
@@ -1848,8 +1796,6 @@ static int stgi_interception(struct vcpu_svm *svm, struct 
kvm_run *kvm_run)
 
 static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-   int loopcount = 0;
-
if (nested_svm_check_permissions(svm))
return 1;
 
@@ -1862,23 +1808,6 @@ static int clgi_interception(struct vcpu_svm *svm, 
struct kvm_run *kvm_run)
svm_clear_vintr(svm);
 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 
-   /* Let's try to emulate as many instructions as possible in GIF=0 */
-
-   while (++loopcount < 100) {
-   int er;
-
-   er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0);
-   nsvm_printk("NSVM: emulating at 0x%lx -> %d\n", 
svm->vcpu.arch.regs[VCPU_REGS_RIP], er);
-
-   /* So we can now emulate the SVM instructions that most probably
-  occur at the end of the codepath */
-   if (er != EMULATE_DONE) {
-   while (true)
-   if (nested_svm_emulate(svm, kvm_run) == 
EMULATE_FAIL)
-   break;
-   break;
-   }
-   }
return 1;
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: external module: fix build w/ --with-patched-kernel on Ubuntu 8.10

2008-12-25 Thread Avi Kivity
From: Nolan no...@sigbus.net

And presumably any other distribution that puts only symlinks
in /lib/modules/kernel/build/...

Signed-off-by: Nolan Leake no...@sigbus.net
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/kernel/Makefile b/kernel/Makefile
index 8315e3d..6bf474b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -65,12 +65,12 @@ headers-new = 
$(LINUX)/arch/$(ARCH_DIR)/include/asm/./kvm*.h \
 
 header-sync:
rm -rf $T
-   rsync -R \
+   rsync -R -L \
 $(LINUX)/./include/linux/kvm*.h \
 $(if $(wildcard $(headers-old)), $(headers-old)) \
  $T/
$(if $(wildcard $(headers-new)), \
-   rsync -R \
+   rsync -R -L \
 $(wildcard $(headers-new)) \
  $T/include/asm-$(ARCH_DIR)/)
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: testsuite: fix smptest.flat makefile rules

2008-12-25 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

smptest.flat was not getting built

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/user/config-x86-common.mak b/user/config-x86-common.mak
index edbf6e4..315091a 100644
--- a/user/config-x86-common.mak
+++ b/user/config-x86-common.mak
@@ -20,7 +20,7 @@ FLATLIBS = test/lib/libcflat.a $(libgcc)
 
 tests-common = $(TEST_DIR)/bootstrap \
$(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
-   $(TEST_DIR)/smp.flat  $(TEST_DIR)/port80.flat \
+   $(TEST_DIR)/smptest.flat  $(TEST_DIR)/port80.flat \
$(TEST_DIR)/realmode.flat
 
 test_cases: $(tests-common) $(tests)
@@ -43,7 +43,7 @@ $(TEST_DIR)/vmexit.flat: $(cstart.o) $(TEST_DIR)/vmexit.o
  
 $(TEST_DIR)/test32.flat: $(TEST_DIR)/test32.o
 
-$(TEST_DIR)/smp.flat: $(cstart.o) $(TEST_DIR)/smptest.o
+$(TEST_DIR)/smptest.flat: $(cstart.o) $(TEST_DIR)/smptest.o
  
 $(TEST_DIR)/emulator.flat: $(cstart.o) $(TEST_DIR)/vm.o $(TEST_DIR)/print.o
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: testsuite: avoid clobbering ebx

2008-12-25 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Some compilers don't enjoy that.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/user/test/x86/vmexit.c b/user/test/x86/vmexit.c
index f6341cc..0662f34 100644
--- a/user/test/x86/vmexit.c
+++ b/user/test/x86/vmexit.c
@@ -18,6 +18,12 @@ static inline unsigned long long rdtsc()
 
 #define N (1 << 22)
 
+#ifdef __x86_64__
+#  define R "r"
+#else
+#  define R "e"
+#endif
+
 int main()
 {
int i;
@@ -25,7 +31,8 @@ int main()
 
t1 = rdtsc();
for (i = 0; i  N; ++i)
-   asm volatile ("cpuid" : : : "eax", "ebx", "ecx", "edx");
+   asm volatile ("push %%"R"bx; cpuid; pop %%"R"bx"
+ : : : "eax", "ecx", "edx");
t2 = rdtsc();
printf(vmexit latency: %d\n, (int)((t2 - t1) / N));
return 0;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: Cosmetic commit to reconcile upstream and local changes

2008-12-25 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index dfc6442..36d2a50 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -86,12 +86,9 @@ extern int kvmppc_core_emulate_op(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
 extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
 extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
 
-
 extern int kvmppc_booke_init(void);
 extern void kvmppc_booke_exit(void);
 
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
-extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
-
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index a8b9304..9050491 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -389,7 +389,7 @@ static void emergency_vmx_disable_all(void)
 * We can't take any locks and we may be on an inconsistent
 * state, so we use NMIs as IPIs to tell the other CPUs to disable
 * VMX and halt.
-* 
+*
 * For safety, we will avoid running the nmi_shootdown_cpus()
 * stuff unnecessarily, but we don't have a way to check
 * if other CPUs have VMX enabled. So we will call it only if the
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/8] KVM: Use kvm_free_assigned_irq() for free irq

2008-12-25 Thread Sheng Yang
On Tuesday 23 December 2008 23:18:43 Marcelo Tosatti wrote:
 Hi Sheng,

 On Tue, Dec 23, 2008 at 04:00:25PM +0800, Sheng Yang wrote:
  Which is more convenient...
 
  Signed-off-by: Sheng Yang sh...@linux.intel.com
  ---
   virt/kvm/kvm_main.c |   10 ++
   1 files changed, 2 insertions(+), 8 deletions(-)
 
  diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
  index ffd261d..cd84b3e 100644
  --- a/virt/kvm/kvm_main.c
  +++ b/virt/kvm/kvm_main.c
  @@ -284,11 +284,7 @@ static int assigned_device_update_intx(struct kvm
  *kvm, return 0;
 
  if (irqchip_in_kernel(kvm)) {
  -   if (!msi2intx &&
  -   adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
  -   free_irq(adev->host_irq, (void *)kvm);
  -   pci_disable_msi(adev->dev);
  -   }
  +   kvm_free_assigned_irq(kvm, adev);
 
  if (!capable(CAP_SYS_RAWIO))
  return -EPERM;
  @@ -339,9 +335,7 @@ static int assigned_device_update_msi(struct kvm
  *kvm,
 
  if (irqchip_in_kernel(kvm)) {
  if (!msi2intx) {
  -   if (adev->irq_requested_type &
  -   KVM_ASSIGNED_DEV_HOST_INTX)
  -   free_irq(adev->host_irq, (void *)adev);
  +   kvm_free_assigned_irq(kvm, adev);
 
  r = pci_enable_msi(adev-dev);
  if (r)

 Regarding kvm_free_assigned_irq and
 assigned_device_update_msi/update_intx:

 if (cancel_work_sync(assigned_dev-interrupt_work))
 /* We had pending work. That means we will have to take
  * care of kvm_put_kvm.
  */
 kvm_put_kvm(kvm);

 free_irq(assigned_dev-host_irq, (void *)assigned_dev);

 What prevents the host IRQ from being triggered between kvm_put_kvm and
 free_irq?

 Also, if the kvm_put_kvm(kvm) from
 kvm_assigned_dev_interrupt_work_handler happens to be the last one,
 can't this happen:

 - kvm_assigned_dev_interrupt_work_handler
 - kvm_put_kvm
 - kvm_destroy_vm
 - kvm_arch_destroy_vm
 - kvm_free_all_assigned_devices
 - kvm_free_assigned_device
 - kvm_free_assigned_irq
 - cancel_work_sync(assigned_dev-interrupt_work)

 deadlock.

Nice catch! I've updated the patchset to address this, take a look? :)

-- 
regards
Yang, Sheng

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/15] KVM: Split IOAPIC structure

2008-12-25 Thread Sheng Yang
Prepared for reuse ioapic_redir_entry for MSI.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 include/linux/kvm_types.h |   17 +
 virt/kvm/ioapic.c |6 +++---
 virt/kvm/ioapic.h |   17 +
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 9b6f395..f07de1a 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -53,4 +53,21 @@ struct kvm_pio_request {
int rep;
 };
 
+union kvm_ioapic_redirect_entry {
+   u64 bits;
+   struct {
+   u8 vector;
+   u8 delivery_mode:3;
+   u8 dest_mode:1;
+   u8 delivery_status:1;
+   u8 polarity:1;
+   u8 remote_irr:1;
+   u8 trig_mode:1;
+   u8 mask:1;
+   u8 reserve:7;
+   u8 reserved[4];
+   u8 dest_id;
+   } fields;
+};
+
 #endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 23b81cf..ebb2ab5 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic 
*ioapic,
 
 static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
 {
-   union ioapic_redir_entry *pent;
+   union kvm_ioapic_redirect_entry *pent;
 
pent = ioapic-redirtbl[idx];
 
@@ -272,7 +272,7 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int level)
 {
u32 old_irr = ioapic-irr;
 u32 mask = 1 << irq;
-   union ioapic_redir_entry entry;
+   union kvm_ioapic_redirect_entry entry;
 
if (irq = 0  irq  IOAPIC_NUM_PINS) {
entry = ioapic-redirtbl[irq];
@@ -291,7 +291,7 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, 
int level)
 static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi,
int trigger_mode)
 {
-   union ioapic_redir_entry *ent;
+   union kvm_ioapic_redirect_entry *ent;
 
ent = ioapic-redirtbl[gsi];
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 49c9581..ee5b0bd 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -40,22 +40,7 @@ struct kvm_ioapic {
u32 id;
u32 irr;
u32 pad;
-   union ioapic_redir_entry {
-   u64 bits;
-   struct {
-   u8 vector;
-   u8 delivery_mode:3;
-   u8 dest_mode:1;
-   u8 delivery_status:1;
-   u8 polarity:1;
-   u8 remote_irr:1;
-   u8 trig_mode:1;
-   u8 mask:1;
-   u8 reserve:7;
-   u8 reserved[4];
-   u8 dest_id;
-   } fields;
-   } redirtbl[IOAPIC_NUM_PINS];
+   union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/15] KVM: Use kvm_free_assigned_irq() for free irq

2008-12-25 Thread Sheng Yang
Which is more convenient...

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 virt/kvm/kvm_main.c |   10 ++
 1 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ffd261d..cd84b3e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -284,11 +284,7 @@ static int assigned_device_update_intx(struct kvm *kvm,
return 0;
 
if (irqchip_in_kernel(kvm)) {
-   if (!msi2intx &&
-   adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
-   free_irq(adev->host_irq, (void *)kvm);
-   pci_disable_msi(adev->dev);
-   }
+   kvm_free_assigned_irq(kvm, adev);
 
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -339,9 +335,7 @@ static int assigned_device_update_msi(struct kvm *kvm,
 
if (irqchip_in_kernel(kvm)) {
if (!msi2intx) {
-   if (adev->irq_requested_type &
-   KVM_ASSIGNED_DEV_HOST_INTX)
-   free_irq(adev->host_irq, (void *)adev);
+   kvm_free_assigned_irq(kvm, adev);
 
r = pci_enable_msi(adev-dev);
if (r)
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/15] KVM: Add MSI_ACTION flag for assigned irq

2008-12-25 Thread Sheng Yang
For MSI disable feature later.

Notice I changed ABI here, but due to no userspace patch, I think it's OK.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 include/linux/kvm.h |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef7f98e..5b965f6 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -544,6 +544,7 @@ struct kvm_assigned_irq {
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI  (1 << 0)
+#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION  (1 << 0)
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI  (1 << 1)
 
 #endif
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/15] KVM: Add support to disable MSI for assigned device

2008-12-25 Thread Sheng Yang
MSI is always enabled by default for msi2intx=1. But if msi2intx=0, we
have to disable MSI if guest require to do so.

The patch also discard unnecessary msi2intx judgment if guest want to update
MSI state.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 virt/kvm/kvm_main.c |   12 ++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cd84b3e..111738b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -328,6 +328,15 @@ static int assigned_device_update_msi(struct kvm *kvm,
 adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
adev-guest_irq = airq-guest_irq;
adev-ack_notifier.gsi = airq-guest_irq;
+   } else {
+   /*
+* Guest require to disable device MSI, we disable MSI and
+* re-enable INTx by default again. Notice it's only for
+* non-msi2intx.
+*/
+   kvm_free_assigned_irq(kvm, adev);
+   assigned_device_update_intx(kvm, adev, airq);
+   return 0;
}
 
 if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
@@ -399,8 +408,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
}
}
 
-   if ((!msi2intx &&
-(assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+   if ((assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
 (msi2intx && match->dev->msi_enabled)) {
 #ifdef CONFIG_X86
r = assigned_device_update_msi(kvm, match, assigned_irq);
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/15] KVM: Using kfifo for irq recording

2008-12-25 Thread Sheng Yang
For MSI-X, we have to deal with multiply IRQ with same IRQ handler, so it's
necessary to record the IRQ that trigger the IRQ handler.

And this one is also useful for fixing kvm_free_assigned_irq().

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 include/linux/kvm_host.h |4 
 virt/kvm/kvm_main.c  |   30 +++---
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fbf102c..84b11d5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -17,6 +17,7 @@
 #include <linux/preempt.h>
 #include <linux/marker.h>
 #include <linux/msi.h>
+#include <linux/kfifo.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -313,6 +314,9 @@ struct kvm_assigned_dev_kernel {
int host_irq;
bool host_irq_disabled;
int guest_irq;
+#define KVM_ASSIGNED_DEV_IRQ_FIFO_LEN  0x100
+   struct kfifo *irq_fifo;
+   spinlock_t irq_fifo_lock;
 #define KVM_ASSIGNED_DEV_GUEST_INTX(1  0)
 #define KVM_ASSIGNED_DEV_GUEST_MSI (1  1)
 #define KVM_ASSIGNED_DEV_HOST_INTX (1  8)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a51e630..1863942 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -99,6 +99,8 @@ static struct kvm_assigned_dev_kernel 
*kvm_find_assigned_dev(struct list_head *h
 static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 {
struct kvm_assigned_dev_kernel *assigned_dev;
+   int irq;
+   u32 gsi;
 
assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
interrupt_work);
@@ -109,14 +111,22 @@ static void 
kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 */
mutex_lock(assigned_dev-kvm-lock);
 
-   kvm_set_irq(assigned_dev-kvm, assigned_dev-irq_source_id,
-   assigned_dev-guest_irq, 1);
+handle_irq:
+   kfifo_get(assigned_dev-irq_fifo,
+ (unsigned char *)irq, sizeof(int));
+
+   gsi = assigned_dev-guest_irq;
+
+   kvm_set_irq(assigned_dev-kvm, assigned_dev-irq_source_id, gsi, 1);
 
if (assigned_dev-irq_requested_type  KVM_ASSIGNED_DEV_GUEST_MSI) {
enable_irq(assigned_dev-host_irq);
assigned_dev-host_irq_disabled = false;
}
 
+   if (kfifo_len(assigned_dev-irq_fifo) != 0)
+   goto handle_irq;
+
mutex_unlock(assigned_dev-kvm-lock);
kvm_put_kvm(assigned_dev-kvm);
 }
@@ -128,6 +138,9 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void 
*dev_id)
 
kvm_get_kvm(assigned_dev-kvm);
 
+   kfifo_put(assigned_dev-irq_fifo,
+ (unsigned char *)irq, sizeof(int));
+
schedule_work(assigned_dev-interrupt_work);
 
disable_irq_nosync(irq);
@@ -201,6 +214,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
pci_dev_put(assigned_dev-dev);
 
list_del(assigned_dev-list);
+   kfifo_free(assigned_dev-irq_fifo);
kfree(assigned_dev);
 }
 
@@ -448,15 +462,25 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
list_add(match-list, kvm-arch.assigned_dev_head);
 
+   spin_lock_init(match-irq_fifo_lock);
+   match-irq_fifo = kfifo_alloc(sizeof(unsigned char) *
+ KVM_ASSIGNED_DEV_IRQ_FIFO_LEN,
+ GFP_KERNEL | __GFP_ZERO,
+ match-irq_fifo_lock);
+   if (!match-irq_fifo)
+   goto out_list_del;
+
if (assigned_dev-flags  KVM_DEV_ASSIGN_ENABLE_IOMMU) {
r = kvm_iommu_map_guest(kvm, match);
if (r)
-   goto out_list_del;
+   goto out_fifo_del;
}
 
 out:
mutex_unlock(kvm-lock);
return r;
+out_fifo_del:
+   kfifo_free(match-irq_fifo);
 out_list_del:
list_del(match-list);
pci_release_regions(dev);
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/15] KVM: Using ioapic_irqchip() macro for kvm_set_irq

2008-12-25 Thread Sheng Yang

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 virt/kvm/irq_comm.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index abfab46..47243ef 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -39,7 +39,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, 
int level)
 * IOAPIC.  So set the bit in both. The guest will ignore
 * writes to the unused one.
 */
-   kvm_ioapic_set_irq(kvm->arch.vioapic, irq, !!(*irq_state));
+   kvm_ioapic_set_irq(ioapic_irqchip(kvm), irq, !!(*irq_state));
 #ifdef CONFIG_X86
kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
 #endif
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/15] KVM: Update intr delivery func to accept unsigned long* bitmap

2008-12-25 Thread Sheng Yang
Would be used with bit ops, and would be easily extended if KVM_MAX_VCPUS is
increased.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 arch/x86/kvm/lapic.c |8 
 include/linux/kvm_host.h |2 +-
 virt/kvm/ioapic.c|4 ++--
 virt/kvm/ioapic.h|4 ++--
 virt/kvm/irq_comm.c  |6 +++---
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c..c1e4935 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -403,7 +403,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
 }
 
 static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
-  unsigned long bitmap)
+  unsigned long *bitmap)
 {
int last;
int next;
@@ -415,7 +415,7 @@ static struct kvm_lapic *kvm_apic_round_robin(struct kvm 
*kvm, u8 vector,
do {
if (++next == KVM_MAX_VCPUS)
next = 0;
-   if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
+   if (kvm->vcpus[next] == NULL || !test_bit(next, bitmap))
continue;
apic = kvm-vcpus[next]-arch.apic;
if (apic  apic_enabled(apic))
@@ -431,7 +431,7 @@ static struct kvm_lapic *kvm_apic_round_robin(struct kvm 
*kvm, u8 vector,
 }
 
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-   unsigned long bitmap)
+   unsigned long *bitmap)
 {
struct kvm_lapic *apic;
 
@@ -502,7 +502,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
}
 
if (delivery_mode == APIC_DM_LOWEST) {
-   target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map);
+   target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, &lpr_map);
if (target != NULL)
__apic_accept_irq(target-arch.apic, delivery_mode,
  vector, level, trig_mode);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4f92317..fbf102c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -332,7 +332,7 @@ struct kvm_gsi_msg {
 
 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
   union kvm_ioapic_redirect_entry *entry,
-  u32 *deliver_bitmask);
+  unsigned long *deliver_bitmask);
 void kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 gsi, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ebd5ba6..164a746 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -154,7 +154,7 @@ static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
 }
 
 void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-u8 dest_mode, u32 *mask)
+u8 dest_mode, unsigned long *mask)
 {
int i;
struct kvm *kvm = ioapic-kvm;
@@ -195,7 +195,7 @@ void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic 
*ioapic, u8 dest,
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
union kvm_ioapic_redirect_entry entry = ioapic-redirtbl[irq];
-   u32 deliver_bitmask;
+   unsigned long deliver_bitmask;
struct kvm_vcpu *vcpu;
int vcpu_id, r = 0;
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index e107dbb..c418a7f 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -65,12 +65,12 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm 
*kvm)
 }
 
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
-  unsigned long bitmap);
+  unsigned long *bitmap);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-u8 dest_mode, u32 *mask);
+u8 dest_mode, unsigned long *mask);
 
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 1949587..e74d679 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -31,7 +31,7 @@
 
 void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
   union kvm_ioapic_redirect_entry *entry,
-  u32 *deliver_bitmask)
+  unsigned long *deliver_bitmask)
 {
struct kvm_vcpu *vcpu;
 
@@ -41,7 +41,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
switch 

[PATCH 06/15] KVM: Improve MSI dispatch function

2008-12-25 Thread Sheng Yang
Prepare to merge with kvm_set_irq().

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 virt/kvm/kvm_main.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3494861..599257e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -87,7 +87,7 @@ static bool kvm_rebooting;
 #ifdef KVM_CAP_DEVICE_ASSIGNMENT
 
 #ifdef CONFIG_X86
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev, 
u32 gsi)
 {
int vcpu_id;
struct kvm_vcpu *vcpu;
@@ -99,7 +99,7 @@ static void assigned_device_msi_dispatch(struct 
kvm_assigned_dev_kernel *dev)
BUG_ON(!ioapic);
 
mutex_lock(dev-kvm-gsi_msg_lock);
-   gsi_msg = kvm_find_gsi_msg(dev-kvm, dev-guest_irq);
+   gsi_msg = kvm_find_gsi_msg(dev-kvm, gsi);
if (!gsi_msg) {
printk(KERN_WARNING kvm: fail to find correlated gsi_msg\n);
return;
@@ -143,7 +143,7 @@ static void assigned_device_msi_dispatch(struct 
kvm_assigned_dev_kernel *dev)
}
 }
 #else
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) 
{}
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev, 
u32 gsi) {}
 #endif
 
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head 
*head,
@@ -178,7 +178,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct 
work_struct *work)
assigned_dev-guest_irq, 1);
else if (assigned_dev-irq_requested_type 
KVM_ASSIGNED_DEV_GUEST_MSI) {
-   assigned_device_msi_dispatch(assigned_dev);
+   assigned_device_msi_dispatch(assigned_dev, 
assigned_dev-guest_irq);
enable_irq(assigned_dev-host_irq);
assigned_dev-host_irq_disabled = false;
}
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/15] KVM: bit ops for deliver_bitmap

2008-12-25 Thread Sheng Yang
It's also convenient when we extend KVM supported vcpu number in the future.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 arch/x86/kvm/lapic.c |7 ---
 virt/kvm/ioapic.c|   24 +---
 virt/kvm/irq_comm.c  |   16 
 3 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c1e4935..359e02c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -477,9 +477,10 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
struct kvm_vcpu *target;
struct kvm_vcpu *vcpu;
-   unsigned long lpr_map = 0;
+   DECLARE_BITMAP(lpr_map, KVM_MAX_VCPUS);
int i;
 
+   bitmap_zero(lpr_map, KVM_MAX_VCPUS);
apic_debug(icr_high 0x%x, icr_low 0x%x, 
   short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, 
   dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n,
@@ -494,7 +495,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
if (vcpu-arch.apic 
apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
if (delivery_mode == APIC_DM_LOWEST)
-   set_bit(vcpu-vcpu_id, lpr_map);
+   set_bit(vcpu-vcpu_id, lpr_map);
else
__apic_accept_irq(vcpu-arch.apic, 
delivery_mode,
  vector, level, trig_mode);
@@ -502,7 +503,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
}
 
if (delivery_mode == APIC_DM_LOWEST) {
-   target = kvm_get_lowest_prio_vcpu(vcpu-kvm, vector, lpr_map);
+   target = kvm_get_lowest_prio_vcpu(vcpu-kvm, vector, lpr_map);
if (target != NULL)
__apic_accept_irq(target-arch.apic, delivery_mode,
  vector, level, trig_mode);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 164a746..bf83f5e 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -195,7 +195,7 @@ void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic 
*ioapic, u8 dest,
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
union kvm_ioapic_redirect_entry entry = ioapic-redirtbl[irq];
-   unsigned long deliver_bitmask;
+   DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
struct kvm_vcpu *vcpu;
int vcpu_id, r = 0;
 
@@ -205,22 +205,24 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq)
 entry.fields.delivery_mode, entry.fields.vector,
 entry.fields.trig_mode);
 
-   kvm_get_intr_delivery_bitmask(ioapic, entry, deliver_bitmask);
-   if (!deliver_bitmask) {
-   ioapic_debug(no target on destination\n);
-   return 0;
-   }
+   bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
 
/* Always delivery PIT interrupt to vcpu 0 */
 #ifdef CONFIG_X86
if (irq == 0)
-   deliver_bitmask = 1  0;
+   set_bit(0, deliver_bitmask);
+   else
 #endif
+   kvm_get_intr_delivery_bitmask(ioapic, entry, deliver_bitmask);
+
+   if (find_first_bit(deliver_bitmask, KVM_MAX_VCPUS) = KVM_MAX_VCPUS) {
+   ioapic_debug(no target on destination\n);
+   return 0;
+   }
 
-   for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-   if (!(deliver_bitmask  (1  vcpu_id)))
-   continue;
-   deliver_bitmask = ~(1  vcpu_id);
+   while ((vcpu_id = find_first_bit(deliver_bitmask, KVM_MAX_VCPUS))
+KVM_MAX_VCPUS) {
+   clear_bit(vcpu_id, deliver_bitmask);
vcpu = ioapic-kvm-vcpus[vcpu_id];
if (vcpu) {
if (entry.fields.delivery_mode ==
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e74d679..ecda2c1 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -42,7 +42,7 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
case IOAPIC_LOWEST_PRIORITY:
vcpu = kvm_get_lowest_prio_vcpu(ioapic-kvm,
entry-fields.vector, deliver_bitmask);
-   *deliver_bitmask = 1  vcpu-vcpu_id;
+   set_bit(vcpu-vcpu_id, deliver_bitmask);
break;
case IOAPIC_FIXED:
case IOAPIC_NMI:
@@ -63,11 +63,12 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 
gsi, int level)
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
struct kvm_gsi_msg *gsi_msg;
union kvm_ioapic_redirect_entry entry;
-   unsigned long deliver_bitmask;
+   DECLARE_BITMAP(deliver_bitmask, KVM_MAX_VCPUS);
 
BUG_ON(!ioapic);
 #endif
 
+   bitmap_zero(deliver_bitmask, KVM_MAX_VCPUS);
if (!(gsi  KVM_GSI_MSG_MASK)) {
int irq = gsi;
 
@@ -111,16 +112,15 @@ void 

[PATCH 08/15] KVM: Merge MSI handling to kvm_set_irq

2008-12-25 Thread Sheng Yang
Using kvm_set_irq to handle all interrupt injection.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 include/linux/kvm_host.h |2 +-
 virt/kvm/irq_comm.c  |   98 +++---
 virt/kvm/kvm_main.c  |   77 +++-
 3 files changed, 90 insertions(+), 87 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index aa2606b..5b671b6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -330,7 +330,7 @@ struct kvm_gsi_msg {
struct hlist_node link;
 };
 
-void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
+void kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 gsi, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
   struct kvm_irq_ack_notifier *kian);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 47243ef..63cdf01 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -20,28 +20,96 @@
  */
 
 #include linux/kvm_host.h
+
+#ifdef CONFIG_X86
+#include asm/msidef.h
+#endif
+
 #include irq.h
 
 #include ioapic.h
 
 /* This should be called with the kvm-lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
+void kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 gsi, int level)
 {
-   unsigned long *irq_state = (unsigned long *)kvm-arch.irq_states[irq];
-
-   /* Logical OR for level trig interrupt */
-   if (level)
-   set_bit(irq_source_id, irq_state);
-   else
-   clear_bit(irq_source_id, irq_state);
-
-   /* Not possible to detect if the guest uses the PIC or the
-* IOAPIC.  So set the bit in both. The guest will ignore
-* writes to the unused one.
-*/
-   kvm_ioapic_set_irq(ioapic_irqchip(kvm), irq, !!(*irq_state));
+   unsigned long *irq_state;
+#ifdef CONFIG_X86
+   int vcpu_id;
+   struct kvm_vcpu *vcpu;
+   struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+   struct kvm_gsi_msg *gsi_msg;
+   int dest_id, vector, dest_mode, trig_mode, delivery_mode;
+   u32 deliver_bitmask;
+
+   BUG_ON(!ioapic);
+#endif
+
+   if (!(gsi  KVM_GSI_MSG_MASK)) {
+   int irq = gsi;
+
+   irq_state = (unsigned long *)kvm-arch.irq_states[irq];
+
+   /* Logical OR for level trig interrupt */
+   if (level)
+   set_bit(irq_source_id, irq_state);
+   else
+   clear_bit(irq_source_id, irq_state);
+
+   /* Not possible to detect if the guest uses the PIC or the
+* IOAPIC.  So set the bit in both. The guest will ignore
+* writes to the unused one.
+*/
+   kvm_ioapic_set_irq(ioapic, irq, !!(*irq_state));
 #ifdef CONFIG_X86
-   kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
+   kvm_pic_set_irq(pic_irqchip(kvm), irq, !!(*irq_state));
+#endif
+   return;
+   }
+
+#ifdef CONFIG_X86
+   mutex_lock(kvm-gsi_msg_lock);
+   gsi_msg = kvm_find_gsi_msg(kvm, gsi);
+   mutex_unlock(kvm-gsi_msg_lock);
+   if (!gsi_msg) {
+   printk(KERN_WARNING kvm: fail to find correlated gsi_msg\n);
+   return;
+   }
+
+   dest_id = (gsi_msg-msg.address_lo  MSI_ADDR_DEST_ID_MASK)
+MSI_ADDR_DEST_ID_SHIFT;
+   vector = (gsi_msg-msg.data  MSI_DATA_VECTOR_MASK)
+MSI_DATA_VECTOR_SHIFT;
+   dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+   (unsigned long *)gsi_msg-msg.address_lo);
+   trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+   (unsigned long *)gsi_msg-msg.data);
+   delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+   (unsigned long *)gsi_msg-msg.data);
+   deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+   dest_id, dest_mode);
+   /* IOAPIC delivery mode value is the same as MSI here */
+   switch (delivery_mode) {
+   case IOAPIC_LOWEST_PRIORITY:
+   vcpu = kvm_get_lowest_prio_vcpu(ioapic-kvm, vector,
+   deliver_bitmask);
+   if (vcpu != NULL)
+   kvm_apic_set_irq(vcpu, vector, trig_mode);
+   else
+   printk(KERN_INFO kvm: null lowest priority vcpu!\n);
+   break;
+   case IOAPIC_FIXED:
+   for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+   if (!(deliver_bitmask  (1  vcpu_id)))
+   continue;
+   deliver_bitmask = ~(1  vcpu_id);
+   vcpu = ioapic-kvm-vcpus[vcpu_id];
+   if (vcpu)
+   kvm_apic_set_irq(vcpu, vector, 

[PATCH 11/15] KVM: Change API of kvm_ioapic_get_delivery_bitmask

2008-12-25 Thread Sheng Yang
In order to use with bit ops.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 virt/kvm/ioapic.c   |   17 -
 virt/kvm/ioapic.h   |4 ++--
 virt/kvm/irq_comm.c |5 +++--
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index af9f5de..ebd5ba6 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -153,22 +153,22 @@ static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
kvm_vcpu_kick(vcpu);
 }
 
-u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-   u8 dest_mode)
+void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+u8 dest_mode, u32 *mask)
 {
-   u32 mask = 0;
int i;
struct kvm *kvm = ioapic-kvm;
struct kvm_vcpu *vcpu;
 
ioapic_debug(dest %d dest_mode %d\n, dest, dest_mode);
 
+   *mask = 0;
if (dest_mode == 0) {   /* Physical mode. */
if (dest == 0xFF) { /* Broadcast. */
for (i = 0; i  KVM_MAX_VCPUS; ++i)
if (kvm-vcpus[i]  kvm-vcpus[i]-arch.apic)
-   mask |= 1  i;
-   return mask;
+   *mask |= 1  i;
+   return;
}
for (i = 0; i  KVM_MAX_VCPUS; ++i) {
vcpu = kvm-vcpus[i];
@@ -176,7 +176,7 @@ u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic 
*ioapic, u8 dest,
continue;
if (kvm_apic_match_physical_addr(vcpu-arch.apic, 
dest)) {
if (vcpu-arch.apic)
-   mask = 1  i;
+   *mask = 1  i;
break;
}
}
@@ -187,10 +187,9 @@ u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic 
*ioapic, u8 dest,
continue;
if (vcpu-arch.apic 
kvm_apic_match_logical_addr(vcpu-arch.apic, dest))
-   mask |= 1  vcpu-vcpu_id;
+   *mask |= 1  vcpu-vcpu_id;
}
-   ioapic_debug(mask %x\n, mask);
-   return mask;
+   ioapic_debug(mask %x\n, *mask);
 }
 
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index ee5b0bd..e107dbb 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -70,7 +70,7 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int 
trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
-   u8 dest_mode);
+void kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+u8 dest_mode, u32 *mask);
 
 #endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index d89d8b2..1949587 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -35,8 +35,9 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
 {
struct kvm_vcpu *vcpu;
 
-   *deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
-   entry-fields.dest_id, entry-fields.dest_mode);
+   kvm_ioapic_get_delivery_bitmask(ioapic, entry-fields.dest_id,
+   entry-fields.dest_mode,
+   deliver_bitmask);
switch (entry-fields.delivery_mode) {
case IOAPIC_LOWEST_PRIORITY:
vcpu = kvm_get_lowest_prio_vcpu(ioapic-kvm,
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/15] Device assignment MSI enhancement

2008-12-25 Thread Sheng Yang
Hi Avi and Marcelo

Merry Xmas! And here is v2 of the patchset. It is targeted at 2.6.29, as it contains
a lot of fixes and improvements for the current device assignment and MSI features.

Change from V1:

Addressed Marcelo's comments, and:
1. Fix a race in kvm_free_assigned_irq(). To do this, I fetched one
patch (irq_fifo) from the original MSI-X patchset. Indeed a nice catch by Marcelo.
:)

2. Unified kvm_set_irq() with ioapic_deliver(). It didn't save much, but
duplication is always bothersome, and I have changed the vcpu bitmask to a real
bitmap (maybe not everywhere, just where I have seen it).

And for V1:

1. Add a gsi_msg mapping mechanism, where a gsi can be used to indicate an MSI
interrupt. (Notice the API/ABI changed a little, but we don't have a userspace patch
yet, so it should be OK.)

2. Provide MSI disable capability.

arch/x86/kvm/lapic.c  |   11 ++-
include/linux/kvm.h   |   15 +++-
include/linux/kvm_host.h  |   26 +-
include/linux/kvm_types.h |   17 
virt/kvm/ioapic.c |  117 ++---
virt/kvm/ioapic.h |   23 +
virt/kvm/irq_comm.c   |  184 ---
virt/kvm/kvm_main.c   |  212 -
8 files changed, 415 insertions(+), 190 deletions(-)

Sorry for the patchset size, it's too easy to grow fast, and I am a little too
lazy to split them into more batches in the Xmas... :)

--
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/15] KVM: Using gsi_msg mapping for MSI device assignment

2008-12-25 Thread Sheng Yang
Convert the MSI userspace interface to support gsi_msg mapping (and nobody should
be using the old interface yet...).

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 include/linux/kvm_host.h |1 -
 virt/kvm/kvm_main.c  |   35 ++-
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0e5741a..aa2606b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -313,7 +313,6 @@ struct kvm_assigned_dev_kernel {
int host_irq;
bool host_irq_disabled;
int guest_irq;
-   struct msi_msg guest_msi;
 #define KVM_ASSIGNED_DEV_GUEST_INTX(1  0)
 #define KVM_ASSIGNED_DEV_GUEST_MSI (1  1)
 #define KVM_ASSIGNED_DEV_HOST_INTX (1  8)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 26bccf9..3494861 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -92,20 +92,30 @@ static void assigned_device_msi_dispatch(struct 
kvm_assigned_dev_kernel *dev)
int vcpu_id;
struct kvm_vcpu *vcpu;
struct kvm_ioapic *ioapic = ioapic_irqchip(dev-kvm);
-   int dest_id = (dev-guest_msi.address_lo  MSI_ADDR_DEST_ID_MASK)
-MSI_ADDR_DEST_ID_SHIFT;
-   int vector = (dev-guest_msi.data  MSI_DATA_VECTOR_MASK)
-MSI_DATA_VECTOR_SHIFT;
-   int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
-   (unsigned long *)dev-guest_msi.address_lo);
-   int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
-   (unsigned long *)dev-guest_msi.data);
-   int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
-   (unsigned long *)dev-guest_msi.data);
+   struct kvm_gsi_msg *gsi_msg;
+   int dest_id, vector, dest_mode, trig_mode, delivery_mode;
u32 deliver_bitmask;
 
BUG_ON(!ioapic);
 
+   mutex_lock(dev-kvm-gsi_msg_lock);
+   gsi_msg = kvm_find_gsi_msg(dev-kvm, dev-guest_irq);
+   if (!gsi_msg) {
+   printk(KERN_WARNING kvm: fail to find correlated gsi_msg\n);
+   return;
+   }
+   mutex_unlock(dev-kvm-gsi_msg_lock);
+
+   dest_id = (gsi_msg-msg.address_lo  MSI_ADDR_DEST_ID_MASK)
+MSI_ADDR_DEST_ID_SHIFT;
+   vector = (gsi_msg-msg.data  MSI_DATA_VECTOR_MASK)
+MSI_DATA_VECTOR_SHIFT;
+   dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+   (unsigned long *)gsi_msg-msg.address_lo);
+   trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+   (unsigned long *)gsi_msg-msg.data);
+   delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+   (unsigned long *)gsi_msg-msg.data);
deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
dest_id, dest_mode);
/* IOAPIC delivery mode value is the same as MSI here */
@@ -316,17 +326,16 @@ static int assigned_device_update_msi(struct kvm *kvm,
 {
int r;
 
+   adev-guest_irq = airq-guest_irq;
+
if (airq-flags  KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
/* x86 don't care upper address of guest msi message addr */
adev-irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
adev-irq_requested_type = ~KVM_ASSIGNED_DEV_GUEST_INTX;
-   adev-guest_msi.address_lo = airq-guest_msi.addr_lo;
-   adev-guest_msi.data = airq-guest_msi.data;
adev-ack_notifier.gsi = -1;
} else if (msi2intx) {
adev-irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
adev-irq_requested_type = ~KVM_ASSIGNED_DEV_GUEST_MSI;
-   adev-guest_irq = airq-guest_irq;
adev-ack_notifier.gsi = airq-guest_irq;
} else {
/*
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/15] KVM: Unified the delivery of IOAPIC and MSI

2008-12-25 Thread Sheng Yang
Duplicate code is always bothering...

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 include/linux/kvm_host.h |3 ++
 virt/kvm/ioapic.c|   84 +-
 virt/kvm/irq_comm.c  |   75 
 3 files changed, 79 insertions(+), 83 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5b671b6..4f92317 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -330,6 +330,9 @@ struct kvm_gsi_msg {
struct hlist_node link;
 };
 
+void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
+  union kvm_ioapic_redirect_entry *entry,
+  u32 *deliver_bitmask);
 void kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 gsi, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ebb2ab5..af9f5de 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -195,75 +195,53 @@ u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic 
*ioapic, u8 dest,
 
 static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 {
-   u8 dest = ioapic-redirtbl[irq].fields.dest_id;
-   u8 dest_mode = ioapic-redirtbl[irq].fields.dest_mode;
-   u8 delivery_mode = ioapic-redirtbl[irq].fields.delivery_mode;
-   u8 vector = ioapic-redirtbl[irq].fields.vector;
-   u8 trig_mode = ioapic-redirtbl[irq].fields.trig_mode;
+   union kvm_ioapic_redirect_entry entry = ioapic-redirtbl[irq];
u32 deliver_bitmask;
struct kvm_vcpu *vcpu;
int vcpu_id, r = 0;
 
ioapic_debug(dest=%x dest_mode=%x delivery_mode=%x 
 vector=%x trig_mode=%x\n,
-dest, dest_mode, delivery_mode, vector, trig_mode);
+entry.fields.dest, entry.fields.dest_mode,
+entry.fields.delivery_mode, entry.fields.vector,
+entry.fields.trig_mode);
 
-   deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
- dest_mode);
+   kvm_get_intr_delivery_bitmask(ioapic, entry, deliver_bitmask);
if (!deliver_bitmask) {
ioapic_debug(no target on destination\n);
return 0;
}
 
-   switch (delivery_mode) {
-   case IOAPIC_LOWEST_PRIORITY:
-   vcpu = kvm_get_lowest_prio_vcpu(ioapic-kvm, vector,
-   deliver_bitmask);
+   /* Always delivery PIT interrupt to vcpu 0 */
 #ifdef CONFIG_X86
-   if (irq == 0)
-   vcpu = ioapic-kvm-vcpus[0];
+   if (irq == 0)
+   deliver_bitmask = 1  0;
 #endif
-   if (vcpu != NULL)
-   r = ioapic_inj_irq(ioapic, vcpu, vector,
-  trig_mode, delivery_mode);
-   else
-   ioapic_debug(null lowest prio vcpu: 
-mask=%x vector=%x delivery_mode=%x\n,
-deliver_bitmask, vector, 
IOAPIC_LOWEST_PRIORITY);
-   break;
-   case IOAPIC_FIXED:
-#ifdef CONFIG_X86
-   if (irq == 0)
-   deliver_bitmask = 1;
-#endif
-   for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-   if (!(deliver_bitmask  (1  vcpu_id)))
-   continue;
-   deliver_bitmask = ~(1  vcpu_id);
-   vcpu = ioapic-kvm-vcpus[vcpu_id];
-   if (vcpu) {
-   r = ioapic_inj_irq(ioapic, vcpu, vector,
-  trig_mode, delivery_mode);
-   }
-   }
-   break;
-   case IOAPIC_NMI:
-   for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
-   if (!(deliver_bitmask  (1  vcpu_id)))
-   continue;
-   deliver_bitmask = ~(1  vcpu_id);
-   vcpu = ioapic-kvm-vcpus[vcpu_id];
-   if (vcpu)
+
+   for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+   if (!(deliver_bitmask  (1  vcpu_id)))
+   continue;
+   deliver_bitmask = ~(1  vcpu_id);
+   vcpu = ioapic-kvm-vcpus[vcpu_id];
+   if (vcpu) {
+   if (entry.fields.delivery_mode ==
+   IOAPIC_LOWEST_PRIORITY ||
+   entry.fields.delivery_mode == IOAPIC_FIXED)
+   r = ioapic_inj_irq(ioapic, vcpu,
+  entry.fields.vector,
+  entry.fields.trig_mode,
+   

Re: [PATCH 0/15] Device assignment MSI enhancement

2008-12-25 Thread Sheng Yang
On Thursday 25 December 2008 17:09:24 Sheng Yang wrote:
 Hi Avi and Marcelo

 Merry Xmas! And here is the v2 of patchset. Target at 2.6.29 for it
 contained a lot of fix and improvement of current device assignment and MSI
 feature.

 Change from V1:

 Addressed Marcelo's comments, and:
 1. Fix racy in kvm_free_assigned_irq(). In case to do this, I fetch one
 patch (irq_fifo) from original MSI-X patchset. Indeed a nice catch of
 Marcelo.

 :)

 2. Unified kvm_set_irq() with ioapic_deliver(). It didn't save much, but
 duplicate is always bothering, and I have modified bitmask for vcpu to a
 real bitmap (maybe not all, just what I have seen).

Forgot to mention: I didn't change the API for the guest to disable MSI, which was
part of Marcelo's comments, because I think a single "update" interface with some
flags representing the current bit state is enough for now...

-- 
regards
Yang, Sheng


 And for V1:

 1. Add gsi_msg mapping mechanism, which gsi can used to indicated a MSI
 interrupt.(Notice API/ABI changed a little, but we don't have userspace
 patch now, so it should be OK.)

 2. Provide MSI disable capability.

 arch/x86/kvm/lapic.c  |   11 ++-
 include/linux/kvm.h   |   15 +++-
 include/linux/kvm_host.h  |   26 +-
 include/linux/kvm_types.h |   17 
 virt/kvm/ioapic.c |  117 ++---
 virt/kvm/ioapic.h |   23 +
 virt/kvm/irq_comm.c   |  184 ---
 virt/kvm/kvm_main.c   |  212
 - 8 files changed, 415
 insertions(+), 190 deletions(-)

 Sorry for the patchset size, it's too easy to grow fast, and I am a little
 too lazy to split them into more batches in the Xmas... :)

 --
 regards
 Yang, Sheng
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Nested KVM

2008-12-25 Thread Avi Kivity

Alexander Graf wrote:


Avi, could you please apply that patch for kvm-82 too, so we get 
something working out? I'll take a closer look at what's broken 
exactly later on.


I'll just revert the emulation loop patch.  We can reapply it once we 
fix the problem.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ANNOUNCE] kvm-82 release

2008-12-25 Thread Avi Kivity
This release adds support for nested virtualization, a feature which 
allows you to run kvm (and possibly other hypervisors) inside a guest. 
This is an experimental feature and is only available on AMD hosts.


There are fixes included for a couple of minor vulnerabilities: one for 
the slirp stack (-net user), which is not usually used in production, 
and another in the vnc server, which allows malicious users to cause a 
VM to hang.


Changes from kvm-81:
- merge qemu-svn
  - uuid support
  - fix CVE-2007-5729 (slirp vulnerability)
  - fix CVE-2008-2382 (vnc denial of service)
  - better scsi support
  - pci subsystem id for pci devices
- this will cause Windows guest to rediscover hardware
  - improved I/O parallelism
  - ppc kvm support
  - hpet support
- not fully integrated yet
  - monitor 'info status' command
- merge bochs-bios-cvs
  - uuid support
  - prepare for S3 sleep
- merge vgabios-cvs
- much improved guest debugging (Jan Kiszka)
  - both debugger in guest and debugger in host
- fix kvm makefile for separate object dir (Andi Kleen)
- nested svm (Alexander Graf)
  - run kvm in kvm in kvm...
- fix ia64 register and stack access from userspace (Jes Sorensen)
- don't treat a global pte as global if cr4.pge is clear
  - fixes Vista x86 smp failure on boot
- properly lock virtual i8259 interrupt controller
- fix large host pages invlpg/resync
  - fixes oops when using host hugetlbfs
- fix vmload instruction misemulated as lidt


Notes:
If you use the modules bundled with kvm-82, you can use any version
of Linux from 2.6.16 upwards.  You may also use kvm-81 userspace with
the kvm modules provided by Linux 2.6.25 or above.  Some features may
only be available in newer releases.

http://kvm.qumranet.com

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2168011 ] kvm_host.h:128: error: field 'mmu_notifier' has incomplete

2008-12-25 Thread SourceForge.net
Bugs item #2168011, was opened at 2008-10-15 11:53
Message generated for change (Comment added) made by wg1
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detailatid=893831aid=2168011group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Pending
Resolution: Fixed
Priority: 5
Private: No
Submitted By: Stephane Bakhos (nuitari3)
Assigned to: Nobody/Anonymous (nobody)
Summary: kvm_host.h:128: error: field 'mmu_notifier' has incomplete

Initial Comment:
When compiling kvm-77/76 on a 2.6.27 kernel with AMD IOMMU activated I get:

kvm_host.h:128: error: field 'mmu_notifier' has incomplete type

--

Comment By: Wolfram Gloger (wg1)
Date: 2008-12-25 11:21

Message:
It would only print a warning for those old kernels, not break them.


--

Comment By: Avi Kivity (avik)
Date: 2008-12-24 17:27

Message:
Well, very old kernels don't have kvm support at all, and this would break
them.

--

Comment By: Wolfram Gloger (wg1)
Date: 2008-12-24 16:53

Message:
Oops, you are correct of course.  CONFIG_KVM set and all is well.
May I suggest the following patch so this doesn't bite people so easily.

--- configure.orig  2008-12-14 14:16:27.0 +0100
+++ configure   2008-12-24 16:46:03.0 +0100
@@ -134,6 +134,19 @@
 fi
 fi
 
+if [ -e $kerneldir/.config ]; then
+if egrep -q ^CONFIG_KVM=(y|m) $kerneldir/.config; then
+   :
+else
+   echo Warning: kernel not configured for KVM
+   echo kvm kernel modules may not build correctly
+fi
+else
+echo Error: kernel .config not found
+echo Please make sure your kernel is configured
+exit 1
+fi
+
 #configure user dir
 (cd user; ./configure --prefix=$prefix --kerneldir=$libkvm_kerneldir
\
   --arch=$arch --processor=$processor \


--

Comment By: Avi Kivity (avik)
Date: 2008-12-24 14:49

Message:
You should enable the host kernel's kvm modules even if you don't plan to
use them, so they will select functionality like mmu notifiers.

--

Comment By: Wolfram Gloger (wg1)
Date: 2008-12-15 19:11

Message:
Sorry, cannot seem to attach a file (I looked hard!), so here the patch
inline (applies to kvm-81, too):

--- kernel/include/linux/kvm_host.h.orig2008-11-12
13:23:58.0 +0100
+++ kernel/include/linux/kvm_host.h 2008-11-15 21:08:02.0
+0100
@@ -46,6 +46,7 @@
  * the COPYING file in the top-level directory.
  */
 
+#include linux/mmu_notifier.h
 #include linux/types.h
 #include linux/hardirq.h
 #include linux/list.h
--- kernel/include/linux/mmu_notifier.h.orig2008-12-15
18:31:52.0 +0100
+++ kernel/include/linux/mmu_notifier.h 2008-11-15 21:19:08.0
+0100
@@ -0,0 +1,6 @@
+#ifndef _LINUX_MMU_NOTIFIER_H
+#define _LINUX_MMU_NOTIFIER_H
+
+struct mmu_notifier {};
+
+#endif


--

Comment By: Wolfram Gloger (wg1)
Date: 2008-12-15 19:00

Message:
I'm seeing this too, with kvm-79 and now kvm-81 on Linux-2.6.27.7 and
Linux-2.6.27.9.
I'm surprised that such a FTBS is not more prevalent..
For now, I have helped myself with the attached patch.


--

Comment By: Stephane Bakhos (nuitari3)
Date: 2008-10-15 16:18

Message:
It looks like I was wrong in saying that AMD IOMMU was the cause. When I
complied again I used make -j5 and it looks like it just skipped the kernel
modules.


--

Comment By: Stephane Bakhos (nuitari3)
Date: 2008-10-15 12:11

Message:
It looks like I was wrong in saying that AMD IOMMU was the cause. When I
complied again I used make -j5 and it looks like it just skipped the kernel
modules.


--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detailatid=893831aid=2168011group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] KVM: Using kfifo for irq recording

2008-12-25 Thread Avi Kivity

Sheng Yang wrote:

For MSI-X, we have to deal with multiple IRQs sharing the same IRQ handler, so it's
necessary to record which IRQ triggered the handler.

  


Does MSI-X disallow coalescing two requests into one interrupt?  Or 
can we still coalesce interrupts (perhaps by recording them as an (irq, 
cpu) pair)?



@@ -313,6 +314,9 @@ struct kvm_assigned_dev_kernel {
int host_irq;
bool host_irq_disabled;
int guest_irq;
+#define KVM_ASSIGNED_DEV_IRQ_FIFO_LEN  0x100
+   struct kfifo *irq_fifo;
+   spinlock_t irq_fifo_lock;
 #define KVM_ASSIGNED_DEV_GUEST_INTX(1  0)
  


What if it runs out?

What does real hardware do?  I'm sure it doesn't have a 100-entry queue.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] KVM: Using kfifo for irq recording

2008-12-25 Thread Sheng Yang
On Thursday 25 December 2008 19:07:22 Avi Kivity wrote:
 Sheng Yang wrote:
  For MSI-X, we have to deal with multiply IRQ with same IRQ handler, so
  it's necessary to record the IRQ that trigger the IRQ handler.

 Does MSI-X disallowing coalescing two requests into one interrupt?  Or
 can we still coalesce interrupts (perhaps by recording them as a (irq,
 cpu) pair?)

Disallow? I don't quite understand. The PCI spec says the OS doesn't need to ensure 
interrupts are handled in the same order they occurred. This struct is used just 
because we lose the irq information after schedule_work...

  @@ -313,6 +314,9 @@ struct kvm_assigned_dev_kernel {
  int host_irq;
  bool host_irq_disabled;
  int guest_irq;
  +#define KVM_ASSIGNED_DEV_IRQ_FIFO_LEN  0x100
  +   struct kfifo *irq_fifo;
  +   spinlock_t irq_fifo_lock;
   #define KVM_ASSIGNED_DEV_GUEST_INTX(1  0)

 What if it runs out?

 What does real hardware do?  I'm sure it doesn't have a 100-entry queue.

0x100 is just a simple number for how many different interrupts of the same 
MSI-X device I thought could occur in the same period (indeed it's 0x100/sizeof(int)). 
Maybe not that many. It is just used by the work function later to find what the guest 
vector is, and then inject the correlated interrupt into the guest.

If a hardware device driver also postpones the work, I think it would also need 
something like this.

-- 
regards
Yang, Sheng

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/5] KVM: MMU: Add for_each_shadow_entry(), a simpler alternative to walk_shadow()

2008-12-25 Thread Avi Kivity
Using a for_each loop style removes the need to write callbacks and nasty
casts.

Implement the walk_shadow() using the for_each_shadow_entry().

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/kvm/mmu.c |   69 +---
 1 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3b86df6..3248a3e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -150,6 +150,20 @@ struct kvm_shadow_walk {
 u64 addr, u64 *spte, int level);
 };
 
+struct kvm_shadow_walk_iterator {
+   u64 addr;
+   hpa_t shadow_addr;
+   int level;
+   u64 *sptep;
+   unsigned index;
+};
+
+#define for_each_shadow_entry(_vcpu, _addr, _walker)\
+   for (shadow_walk_init((_walker), _vcpu, _addr);\
+shadow_walk_okay((_walker));  \
+shadow_walk_next((_walker)))
+
+
 struct kvm_unsync_walk {
int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
 };
@@ -1254,33 +1268,48 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
return sp;
 }
 
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+struct kvm_vcpu *vcpu, u64 addr)
+{
+   iterator-addr = addr;
+   iterator-shadow_addr = vcpu-arch.mmu.root_hpa;
+   iterator-level = vcpu-arch.mmu.shadow_root_level;
+   if (iterator-level == PT32E_ROOT_LEVEL) {
+   iterator-shadow_addr
+   = vcpu-arch.mmu.pae_root[(addr  30)  3];
+   iterator-shadow_addr = PT64_BASE_ADDR_MASK;
+   --iterator-level;
+   if (!iterator-shadow_addr)
+   iterator-level = 0;
+   }
+}
+
+static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
+{
+   if (iterator-level  PT_PAGE_TABLE_LEVEL)
+   return false;
+   iterator-index = SHADOW_PT_INDEX(iterator-addr, iterator-level);
+   iterator-sptep = ((u64 *)__va(iterator-shadow_addr)) + 
iterator-index;
+   return true;
+}
+
+static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+{
+   iterator-shadow_addr = *iterator-sptep  PT64_BASE_ADDR_MASK;
+   --iterator-level;
+}
+
 static int walk_shadow(struct kvm_shadow_walk *walker,
   struct kvm_vcpu *vcpu, u64 addr)
 {
-   hpa_t shadow_addr;
-   int level;
+   struct kvm_shadow_walk_iterator iterator;
int r;
-   u64 *sptep;
-   unsigned index;
-
-   shadow_addr = vcpu-arch.mmu.root_hpa;
-   level = vcpu-arch.mmu.shadow_root_level;
-   if (level == PT32E_ROOT_LEVEL) {
-   shadow_addr = vcpu-arch.mmu.pae_root[(addr  30)  3];
-   shadow_addr = PT64_BASE_ADDR_MASK;
-   if (!shadow_addr)
-   return 1;
-   --level;
-   }
 
-   while (level = PT_PAGE_TABLE_LEVEL) {
-   index = SHADOW_PT_INDEX(addr, level);
-   sptep = ((u64 *)__va(shadow_addr)) + index;
-   r = walker-entry(walker, vcpu, addr, sptep, level);
+   for_each_shadow_entry(vcpu, addr, iterator) {
+   r = walker-entry(walker, vcpu, addr,
+ iterator.sptep, iterator.level);
if (r)
return r;
-   shadow_addr = *sptep  PT64_BASE_ADDR_MASK;
-   --level;
}
return 0;
 }
-- 
1.6.0.6

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/5] KVM: MMU: Use for_each_shadow_entry() in __direct_map()

2008-12-25 Thread Avi Kivity
Eliminating a callback and a useless structure.

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/kvm/mmu.c |   83 ++-
 1 files changed, 29 insertions(+), 54 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3248a3e..b4b79b0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1841,67 +1841,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-struct direct_shadow_walk {
-   struct kvm_shadow_walk walker;
-   pfn_t pfn;
-   int write;
-   int largepage;
-   int pt_write;
-};
-
-static int direct_map_entry(struct kvm_shadow_walk *_walk,
-   struct kvm_vcpu *vcpu,
-   u64 addr, u64 *sptep, int level)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+   int largepage, gfn_t gfn, pfn_t pfn)
 {
-   struct direct_shadow_walk *walk =
-   container_of(_walk, struct direct_shadow_walk, walker);
+   struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
+   int pt_write = 0;
gfn_t pseudo_gfn;
-   gfn_t gfn = addr  PAGE_SHIFT;
-
-   if (level == PT_PAGE_TABLE_LEVEL
-   || (walk-largepage  level == PT_DIRECTORY_LEVEL)) {
-   mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
-0, walk-write, 1, walk-pt_write,
-walk-largepage, 0, gfn, walk-pfn, false);
-   ++vcpu-stat.pf_fixed;
-   return 1;
-   }
 
-   if (*sptep == shadow_trap_nonpresent_pte) {
-   pseudo_gfn = (addr  PT64_DIR_BASE_ADDR_MASK)  PAGE_SHIFT;
-   sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1,
- 1, ACC_ALL, sptep);
-   if (!sp) {
-   pgprintk(nonpaging_map: ENOMEM\n);
-   kvm_release_pfn_clean(walk-pfn);
-   return -ENOMEM;
+   for_each_shadow_entry(vcpu, (u64)gfn  PAGE_SHIFT, iterator) {
+   if (iterator.level == PT_PAGE_TABLE_LEVEL
+   || (largepage  iterator.level == PT_DIRECTORY_LEVEL)) {
+   mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+0, write, 1, pt_write,
+largepage, 0, gfn, pfn, false);
+   ++vcpu-stat.pf_fixed;
+   break;
}
 
-   set_shadow_pte(sptep,
-  __pa(sp-spt)
-  | PT_PRESENT_MASK | PT_WRITABLE_MASK
-  | shadow_user_mask | shadow_x_mask);
-   }
-   return 0;
-}
+   if (*iterator.sptep == shadow_trap_nonpresent_pte) {
+   pseudo_gfn = (iterator.addr  PT64_DIR_BASE_ADDR_MASK) 
 PAGE_SHIFT;
+   sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+ iterator.level - 1,
+ 1, ACC_ALL, iterator.sptep);
+   if (!sp) {
+   pgprintk(nonpaging_map: ENOMEM\n);
+   kvm_release_pfn_clean(pfn);
+   return -ENOMEM;
+   }
 
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-   int largepage, gfn_t gfn, pfn_t pfn)
-{
-   int r;
-   struct direct_shadow_walk walker = {
-   .walker = { .entry = direct_map_entry, },
-   .pfn = pfn,
-   .largepage = largepage,
-   .write = write,
-   .pt_write = 0,
-   };
-
-   r = walk_shadow(walker.walker, vcpu, gfn  PAGE_SHIFT);
-   if (r  0)
-   return r;
-   return walker.pt_write;
+   set_shadow_pte(iterator.sptep,
+  __pa(sp-spt)
+  | PT_PRESENT_MASK | PT_WRITABLE_MASK
+  | shadow_user_mask | shadow_x_mask);
+   }
+   }
+   return pt_write;
 }
 
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
-- 
1.6.0.6

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/5] KVM: MMU: Replace walk_shadow() by for_each_shadow_entry() in invlpg()

2008-12-25 Thread Avi Kivity
Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/kvm/paging_tmpl.h |   81 +--
 1 files changed, 32 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 69c7e33..46b68f9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,7 +25,6 @@
 #if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
-   #define shadow_walker shadow_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
 #elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
-   #define shadow_walker shadow_walker32
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
u32 error_code;
 };
 
-struct shadow_walker {
-   struct kvm_shadow_walk walker;
-   struct guest_walker *guest_walker;
-   int user_fault;
-   int write_fault;
-   int largepage;
-   int *ptwrite;
-   pfn_t pfn;
-   u64 *sptep;
-   gpa_t pte_gpa;
-};
-
 static gfn_t gpte_to_gfn(pt_element_t gpte)
 {
return (gpte  PT_BASE_ADDR_MASK)  PAGE_SHIFT;
@@ -453,54 +439,52 @@ out_unlock:
return 0;
 }
 
-static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
- struct kvm_vcpu *vcpu, u64 addr,
- u64 *sptep, int level)
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
-   struct shadow_walker *sw =
-   container_of(_sw, struct shadow_walker, walker);
+   struct kvm_shadow_walk_iterator iterator;
+   pt_element_t gpte;
+   gpa_t pte_gpa = -1;
+   int level;
+   u64 *sptep;
+
+   spin_lock(vcpu-kvm-mmu_lock);
 
-   /* FIXME: properly handle invlpg on large guest pages */
-   if (level == PT_PAGE_TABLE_LEVEL ||
-   ((level == PT_DIRECTORY_LEVEL)  is_large_pte(*sptep))) {
-   struct kvm_mmu_page *sp = page_header(__pa(sptep));
+   for_each_shadow_entry(vcpu, gva, iterator) {
+   level = iterator.level;
+   sptep = iterator.sptep;
 
-   sw-pte_gpa = (sp-gfn  PAGE_SHIFT);
-   sw-pte_gpa += (sptep - sp-spt) * sizeof(pt_element_t);
+   /* FIXME: properly handle invlpg on large guest pages */
+   if (level == PT_PAGE_TABLE_LEVEL ||
+   ((level == PT_DIRECTORY_LEVEL)  is_large_pte(*sptep))) {
+   struct kvm_mmu_page *sp = page_header(__pa(sptep));
 
-   if (is_shadow_present_pte(*sptep)) {
-   rmap_remove(vcpu-kvm, sptep);
-   if (is_large_pte(*sptep))
-   --vcpu-kvm-stat.lpages;
+   pte_gpa = (sp-gfn  PAGE_SHIFT);
+   pte_gpa += (sptep - sp-spt) * sizeof(pt_element_t);
+
+   if (is_shadow_present_pte(*sptep)) {
+   rmap_remove(vcpu-kvm, sptep);
+   if (is_large_pte(*sptep))
+   --vcpu-kvm-stat.lpages;
+   }
+   set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+   break;
}
-   set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
-   return 1;
-   }
-   if (!is_shadow_present_pte(*sptep))
-   return 1;
-   return 0;
-}
 
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
-{
-   pt_element_t gpte;
-   struct shadow_walker walker = {
-   .walker = { .entry = FNAME(shadow_invlpg_entry), },
-   .pte_gpa = -1,
-   };
+   if (!is_shadow_present_pte(*sptep))
+   break;
+   }
 
-   spin_lock(vcpu-kvm-mmu_lock);
-   walk_shadow(walker.walker, vcpu, gva);
spin_unlock(vcpu-kvm-mmu_lock);
-   if (walker.pte_gpa == -1)
+
+   if (pte_gpa == -1)
return;
-   if (kvm_read_guest_atomic(vcpu-kvm, walker.pte_gpa, gpte,
+   if (kvm_read_guest_atomic(vcpu-kvm, pte_gpa, gpte,
  sizeof(pt_element_t)))
return;
if (is_present_pte(gpte)  (gpte  PT_ACCESSED_MASK)) {
if (mmu_topup_memory_caches(vcpu))
return;
-   kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)gpte,
+   kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)gpte,
  sizeof(pt_element_t), 0);
}
 }
@@ -607,7 +591,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *sp)
 
 #undef pt_element_t
 #undef 

[PATCH 0/5] for_each_shadow_entry

2008-12-25 Thread Avi Kivity
This patchset replaces walk_shadow(), which calls a callback for each
shadow pte that maps a guest virtual address, by an equivalent for_each style
construct.  Benefits are less thunks and smaller code.

Please review.

Avi Kivity (5):
  KVM: MMU: Add for_each_shadow_entry(), a simpler alternative to
walk_shadow()
  KVM: MMU: Use for_each_shadow_entry() in __direct_map()
  KVM: MMU: Replace walk_shadow() by for_each_shadow_entry() in fetch()
  KVM: MMU: Replace walk_shadow() by for_each_shadow_entry() in
invlpg()
  KVM: MMU: Drop walk_shadow()

 arch/x86/kvm/mmu.c |  150 ++-
 arch/x86/kvm/paging_tmpl.h |  209 +++-
 2 files changed, 157 insertions(+), 202 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/5] KVM: MMU: Replace walk_shadow() by for_each_shadow_entry() in fetch()

2008-12-25 Thread Avi Kivity
Effectively reverting to the pre walk_shadow() version -- but now
with the reusable for_each().

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/kvm/paging_tmpl.h |  128 
 1 files changed, 58 insertions(+), 70 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6..69c7e33 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -283,91 +283,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, 
struct kvm_mmu_page *page,
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
-static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
-   struct kvm_vcpu *vcpu, u64 addr,
-   u64 *sptep, int level)
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+struct guest_walker *gw,
+int user_fault, int write_fault, int largepage,
+int *ptwrite, pfn_t pfn)
 {
-   struct shadow_walker *sw =
-   container_of(_sw, struct shadow_walker, walker);
-   struct guest_walker *gw = sw-guest_walker;
unsigned access = gw-pt_access;
struct kvm_mmu_page *shadow_page;
-   u64 spte;
+   u64 spte, *sptep;
int metaphysical;
gfn_t table_gfn;
int r;
+   int level;
pt_element_t curr_pte;
+   struct kvm_shadow_walk_iterator iterator;
 
-   if (level == PT_PAGE_TABLE_LEVEL
-   || (sw-largepage  level == PT_DIRECTORY_LEVEL)) {
-   mmu_set_spte(vcpu, sptep, access, gw-pte_access  access,
-sw-user_fault, sw-write_fault,
-gw-ptes[gw-level-1]  PT_DIRTY_MASK,
-sw-ptwrite, sw-largepage,
-gw-ptes[gw-level-1]  PT_GLOBAL_MASK,
-gw-gfn, sw-pfn, false);
-   sw-sptep = sptep;
-   return 1;
-   }
-
-   if (is_shadow_present_pte(*sptep)  !is_large_pte(*sptep))
-   return 0;
-
-   if (is_large_pte(*sptep)) {
-   set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
-   kvm_flush_remote_tlbs(vcpu-kvm);
-   rmap_remove(vcpu-kvm, sptep);
-   }
+   if (!is_present_pte(gw-ptes[gw-level - 1]))
+   return NULL;
 
-   if (level == PT_DIRECTORY_LEVEL  gw-level == PT_DIRECTORY_LEVEL) {
-   metaphysical = 1;
-   if (!is_dirty_pte(gw-ptes[level - 1]))
-   access = ~ACC_WRITE_MASK;
-   table_gfn = gpte_to_gfn(gw-ptes[level - 1]);
-   } else {
-   metaphysical = 0;
-   table_gfn = gw-table_gfn[level - 2];
-   }
-   shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
-  metaphysical, access, sptep);
-   if (!metaphysical) {
-   r = kvm_read_guest_atomic(vcpu-kvm, gw-pte_gpa[level - 2],
- curr_pte, sizeof(curr_pte));
-   if (r || curr_pte != gw-ptes[level - 2]) {
-   kvm_mmu_put_page(shadow_page, sptep);
-   kvm_release_pfn_clean(sw-pfn);
-   sw-sptep = NULL;
-   return 1;
+   for_each_shadow_entry(vcpu, addr, iterator) {
+   level = iterator.level;
+   sptep = iterator.sptep;
+   if (level == PT_PAGE_TABLE_LEVEL
+   || (largepage  level == PT_DIRECTORY_LEVEL)) {
+   mmu_set_spte(vcpu, sptep, access,
+gw-pte_access  access,
+user_fault, write_fault,
+gw-ptes[gw-level-1]  PT_DIRTY_MASK,
+ptwrite, largepage,
+gw-ptes[gw-level-1]  PT_GLOBAL_MASK,
+gw-gfn, pfn, false);
+   break;
}
-   }
 
-   spte = __pa(shadow_page-spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK
-   | PT_WRITABLE_MASK | PT_USER_MASK;
-   *sptep = spte;
-   return 0;
-}
+   if (is_shadow_present_pte(*sptep)  !is_large_pte(*sptep))
+   continue;
 
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
-struct guest_walker *guest_walker,
-int user_fault, int write_fault, int largepage,
-int *ptwrite, pfn_t pfn)
-{
-   struct shadow_walker walker = {
-   .walker = { .entry = FNAME(shadow_walk_entry), },
-   .guest_walker = guest_walker,
-   .user_fault = user_fault,
-   .write_fault = write_fault,
-   .largepage = largepage,
-   .ptwrite = ptwrite,
- 

[PATCH 5/5] KVM: MMU: Drop walk_shadow()

2008-12-25 Thread Avi Kivity
No longer used.

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/kvm/mmu.c |   20 
 1 files changed, 0 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b4b79b0..31ebe69 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,6 @@ struct kvm_rmap_desc {
struct kvm_rmap_desc *more;
 };
 
-struct kvm_shadow_walk {
-   int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
-u64 addr, u64 *spte, int level);
-};
-
 struct kvm_shadow_walk_iterator {
u64 addr;
hpa_t shadow_addr;
@@ -1299,21 +1294,6 @@ static void shadow_walk_next(struct 
kvm_shadow_walk_iterator *iterator)
--iterator-level;
 }
 
-static int walk_shadow(struct kvm_shadow_walk *walker,
-  struct kvm_vcpu *vcpu, u64 addr)
-{
-   struct kvm_shadow_walk_iterator iterator;
-   int r;
-
-   for_each_shadow_entry(vcpu, addr, iterator) {
-   r = walker-entry(walker, vcpu, addr,
- iterator.sptep, iterator.level);
-   if (r)
-   return r;
-   }
-   return 0;
-}
-
 static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 struct kvm_mmu_page *sp)
 {
-- 
1.6.0.6

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] KVM: Using kfifo for irq recording

2008-12-25 Thread Avi Kivity

Sheng Yang wrote:

On Thursday 25 December 2008 19:07:22 Avi Kivity wrote:
  

Sheng Yang wrote:


For MSI-X, we have to deal with multiple IRQs with the same IRQ handler, so
it's necessary to record the IRQ that triggers the IRQ handler.
  

Does MSI-X disallow coalescing two requests into one interrupt?  Or
can we still coalesce interrupts (perhaps by recording them as a (irq,
cpu) pair?)



Disallow? Not quite understand. The PCI spec says the OS doesn't need to ensure the 
sequence they are handled in is the same as the one they happened in. This struct is used just 
because we lost the information of the irq after schedule_work...


  


Why can't we store this information in a bitmap?  There are a limited 
number of irqs.


The only reason I can think of for using a fifo is if we want to 
preserve the number and ordering of interrupts.  Is there another reason?



@@ -313,6 +314,9 @@ struct kvm_assigned_dev_kernel {
int host_irq;
bool host_irq_disabled;
int guest_irq;
+#define KVM_ASSIGNED_DEV_IRQ_FIFO_LEN  0x100
+   struct kfifo *irq_fifo;
+   spinlock_t irq_fifo_lock;
 #define KVM_ASSIGNED_DEV_GUEST_INTX(1  0)
  

What if it runs out?

What does real hardware do?  I'm sure it doesn't have a 100-entry queue.



0x100 is just a simple number which I thought different interrupts of the same 
MSI-X device can happen in at the same period (indeed it's 0x100/sizeof(int)). Maybe 
not that many. And it is just used by the work function later to find what the guest 
vector is, and then inject the correlated interrupt to the guest.
  


Maybe it's better to do the conversion immediately, so we can store the 
information in a structure that's not prone to overflow.



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Nested KVM

2008-12-25 Thread Alexander Graf





On 25.12.2008, at 10:59, Avi Kivity a...@redhat.com wrote:


Alexander Graf wrote:


Avi, could you please apply that patch for kvm-82 too, so we get  
something working out? I'll take a closer look at what's broken  
exactly later on.


I'll just revert the emulation loop patch.  We can reapply it once  
we fix the problem.


Sounds good. It was rather meant as a draft/rfc anyways :-).

Alex




--
error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [ANNOUNCE] kvm-82 release

2008-12-25 Thread Mark Bidewell
When building KVM-82 on F10 I get the following errors:

make[2]: Entering directory `/usr/src/kernels/2.6.27.9-159.fc10.x86_64'
  LD  /opt/kvm-82/kernel/x86/built-in.o
  CC [M]  /opt/kvm-82/kernel/x86/svm.o
In file included from /opt/kvm-82/kernel/x86/external-module-compat.h:10,
 from command-line:2:
/opt/kvm-82/kernel/x86/../external-module-compat-comm.h:587: error:
conflicting types for 'hrtimer_add_expires_ns'
include/linux/hrtimer.h:245: error: previous definition of
'hrtimer_add_expires_ns' was here
/opt/kvm-82/kernel/x86/../external-module-compat-comm.h:592: error:
conflicting types for 'hrtimer_get_expires'
include/linux/hrtimer.h:250: error: previous definition of
'hrtimer_get_expires' was here
/opt/kvm-82/kernel/x86/../external-module-compat-comm.h:597: error:
conflicting types for 'hrtimer_get_expires_ns'
include/linux/hrtimer.h:260: error: previous definition of
'hrtimer_get_expires_ns' was here
/opt/kvm-82/kernel/x86/../external-module-compat-comm.h:602: error:
conflicting types for 'hrtimer_start_expires'
include/linux/hrtimer.h:341: error: previous definition of
'hrtimer_start_expires' was here
make[4]: *** [/opt/kvm-82/kernel/x86/svm.o] Error 1
make[3]: *** [/opt/kvm-82/kernel/x86] Error 2
make[2]: *** [_module_/opt/kvm-82/kernel] Error 2
make[2]: Leaving directory `/usr/src/kernels/2.6.27.9-159.fc10.x86_64'
make[1]: *** [all] Error 2
make[1]: Leaving directory `/opt/kvm-82/kernel'
make: *** [kernel] Error 2

Has anyone else seen this?

Mark Bidewell

On Thu, Dec 25, 2008 at 5:11 AM, Avi Kivity a...@redhat.com wrote:
 This release adds support for nested virtualization, a feature which allows
 you to run kvm (and possibly other hypervisors) inside a guest. This is an
 experimental feature and is only available on AMD hosts.

 There are fixes included for a couple of minor vulnerabilities: one for the
 slirp stack (-net user), which is not usually used in production, and
 another in the vnc server, which allows malicious users to cause a VM to
 hang.

 Changes from kvm-81:
 - merge qemu-svn
  - uuid support
  - fix CVE-2007-5729 (slirp vulnerability)
  - fix CVE-2008-2382 (vnc denial of service)
  - better scsi support
  - pci subsystem id for pci devices
- this will cause Windows guest to rediscover hardware
  - improved I/O parallelism
  - ppc kvm support
  - hpet support
- not fully integrated yet
  - monitor 'info status' command
 - merge bochs-bios-cvs
  - uuid support
  - prepare for S3 sleep
 - merge vgabios-cvs
 - much improved guest debugging (Jan Kiszka)
  - both debugger in guest and debugger in host
 - fix kvm makefile for separate object dir (Andi Kleen)
 - nested svm (Alexander Graf)
  - run kvm in kvm in kvm...
 - fix ia64 register and stack access from userspace (Jes Sorensen)
 - don't treat a global pte as global if cr4.pge is clear
  - fixes Vista x86 smp failure on boot
 - properly lock virtual i8259 interrupt controller
 - fix large host pages invlpg/resync
  - fixes oops when using host hugetlbfs
 - fix vmload instruction misemulated as lidt


 Notes:
If you use the modules bundled with kvm-82, you can use any version
 of Linux from 2.6.16 upwards.  You may also use kvm-81 userspace with
 the kvm modules provided by Linux 2.6.25 or above.  Some features may
 only be available in newer releases.

 http://kvm.qumranet.com

 --
 I have a truly marvellous patch that fixes the bug which this
 signature is too narrow to contain.

 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Avi Kivity
kvm performance is largely dependent on the frequency and cost of 
switches between guest and host mode.  The cost of a switch is greatly 
influenced by the amount of state we have to load and save.


One of the optimizations that kvm makes in order to reduce the cost is 
to partition the guest state into two; let's call the two parts kernel 
state and user state.  The kernel state consists of registers that are 
used for general kernel execution, for example the general purpose 
registers.  User state consists of registers that are only used in user 
mode (or in the transition to user mode).  When switching from guest to 
host, we only save and reload the kernel state, delaying reloading of 
user state until we actually need to switch to user mode.  Since many 
exits are satisfied entirely in the kernel, we can avoid switching user 
state entirely.  In effect the host kernel runs with some of the cpu 
registers containing guest values.  The mechanism used for deferring 
state switch is PREEMPT_NOTIFIERS, introduced in 2.6.23 IIRC.


Now, AMD SVM instructions also partition register state into two.  The 
VMRUN instruction, which is used to switch to guest mode, loads and 
saves registers corresponding to kernel state.  The VMLOAD and VMSAVE 
instructions load and save user state registers.


The exact registers managed by VMLOAD and VMSAVE are:

 FS GS TR LDTR
 KernelGSBase
 STAR LSTAR CSTAR SFMASK
 SYSENTER_CS SYSENTER_ESP SYSENTER_EIP

None of these registers are ever touched in 64-bit kernel mode, except 
gs.base (which we can save/restore manually), and TR.  The only part of 
the TSS (pointed to by the TR) used in 64-bit mode are the seven 
Interrupt Stack Table (IST) entries.  These are used to provide 
known-good stacks for critical exceptions.


These critical exceptions are: debug, nmi, double fault, stack fault, 
and machine check.


Because of this one detail, kvm must execute vmload/vmsave on every 
guest/host switch. Hardware architects, give yourself a pat on the back.


The impact is even greater when using nested virtualization, since we 
must trap on two additional instructions on every switch.


I would like to remove this limitation.  I see several ways to go about it:

1. Drop the use of IST

This would reduce the (perceived) reliability of the kernel and would 
probably not be welcomed.


2. Introduce a config item for dropping IST, and have kvm defer 
vmload/vmsave depending on the configuration


This would pose a dilemma for kitchen sink distro kernels: kvm 
performance or maximum reliability?


3. Switch off IST when the first VM is created, switch it back on when 
the last VM is destroyed


Most likely no additional code would need to be modified.  It could be 
made conditional if someone wants to retain IST even while kvm is 
active.  We already have hooks in place and know where the host IST is.  
I favor this option. 


4. Some other brilliant idea?

Might be even better than option 3.

hpa/Ingo, any opinions?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Ingo Molnar

* Avi Kivity a...@redhat.com wrote:

 I would like to remove this limitation.  I see several ways to go about 
 it:

 1. Drop the use of IST

 This would reduce the (perceived) reliability of the kernel and would 
 probably not be welcomed.

 hpa/Ingo, any opinions?

i think we should actually do #1 unconditionally.

ISTs are bad for the native kernel too. They have various nasty 
complications in the stack walker (and hence they _reduce_ reliability in 
practice), and they are non-preemptible as well. Plus we have the 
maximum-stack-footprint ftrace plugin now, which can remove any perception 
about how bad the worst-case stack footprint is in practice.

If it ever becomes an issue we could also soft-switch to a larger (per 
CPU) exception stack from the exception handlers themselves. The 
architectural stack footprint of the various critical exceptions are 
calculatable and low - so we could switch away and get almost the kind of 
separation that ISTs give. There's no deep reason to actually make use of 
hw switched ISTs.

So feel free to send a patch that just standardizes the critical 
exceptions to use the regular kernel stack. (I haven't actually tried this 
but it should be relatively simple to implement. Roadblocks are possible.)

Ingo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2466584 ] Guest/Host serial ports no longer working

2008-12-25 Thread SourceForge.net
Bugs item #2466584, was opened at 2008-12-25 10:26
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detailatid=893831aid=2466584group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: kernel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: AndrewB (andybaumhauer)
Assigned to: Nobody/Anonymous (nobody)
Summary: Guest/Host serial ports no longer working

Initial Comment:
Host:
  Linux, Fedora 10 2.6.27.9-159.fc10.x86_64 on Intel Core 2 Duo (Q6600) running 
KVM (kvm-74-10.fc10.x86_64)

Guest:
  Windows XP Home, using qemu-kvm -M pc -m 768 -smp 2 -boot c -hda 
/home/vm/Windows_XP_Home.img -net nic,macaddr=00:16:3e:10:23:ee,vlan=0 -net 
user,vlan=0,script=,ifname=virbr0 -std-vga -soundhw es1370 -localtime -serial 
/dev/ttyS0

Error messages when quitting Guest OS:
kvm_run: Unknown error 524
kvm_run returned -524

Summary:
On Fedora 8 and Fedora 9 using KVM-65 and earlier, connections from Windows 
COM1 to /dev/ttyS0 worked.  On Fedora 10 and KVM-74 serial connection between 
guest and host no longer works.

How to reproduce:
On host OS, use GTKTerm to access the serial port /dev/ttyS0 with a loopback 
adapter attached to the port.  You will see characters echo'ed back.

On Guest OS, use option -serial stdio and Hyperterm to access COM1, and you 
will see characters from Hyperterm on the Host OS terminal that started 
qemu-kvm (so we know that the Guest OS can send data out of KVM).

On Fedora 10, if you regress to KVM-65 (from Fedora 9 repository), by:

rpm -e kvm-74-10.fc10.x86_64
rpm -ihv --force gnutls-2.0.4-2.fc9.x86_64.rpm
rpm -ihv kvm-65-1.fc9.x86_64.rpm

Now option -serial /dev/ttyS0 will operate as expected, and Hyperterm will see 
characters echo'ed from the loopback hardware.




--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detailatid=893831aid=2466584group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Avi Kivity

Ingo Molnar wrote:

i think we should actually do #1 unconditionally.

ISTs are bad for the native kernel too. They have various nasty 
complications in the stack walker (and hence they _reduce_ reliability in 
practice), and they are non-preemptible as well. Plus we have the 
maximum-stack-footprint ftrace plugin now, which can remove any perception 
about how bad the worst-case stack footprint is in practice.


If it ever becomes an issue we could also soft-switch to a larger (per 
CPU) exception stack from the exception handlers themselves. The 
architectural stack footprint of the various critical exceptions are 
calculatable and low - so we could switch away and get almost the kind of 
separation that ISTs give. There's no deep reason to actually make use of 
hw switched ISTs.


So feel free to send a patch that just standardizes the critical 
exceptions to use the regular kernel stack. (I havent actually tried this 
but it should be relatively simple to implement. Roadblocks are possible.)
  


Certainly.  There is provision for a debug stack that can be larger than 
the normal exception stack.  This is used for vectors 1 and 3.  If we 
wish to preserve this, we need to to manual stack switching.


Currently DEBUG_STKSZ is 8K, the same as the normal stack (compared to 
4K for the other execption stacks).  Do we need to implement stack 
switching for debug vectors?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] x86: drop the use of the tss interrupt stack table (IST)

2008-12-25 Thread Avi Kivity
The IST is the only thing that requires a valid TSS while running in
kernel mode.  Dropping its use unlocks an optimization opportunity for
kvm: if we don't need a valid TSS while in kernel mode we can defer the
use of the VMLOAD/VMSAVE instructions until the next context switch,
reducing the executions of these costly instructions by a nice factor.

Kernel reliability should also be improved since interrupt paths are
simplified.

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/include/asm/desc.h  |   12 -
 arch/x86/include/asm/page_64.h   |7 ---
 arch/x86/include/asm/processor.h |   11 
 arch/x86/kernel/cpu/common.c |   34 -
 arch/x86/kernel/dumpstack_64.c   |   96 --
 arch/x86/kernel/entry_64.S   |   17 ++-
 arch/x86/kernel/traps.c  |   12 ++--
 7 files changed, 10 insertions(+), 179 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b1..0465c75 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -369,18 +369,6 @@ static inline void set_task_gate(unsigned int n, unsigned 
int gdt_entry)
_set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry3));
 }
 
-static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
-{
-   BUG_ON((unsigned)n  0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
-}
-
-static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
-{
-   BUG_ON((unsigned)n  0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
-}
-
 #else
 /*
  * GET_DESC_BASE reads the descriptor base of the specified segment.
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29..7c89095 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -16,13 +16,6 @@
 #define IRQSTACK_ORDER 2
 #define IRQSTACKSIZE (PAGE_SIZE  IRQSTACK_ORDER)
 
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
-
 #define PUD_PAGE_SIZE  (_AC(1, UL)  PUD_SHIFT)
 #define PUD_PAGE_MASK  (~(PUD_PAGE_SIZE-1))
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e3..4ef899c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -273,13 +273,6 @@ struct tss_struct {
 
 DECLARE_PER_CPU(struct tss_struct, init_tss);
 
-/*
- * Save the original ist values for checking stack pointers during debugging
- */
-struct orig_ist {
-   unsigned long   ist[7];
-};
-
 #defineMXCSR_DEFAULT   0x1f80
 
 struct i387_fsave_struct {
@@ -372,10 +365,6 @@ union thread_xstate {
struct xsave_struct xsave;
 };
 
-#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-#endif
-
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0..8563c51 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -903,9 +903,6 @@ void __cpuinit pda_init(int cpu)
}
 }
 
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-  DEBUG_STKSZ] __page_aligned_bss;
-
 extern asmlinkage void ignore_sysret(void);
 
 /* May not be marked __init: used by software suspend */
@@ -931,12 +928,6 @@ void syscall_init(void)
 
 unsigned long kernel_eflags;
 
-/*
- * Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
 #else
 
 /* Make sure %fs is initialized properly in idle threads */
@@ -960,17 +951,13 @@ void __cpuinit cpu_init(void)
 {
int cpu = stack_smp_processor_id();
struct tss_struct *t = per_cpu(init_tss, cpu);
-   struct orig_ist *orig_ist = per_cpu(orig_ist, cpu);
unsigned long v;
-   char *estacks = NULL;
struct task_struct *me;
int i;
 
/* CPU 0 is initialised in head64.c */
if (cpu != 0)
pda_init(cpu);
-   else
-   estacks = boot_exception_stacks;
 
me = current;
 
@@ -1000,27 +987,6 @@ void __cpuinit cpu_init(void)
if (cpu != 0  x2apic)
enable_x2apic();
 
-   /*
-* set up and load the per-CPU TSS
-*/
-   if (!orig_ist-ist[0]) {
-   static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
-   };
-   for (v = 0; v  N_EXCEPTION_STACKS; v++) {
-   if (cpu) {
-   estacks = (char *)__get_free_pages(GFP_ATOMIC, 

Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Ingo Molnar

* Avi Kivity a...@redhat.com wrote:

 Ingo Molnar wrote:
 i think we should actually do #1 unconditionally.

 ISTs are bad for the native kernel too. They have various nasty  
 complications in the stack walker (and hence they _reduce_ reliability 
 in practice), and they are non-preemptible as well. Plus we have the  
 maximum-stack-footprint ftrace plugin now, which can remove any 
 perception about how bad the worst-case stack footprint is in practice.

 If it ever becomes an issue we could also soft-switch to a larger (per  
 CPU) exception stack from the exception handlers themselves. The  
 architectural stack footprint of the various critical exceptions are  
 calculatable and low - so we could switch away and get almost the kind 
 of separation that ISTs give. There's no deep reason to actually make 
 use of hw switched ISTs.

 So feel free to send a patch that just standardizes the critical  
 exceptions to use the regular kernel stack. (I haven't actually tried 
 this but it should be relatively simple to implement. Roadblocks are 
 possible.)
   

 Certainly.  There is provision for a debug stack that can be larger than 
 the normal exception stack.  This is used for vectors 1 and 3.  If we 
 wish to preserve this, we need to do manual stack switching.

 Currently DEBUG_STKSZ is 8K, the same as the normal stack (compared to 
 4K for the other exception stacks).  Do we need to implement stack 
 switching for debug vectors?

i'd suggest to reuse the irq-stacks for this. Right now on 64-bit we've 
got the following stack layout: 8K process stacks, a 16K IRQ stack on each 
CPU, shared by all IRQs. Then we have the IST stacks with weird sizes: 
debug:8K, the others: 4K.

Then all the unnecessary IST complications can be removed. If nesting ever 
becomes an issue, the IRQ stack size can be doubled to 32K.

This way we save some small amount of RAM too (right now the IST stacks 
take up 28K of RAM per CPU), and reduce complexity and fragility quite 
visibly. And help KVM ;-)

Ingo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2001121 ] Windows 2003 x64 - SESSION5_INITIALIZATION_FAILED

2008-12-25 Thread SourceForge.net
Bugs item #2001121, was opened at 2008-06-23 21:09
Message generated for change (Comment added) made by masc82
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detailatid=893831aid=2001121group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Andreas 'ac0v' Specht (ac0v)
Assigned to: Nobody/Anonymous (nobody)
Summary: Windows 2003 x64 - SESSION5_INITIALIZATION_FAILED

Initial Comment:
Host Machine:
CPU:2x Intel(R) Xeon(R) CPU E5405  @ 2.00GHz
Kernel: Linux version 2.6.25-gentoo-r4
Arch:   x86_64
KVM:tried kvm-69 and kvm-70

Guest System:
tried Windows 2003 x64 and Windows 2003 x64 with slipstreamed Service Pack 2

Hi,

I get a BSoD (see attachment) while installing Windows 2003 x64 which contains 
the error message SESSION5_INITIALIZATION_FAILED

Serial log is empty.

I start my KVM via this command:

kvm -hda /dev/lvg1/sap-test -boot d -cdrom 
/srv/install/iso/windows/2003-server-x64.iso -vnc :4 -m 3048 -smp 4 -daemonize

Using -no-kvm or the -no-kvm-pit switch doesn't help and shows only the message 
Setup is starting Windows.

The -no-kvm-irqchip switch has no effect (same BSoD).

Any Ideas?

Regards,
Andreas 'ac0v' Specht

--

Comment By: MaSc82 (masc82)
Date: 2008-12-25 17:35

Message:
Updated to 2.6.28 including kvm modules, which seem to work very well with
kvm81, at the same time supporting win2003 x64, so all mentioned issues are
resolved for me, but only when using the kvm modules of linux kernel
2.6.28.

--

Comment By: MaSc82 (masc82)
Date: 2008-12-22 16:58

Message:
I've got the same issue with kvm-81 and Linux version 2.6.27-gentoo-r7.

The problem does not occur when using the kvm modules coming with the
kernel, but these (probably older?) modules still have bugs with smp and
block device virtio (temporary freeze)..

Can anyone shed some light on this, please?

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detailatid=893831aid=2001121group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Ingo Molnar

* Ingo Molnar mi...@elte.hu wrote:

 i'd suggest to reuse the irq-stacks for this. Right now on 64-bit we've 
 got the following stack layout: 8K process stacks, a 16K IRQ stack on 
 each CPU, shared by all IRQs. Then we have the IST stacks with weird 
 sizes: debug:8K, the others: 4K.

this has to be done carefully though, as there's a subtle detail here: 
right now the pda_irqcount and the pda_irqstackptr logic in entry_64.S is 
not re-entry safe and relies on IRQs being off.

If critical exceptions are moved to the IRQ stack then %rsp switching to 
the IRQ stack has to be done atomically: instead of using the pda_irqcount 
check the %rsp value itself should be checked against pda_irqstackptr - if 
it's within that 16K range then we are already on the IRQ stack and do not 
need to switch to it but can just use the current %rsp.

Ingo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Avi Kivity

Ingo Molnar wrote:

* Ingo Molnar mi...@elte.hu wrote:

  
i'd suggest to reuse the irq-stacks for this. Right now on 64-bit we've 
got the following stack layout: 8K process stacks, a 16K IRQ stack on 
each CPU, shared by all IRQs. Then we have the IST stacks with weird 
sizes: debug:8K, the others: 4K.



this has to be done carefully though, as there's a subtle detail here: 
right now the pda_irqcount and the pda_irqstackptr logic in entry_64.S is 
not re-entry safe and relies on IRQs being off.


If critical exceptions are moved to the IRQ stack then %rsp switching to 
the IRQ stack has to be done atomically: instead of using the pda_irqcount 
check the %rsp value itself should be checked against pda_irqstackptr - if 
it's within that 16K range then we are already on the IRQ stack and do not 
need to switch to it but can just use the current %rsp.
  


I think it's enough to switch %rsp before incrementing irqcount, no?

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Ingo Molnar

* Avi Kivity a...@redhat.com wrote:

 Ingo Molnar wrote:
 * Ingo Molnar mi...@elte.hu wrote:

   
 i'd suggest to reuse the irq-stacks for this. Right now on 64-bit 
 we've got the following stack layout: 8K process stacks, a 16K IRQ 
 stack on each CPU, shared by all IRQs. Then we have the IST stacks 
 with weird sizes: debug:8K, the others: 4K.
 

 this has to be done carefully though, as there's a subtle detail here:  
 right now the pda_irqcount and the pda_irqstackptr logic in entry_64.S 
 is not re-entry safe and relies on IRQs being off.

 If critical exceptions are moved to the IRQ stack then %rsp switching 
 to the IRQ stack has to be done atomically: instead of using the 
 pda_irqcount check the %rsp value itself should be checked against 
 pda_irqstackptr - if it's within that 16K range then we are already on 
 the IRQ stack and do not need to switch to it but can just use the 
 current %rsp.
   

 I think it's enough to switch %rsp before incrementing irqcount, no?

no - that would introduce a small race: if an exception (say an NMI or 
MCE, or a debug trap) happens in that small window then the exception 
context thinks that it's on the IRQ stack already, and would use the task 
stack.

So if we want to move them to IRQ stacks all the time, we have to check 
that condition atomically - the safest way of which is to check RSP 
against the (static) pda:[irqstackptr-16K+64..irqstackptr] range.

Ingo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Avi Kivity

Ingo Molnar wrote:

I think it's enough to switch %rsp before incrementing irqcount, no?



no - that would introduce a small race: if an exception (say an NMI or 
MCE, or a debug trap) happens in that small window then the exception 
context thinks that it's on the IRQ stack already, and would use the task 
stack.


  


I'm suggesting

   check irqcount
   if (wasnt_in_irq)
   rsp = irqstack
   ++irqcount

If the NMI happens before the increment, we'll switch the stack 
unconditionally, and if the NMI happens after the increment, then we 
won't switch the stack, but we're guaranteed to be on the irqstack 
anyway.  The window size is negative :)


Similarly, the exit path should be

   oldstack_reg = oldstack;
   --irqcount;
   rsp = oldstack_register;

To guarantee that by the time we decrement irqcount, we don't need the 
stack anymore.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [ANNOUNCE] kvm-82 release

2008-12-25 Thread Andreas Winkelbauer
Mark Bidewell mark.bidewell at alumni.clemson.edu writes:

 
 When building KVM-82 on F10 I get the following errors:
 
 make[2]: Entering directory `/usr/src/kernels/2.6.27.9-159.fc10.x86_64'
   LD  /opt/kvm-82/kernel/x86/built-in.o
   CC [M]  /opt/kvm-82/kernel/x86/svm.o
 In file included from /opt/kvm-82/kernel/x86/external-module-compat.h:10,
  from command-line:2:
 /opt/kvm-82/kernel/x86/../external-module-compat-comm.h:587: error:
 conflicting types for 'hrtimer_add_expires_ns'
 include/linux/hrtimer.h:245: error: previous definition of
 'hrtimer_add_expires_ns' was here
 ...
 make: *** [kernel] Error 2
 
 Has anyone else seen this?

the same problem exists with the latest stock kernel on fedora 9.

you may comment out the conflicting definitions in
kvm-82/kernel/external-module-compat-comm.h to fix the build problem.

bye,
Andreas Winkelbauer

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Ingo Molnar

* Avi Kivity a...@redhat.com wrote:

 Ingo Molnar wrote:
 I think it's enough to switch %rsp before incrementing irqcount, no?
 

 no - that would introduce a small race: if an exception (say an NMI or  
 MCE, or a debug trap) happens in that small window then the exception  
 context thinks that it's on the IRQ stack already, and would use the 
 task stack.

   

 I'm suggesting

check irqcount
if (wasnt_in_irq)
rsp = irqstack
++irqcount

 If the NMI happens before the increment, we'll switch the stack 
 unconditionally, and if the NMI happens after the increment, then we 
 won't switch the stack, but we're guaranteed to be on the irqstack 
 anyway.  The window size is negative :)

 Similarly, the exit path should be

oldstack_reg = oldstack;
--irqcount;
rsp = oldstack_register;

 To guarantee that by the time we decrement irqcount, we don't need the  
 stack anymore.

agreed, something like this would work too. My suggestion, to eliminate 
irqcount altogether and just check RSP against the known-irqstack-range, 
could result in slightly smaller (and thus faster) code, but it's a 
marginal difference at best.

Ingo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm vmload/vmsave vs tss.ist

2008-12-25 Thread Avi Kivity

Avi Kivity wrote:


I'm suggesting

   check irqcount
   if (wasnt_in_irq)
   rsp = irqstack
   ++irqcount

If the NMI happens before the increment, we'll switch the stack 
unconditionally, and if the NMI happens after the increment, then we 
won't switch the stack, but we're guaranteed to be on the irqstack 
anyway.  The window size is negative :)


Similarly, the exit path should be

   oldstack_reg = oldstack;
   --irqcount;
   rsp = oldstack_register;

To guarantee that by the time we decrement irqcount, we don't need the 
stack anymore.




On the other hand, checking %rsp allows us to drop irqcount completely, 
so maybe it's better.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Randomly freezing guests - Workaround?

2008-12-25 Thread Mario Enrico Ragucci
Hi all,

3 weeks ago I asked for help in #kvm. 
I have two AMD64 machines running up to 10 guests (each) via
kvm/libvirt. 

Some of my guest machines (most of them Debian etch/lenny) randomly
froze. Sometimes this happened after several days, sometimes shortly
after the guest had been started.

Someone at #kvm told me to switch the current clocksource (which had
been kvm-clock) to acpi_pm.

My guests did not freeze since then!

Thanks for your support and this excellent piece of software!

Regards, Mario

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [ANNOUNCE] kvm-82 release

2008-12-25 Thread Farkas Levente
Avi Kivity wrote:
 This release adds support for nested virtualization, a feature which
 allows you to run kvm (and possibly other hypervisors) inside a guest.
 This is an experimental feature and is only available on AMD hosts.
 
 There are fixes included for a couple of minor vulnerabilities: one for
 the slirp stack (-net user), which is not usually used in production,
 and another in the vnc server, which allows malicious users to cause a
 VM to hang.

on centos-5, kernel/include-compat/asm/msr-index.h gives dozens of such
warnings during compile:
In file included from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include/asm/kvm_host.h:65,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include/linux/kvm_host.h:67,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/x86/lapic.c:60:
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include-compat/asm/msr-index.h:304:1:
warning: MSR_P4_U2L_ESCR0 redefined
In file included from include/asm/processor.h:16,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include/asm/kvm_para.h:89,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include/linux/kvm_para.h:63,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/x86/../external-module-compat-comm.h:14,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/x86/external-module-compat.h:9,
 from command line:1:
include/asm/msr.h:407:1: warning: this is the location of the previous
definition
In file included from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include/asm/kvm_host.h:65,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include/linux/kvm_host.h:67,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/x86/lapic.c:60:
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include-compat/asm/msr-index.h:305:1:
warning: MSR_P4_U2L_ESCR1 redefined
In file included from include/asm/processor.h:16,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include/asm/kvm_para.h:89,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/include/linux/kvm_para.h:63,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/x86/../external-module-compat-comm.h:14,
 from
/home/robot/rpm/BUILD/kvm-kmod-82/_kmod_build_/kernel/x86/external-module-compat.h:9,


-- 
  Levente   Si vis pacem para bellum!
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: [PATCH 1/5] re-register whole area upon lfb unmap.

2008-12-25 Thread andrzej zaborowski
2008/12/17 Anthony Liguori anth...@codemonkey.ws:
 Glauber Costa wrote:

 set phys_offset correctly for the whole vga area when unmapping linear
 vram
 (for vga optimization). We first register the old pieces as unassigned
 memory, to make things easier for kvm (and possibly other slot based
 implementations in the future). Replacing the region directly would
 make the slot management significantly more complex.


 This change worries me because it involves explicitly unassigning slots and
 then assigning a new, bigger slot.  This is not necessary for TCG.  It
 suggests to me that there's a bug in the kvm slot code and that we're
 changing QEMU to work around it.

 That will means there may be other places in the code that are completely
 valid, but exercise this bug.

 Or is this purely an optimization?

It also changes the semantics because IO callbacks are now passed
offsets from region starts instead of absolute addresses.  I'm not
able to tell if the change is for good or for bad though.

Cheers
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] Remove interrupt stack table usage from x86_64 kernel

2008-12-25 Thread Avi Kivity
The interrupt stack table (IST) mechanism is the only thing preventing
kvm from deferring saving and reloading of some significant state.  It
is also somewhat complicated.

Remove it by switching the special exceptions to use the normal irqstack.

Avi Kivity (3):
  x86: drop the use of the tss interrupt stack table (IST)
  x86: Remove pda.irqcount
  x86: Switch critical exceptions and NMI to irqstack

 arch/x86/include/asm/desc.h  |   12 -
 arch/x86/include/asm/page_64.h   |7 ---
 arch/x86/include/asm/pda.h   |2 +-
 arch/x86/include/asm/processor.h |   11 
 arch/x86/kernel/asm-offsets_64.c |1 -
 arch/x86/kernel/cpu/common.c |   35 --
 arch/x86/kernel/dumpstack_64.c   |   96 --
 arch/x86/kernel/entry_64.S   |   49 ---
 arch/x86/kernel/traps.c  |   12 ++--
 9 files changed, 27 insertions(+), 198 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] x86: Remove pda.irqcount

2008-12-25 Thread Avi Kivity
pda.irqcount is used to test whether we need to switch to an irqstack or not.
We can do without it, however, by testing %rsp directly: if it's already
within the irqstack range we don't need to switch stacks.

This makes switching the nmi handler to use the irqstack easier.

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/include/asm/pda.h   |2 +-
 arch/x86/kernel/asm-offsets_64.c |1 -
 arch/x86/kernel/cpu/common.c |1 -
 arch/x86/kernel/entry_64.S   |   29 +
 4 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 2fbfff8..2099610 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -14,7 +14,7 @@ struct x8664_pda {
   address */
unsigned long kernelstack;  /* 16 top of kernel stack for current */
unsigned long oldrsp;   /* 24 user rsp for system call */
-   int irqcount;   /* 32 Irq nesting counter. Starts -1 */
+   int unused; /* 32 for rent */
unsigned int cpunumber; /* 36 Logical CPU number */
 #ifdef CONFIG_CC_STACKPROTECTOR
unsigned long stack_canary; /* 40 stack canary value */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 7fcf63d..779d010 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -50,7 +50,6 @@ int main(void)
ENTRY(kernelstack); 
ENTRY(oldrsp); 
ENTRY(pcurrent); 
-   ENTRY(irqcount);
ENTRY(cpunumber);
ENTRY(irqstackptr);
ENTRY(data_offset);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8563c51..6313d03 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -877,7 +877,6 @@ void __cpuinit pda_init(int cpu)
mb();
 
pda-cpunumber = cpu;
-   pda-irqcount = -1;
pda-kernelstack = (unsigned long)stack_thread_info() -
 PDA_STACKOFFSET + THREAD_SIZE;
pda-active_mm = init_mm;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8c882e1..245fecd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -627,6 +627,15 @@ END(stub_rt_sigreturn)
vector already pushed) */
 #define XCPT_FRAME _frame ORIG_RAX
 
+   .macro enter_irqstack scratch
+   mov %gs:pda_irqstackptr, \scratch
+   sub %rsp, \scratch
+   cmp $IRQSTACKSIZE-64, \scratch
+   jbe 1234f
+   mov %gs:pda_irqstackptr, %rsp
+1234:
+   .endm
+
 /* 
  * Interrupt entry/exit.
  *
@@ -655,14 +664,7 @@ END(stub_rt_sigreturn)
testl $3,CS(%rdi)
je 1f
SWAPGS
-   /* irqcount is used to check if a CPU is already on an interrupt
-  stack or not. While this is essentially redundant with preempt_count
-  it is a little cheaper to use a separate counter in the PDA
-  (short of moving irq_enter into assembly, which would be too
-   much work) */
-1: incl%gs:pda_irqcount
-   cmoveq %gs:pda_irqstackptr,%rsp
-   push%rbp# backlink for old unwinder
+1: enter_irqstack %rax
/*
 * We entered an interrupt context - irqs are off:
 */
@@ -677,7 +679,6 @@ ENTRY(common_interrupt)
 ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
-   decl %gs:pda_irqcount
leaveq
CFI_DEF_CFA_REGISTERrsp
CFI_ADJUST_CFA_OFFSET   -8
@@ -1325,14 +1326,12 @@ ENTRY(call_softirq)
CFI_REL_OFFSET rbp,0
mov  %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
-   incl %gs:pda_irqcount
-   cmove %gs:pda_irqstackptr,%rsp
+   enter_irqstack %rax
push  %rbp  # backlink for old unwinder
call __do_softirq
leaveq
CFI_DEF_CFA_REGISTERrsp
CFI_ADJUST_CFA_OFFSET   -8
-   decl %gs:pda_irqcount
ret
CFI_ENDPROC
 ENDPROC(call_softirq)
@@ -1369,15 +1368,13 @@ ENTRY(xen_do_hypervisor_callback)   # 
do_hypervisor_callback(struct *pt_regs)
movq %rdi, %rsp# we don't return, adjust the stack frame
CFI_ENDPROC
CFI_DEFAULT_STACK
-11:incl %gs:pda_irqcount
-   movq %rsp,%rbp
+11:movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
-   cmovzq %gs:pda_irqstackptr,%rsp
+   enter_irqstack %rax
pushq %rbp  # backlink for old unwinder
call xen_evtchn_do_upcall
popq %rsp
CFI_DEF_CFA_REGISTER rsp
-   decl %gs:pda_irqcount
jmp  error_exit
CFI_ENDPROC
 END(do_hypervisor_callback)
-- 
1.6.0.6

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] x86: Switch critical exceptions and NMI to irqstack

2008-12-25 Thread Avi Kivity
With the special exception stacks gone, the irqstack is a much safer place
than the regular task stacks.

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/kernel/entry_64.S |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 245fecd..8f40593 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -917,7 +917,10 @@ END(spurious_interrupt)
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
+   mov %rsp, %rbp
+   enter_irqstack %rax
call \sym
+   mov %rbp, %rsp
DISABLE_INTERRUPTS(CLBR_NONE)
.if \irqtrace
TRACE_IRQS_OFF
-- 
1.6.0.6

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] x86: drop the use of the tss interrupt stack table (IST)

2008-12-25 Thread Avi Kivity
The IST is the only thing that requires a valid TSS while running in
kernel mode.  Dropping its use unlocks an optimization opportunity for
kvm: if we don't need a valid TSS while in kernel mode we can defer the
use of the VMLOAD/VMSAVE instructions until the next context switch,
reducing the executions of these costly instructions by a nice factor.

Kernel reliability should also be improved since interrupt paths are
simplified.

Signed-off-by: Avi Kivity a...@redhat.com
---
 arch/x86/include/asm/desc.h  |   12 -
 arch/x86/include/asm/page_64.h   |7 ---
 arch/x86/include/asm/processor.h |   11 
 arch/x86/kernel/cpu/common.c |   34 -
 arch/x86/kernel/dumpstack_64.c   |   96 --
 arch/x86/kernel/entry_64.S   |   17 ++-
 arch/x86/kernel/traps.c  |   12 ++--
 7 files changed, 10 insertions(+), 179 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e6b82b1..0465c75 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -369,18 +369,6 @@ static inline void set_task_gate(unsigned int n, unsigned 
int gdt_entry)
_set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry3));
 }
 
-static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
-{
-   BUG_ON((unsigned)n  0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
-}
-
-static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
-{
-   BUG_ON((unsigned)n  0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
-}
-
 #else
 /*
  * GET_DESC_BASE reads the descriptor base of the specified segment.
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29..7c89095 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -16,13 +16,6 @@
 #define IRQSTACK_ORDER 2
 #define IRQSTACKSIZE (PAGE_SIZE  IRQSTACK_ORDER)
 
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
-
 #define PUD_PAGE_SIZE  (_AC(1, UL)  PUD_SHIFT)
 #define PUD_PAGE_MASK  (~(PUD_PAGE_SIZE-1))
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e3..4ef899c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -273,13 +273,6 @@ struct tss_struct {
 
 DECLARE_PER_CPU(struct tss_struct, init_tss);
 
-/*
- * Save the original ist values for checking stack pointers during debugging
- */
-struct orig_ist {
-   unsigned long   ist[7];
-};
-
 #defineMXCSR_DEFAULT   0x1f80
 
 struct i387_fsave_struct {
@@ -372,10 +365,6 @@ union thread_xstate {
struct xsave_struct xsave;
 };
 
-#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-#endif
-
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0..8563c51 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -903,9 +903,6 @@ void __cpuinit pda_init(int cpu)
}
 }
 
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-  DEBUG_STKSZ] __page_aligned_bss;
-
 extern asmlinkage void ignore_sysret(void);
 
 /* May not be marked __init: used by software suspend */
@@ -931,12 +928,6 @@ void syscall_init(void)
 
 unsigned long kernel_eflags;
 
-/*
- * Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
 #else
 
 /* Make sure %fs is initialized properly in idle threads */
@@ -960,17 +951,13 @@ void __cpuinit cpu_init(void)
 {
int cpu = stack_smp_processor_id();
struct tss_struct *t = per_cpu(init_tss, cpu);
-   struct orig_ist *orig_ist = per_cpu(orig_ist, cpu);
unsigned long v;
-   char *estacks = NULL;
struct task_struct *me;
int i;
 
/* CPU 0 is initialised in head64.c */
if (cpu != 0)
pda_init(cpu);
-   else
-   estacks = boot_exception_stacks;
 
me = current;
 
@@ -1000,27 +987,6 @@ void __cpuinit cpu_init(void)
if (cpu != 0  x2apic)
enable_x2apic();
 
-   /*
-* set up and load the per-CPU TSS
-*/
-   if (!orig_ist->ist[0]) {
-   static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
-   };
-   for (v = 0; v < N_EXCEPTION_STACKS; v++) {
-   if (cpu) {
-   estacks = (char *)__get_free_pages(GFP_ATOMIC, 

Re: [PATCH 1/4] KVM: Using kfifo for irq recording

2008-12-25 Thread Sheng Yang
On Thursday 25 December 2008 21:26:29 Avi Kivity wrote:
 Sheng Yang wrote:
  On Thursday 25 December 2008 19:07:22 Avi Kivity wrote:
  Sheng Yang wrote:
  For MSI-X, we have to deal with multiple IRQs sharing the same IRQ handler,
  so it's necessary to record the IRQ that triggered the IRQ handler.
 
 Does MSI-X disallow coalescing two requests into one interrupt?  Or
 can we still coalesce interrupts (perhaps by recording them as an (irq,
 cpu) pair?)
 
  Disallow? I don't quite understand. The PCI spec says the OS doesn't need
  to ensure that the sequence in which interrupts are handled matches the
  sequence in which they happened. This struct is used just because we lose
  the irq information after schedule_work...

 Why can't we store this information in a bitmap?  There are a limited
 number of irqs.

 The only reason I can think of for using a fifo is if we want to
 preserve the number and ordering of interrupts.  Is there another reason?

Well, I just think using a fifo is more generic and unifies the logic of the
three types of interrupt easily; it seems more elegant.

  @@ -313,6 +314,9 @@ struct kvm_assigned_dev_kernel {
int host_irq;
bool host_irq_disabled;
int guest_irq;
  +#define KVM_ASSIGNED_DEV_IRQ_FIFO_LEN 0x100
  + struct kfifo *irq_fifo;
  + spinlock_t irq_fifo_lock;
   #define KVM_ASSIGNED_DEV_GUEST_INTX  (1 << 0)
 
  What if it runs out?
 
  What does real hardware do?  I'm sure it doesn't have a 100-entry queue.
 
  0x100 is just a simple number; I thought that many different interrupts of
  the same MSI-X device could happen in the same period (indeed it's
  0x100/sizeof(int)). Maybe not that many. It is just used by the work
  function later to find what the guest vector is, and then to inject the
  correlated interrupt into the guest.

 Maybe it's better to do the conversion immediately, so we can store the
 information in a structure that's not prone to overflow.

OK. I will add a bitmap with gsi_msg to the kvm struct, which cannot
overflow.

-- 
regards
Yang, Sheng

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/15] KVM: Replace host_irq_disable with a new flag

2008-12-25 Thread Sheng Yang
(I discarded irq_fifo and changed the method used to fix this problem)

We can reuse the state field later.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 include/linux/kvm_host.h |3 ++-
 virt/kvm/kvm_main.c  |8 
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fbf102c..58e4b7e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -311,13 +311,14 @@ struct kvm_assigned_dev_kernel {
int host_busnr;
int host_devfn;
int host_irq;
-   bool host_irq_disabled;
int guest_irq;
 #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
 #define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
 #define KVM_ASSIGNED_DEV_HOST_MSI  (1 << 9)
	unsigned long irq_requested_type;
+#define KVM_ASSIGNED_DEV_HOST_IRQ_DISABLED (1 << 0)
+	unsigned long state;
int irq_source_id;
struct pci_dev *dev;
struct kvm *kvm;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a51e630..065af2d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -114,7 +114,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct 
work_struct *work)
 
	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
		enable_irq(assigned_dev->host_irq);
-		assigned_dev->host_irq_disabled = false;
+		assigned_dev->state &= ~KVM_ASSIGNED_DEV_HOST_IRQ_DISABLED;
	}
 
	mutex_unlock(&assigned_dev->kvm->lock);
@@ -131,7 +131,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void 
*dev_id)
schedule_work(assigned_dev-interrupt_work);
 
	disable_irq_nosync(irq);
-	assigned_dev->host_irq_disabled = true;
+	assigned_dev->state |= KVM_ASSIGNED_DEV_HOST_IRQ_DISABLED;
 
return IRQ_HANDLED;
 }
@@ -152,9 +152,9 @@ static void kvm_assigned_dev_ack_irq(struct 
kvm_irq_ack_notifier *kian)
/* The guest irq may be shared so this ack may be
 * from another device.
 */
-	if (dev->host_irq_disabled) {
+	if (dev->state & KVM_ASSIGNED_DEV_HOST_IRQ_DISABLED) {
		enable_irq(dev->host_irq);
-		dev->host_irq_disabled = false;
+		dev->state &= ~KVM_ASSIGNED_DEV_HOST_IRQ_DISABLED;
}
 }
 
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/15] KVM: Fix race in kvm_free_assigned_irq

2008-12-25 Thread Sheng Yang
Thanks to Marcelo's observation, the following code has a potential issue:

	if (cancel_work_sync(&assigned_dev->interrupt_work))
		kvm_put_kvm(kvm);

In fact, cancel_work_sync() returns true either when the work struct was only
scheduled or when the callback of the work struct was executed. This code only
considers the former situation.

Also, we have a window between cancel_work_sync() and free_irq(). This patch
fixes both.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 include/linux/kvm_host.h |1 +
 virt/kvm/kvm_main.c  |   34 ++
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 58e4b7e..e0775b9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -318,6 +318,7 @@ struct kvm_assigned_dev_kernel {
 #define KVM_ASSIGNED_DEV_HOST_MSI  (1  9)
unsigned long irq_requested_type;
 #define KVM_ASSIGNED_DEV_HOST_IRQ_DISABLED (1 << 0)
+#define KVM_ASSIGNED_DEV_IRQ_GOT_KVM   (1 << 1)
unsigned long state;
int irq_source_id;
struct pci_dev *dev;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 065af2d..9ffa601 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -119,6 +119,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct 
work_struct *work)
 
	mutex_unlock(&assigned_dev->kvm->lock);
	kvm_put_kvm(assigned_dev->kvm);
+	assigned_dev->state &= ~KVM_ASSIGNED_DEV_IRQ_GOT_KVM;
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -126,7 +127,15 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void 
*dev_id)
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
 
+	/*
+	 * In kvm_free_device_irq, cancel_work_sync returns true if:
+	 * 1. the work was scheduled, and then cancelled, or
+	 * 2. the work callback was executed.
+	 *
+	 * We need to call kvm_put_kvm() for the former, but not the latter.
+	 */
	kvm_get_kvm(assigned_dev->kvm);
+	assigned_dev->state |= KVM_ASSIGNED_DEV_IRQ_GOT_KVM;
 
schedule_work(assigned_dev-interrupt_work);
 
@@ -173,10 +182,27 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
if (!assigned_dev-irq_requested_type)
return;
 
-   if (cancel_work_sync(assigned_dev-interrupt_work))
-   /* We had pending work. That means we will have to take
-* care of kvm_put_kvm.
-*/
+	/*
+	 * We need to ensure that kvm_put_kvm() is paired with kvm_get_kvm() in
+	 * kvm_assigned_dev_intr, and that no more interrupts arrive after we
+	 * have cancelled the current one.
+	 *
+	 * Here we have two possibilities when cancel_work_sync() returns true:
+	 * 1. The work was scheduled, but the callback hasn't been called.  We
+	 * need to call kvm_put_kvm() here.  And the IRQ is already disabled
+	 * without doubt.
+	 *
+	 * 2. The callback has executed; here we don't need to call
+	 * kvm_put_kvm(), but we may need to disable the irq (e.g. for MSI).
+	 *
+	 * We distinguish the two conditions according to assigned_dev->state.
+	 * We also disable the irq here anyway; that may result in a nested IRQ
+	 * disable, but it's fine, since we are going to free it.
+	 */
+	disable_irq_nosync(assigned_dev->host_irq);
+
+	if (cancel_work_sync(&assigned_dev->interrupt_work) &&
+	    assigned_dev->state & KVM_ASSIGNED_DEV_IRQ_GOT_KVM)
		kvm_put_kvm(kvm);
 
	free_irq(assigned_dev->host_irq, (void *)assigned_dev);
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: userspace: Remove duplicated functionality for cpuid processing

2008-12-25 Thread Amit Shah
host_cpuid is now available in target-i386/helper.c.
Remove the duplicated code now in kvm-specific code.

Signed-off-by: Amit Shah amit.s...@redhat.com
---
 qemu/qemu-kvm-x86.c |   70 ---
 1 files changed, 0 insertions(+), 70 deletions(-)

diff --git a/qemu/qemu-kvm-x86.c b/qemu/qemu-kvm-x86.c
index aa36be8..1bf86e1 100644
--- a/qemu/qemu-kvm-x86.c
+++ b/qemu/qemu-kvm-x86.c
@@ -451,39 +451,6 @@ void kvm_arch_save_regs(CPUState *env)
 }
 }
 
-static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx,
-  uint32_t *ecx, uint32_t *edx)
-{
-uint32_t vec[4];
-
-#ifdef __x86_64__
-asm volatile("cpuid"
-: "=a"(vec[0]), "=b"(vec[1]),
-  "=c"(vec[2]), "=d"(vec[3])
-: "0"(function) : "cc");
-#else
-asm volatile("pusha \n\t"
-"cpuid \n\t"
-"mov %%eax, 0(%1) \n\t"
-"mov %%ebx, 4(%1) \n\t"
-"mov %%ecx, 8(%1) \n\t"
-"mov %%edx, 12(%1) \n\t"
-"popa"
-: : "a"(function), "S"(vec)
-: "memory", "cc");
-#endif
-
-if (eax)
-   *eax = vec[0];
-if (ebx)
-   *ebx = vec[1];
-if (ecx)
-   *ecx = vec[2];
-if (edx)
-   *edx = vec[3];
-}
-
-
 static void do_cpuid_ent(struct kvm_cpuid_entry *e, uint32_t function,
 CPUState *env)
 {
@@ -494,43 +461,6 @@ static void do_cpuid_ent(struct kvm_cpuid_entry *e, 
uint32_t function,
     e->ebx = env->regs[R_EBX];
     e->ecx = env->regs[R_ECX];
     e->edx = env->regs[R_EDX];
-    if (function == 0x80000001) {
-	uint32_t h_eax, h_edx;
-
-	host_cpuid(function, &h_eax, NULL, NULL, &h_edx);
-
-	// long mode
-	if ((h_edx & 0x20000000) == 0 || !lm_capable_kernel)
-	    e->edx &= ~0x20000000u;
-	// syscall
-	if ((h_edx & 0x00000800) == 0)
-	    e->edx &= ~0x00000800u;
-	// nx
-	if ((h_edx & 0x00100000) == 0)
-	    e->edx &= ~0x00100000u;
-	// svm
-	if (!kvm_nested && (e->ecx & 4))
-	    e->ecx &= ~4u;
-    }
-    // sysenter isn't supported on compatibility mode on AMD.  and syscall
-    // isn't supported in compatibility mode on Intel.  so advertise the
-    // actuall cpu, and say goodbye to migration between different vendors
-    // is you use compatibility mode.
-    if (function == 0) {
-	uint32_t bcd[3];
-
-	host_cpuid(0, NULL, &bcd[0], &bcd[1], &bcd[2]);
-	e->ebx = bcd[0];
-	e->ecx = bcd[1];
-	e->edx = bcd[2];
-    }
-    // Hypervisor present bit for Microsoft guests
-    if (function == 1)
-	e->ecx |= (1u << 31);
-
-    // 3dnow isn't properly emulated yet
-    if (function == 0x80000001)
-	e->edx &= ~0xc0000000;
 }
 
 struct kvm_para_features {
-- 
1.5.4.3

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html