date:20080925

[PATCH] kvm: external module: relay_open() compat

2008-09-25 Thread Avi Kivity

From: Eduardo Habkost [EMAIL PROTECTED]

On 2.6.21 a new parameter was added to relay_open(). Handle this parameter
on kvm_relay_open() if needed.

Signed-off-by: Eduardo Habkost [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/kernel/external-module-compat-comm.h 
b/kernel/external-module-compat-comm.h
index 2499277..30d17fa 100644
--- a/kernel/external-module-compat-comm.h
+++ b/kernel/external-module-compat-comm.h
@@ -532,6 +532,25 @@ struct pci_dev *pci_get_bus_and_slot(unsigned int bus, 
unsigned int devfn);
 
 #endif
 
+#if LINUX_VERSION_CODE  KERNEL_VERSION(2,6,21)
+
+#include linux/relay.h
+
+/* relay_open() interface has changed on 2.6.21 */
+
+struct rchan *kvm_relay_open(const char *base_filename,
+struct dentry *parent,
+size_t subbuf_size,
+size_t n_subbufs,
+struct rchan_callbacks *cb,
+void *private_data);
+
+#else
+
+#define kvm_relay_open relay_open
+
+#endif
+
 #if LINUX_VERSION_CODE  KERNEL_VERSION(2,6,27)
 
 static inline int get_user_pages_fast(unsigned long start, int nr_pages,
diff --git a/kernel/external-module-compat.c b/kernel/external-module-compat.c
index 4b9a9f2..c0a0355 100644
--- a/kernel/external-module-compat.c
+++ b/kernel/external-module-compat.c
@@ -276,3 +276,25 @@ int intel_iommu_found()
 }
 
 #endif
+
+
+#if LINUX_VERSION_CODE  KERNEL_VERSION(2,6,21)
+
+/* relay_open() interface has changed on 2.6.21 */
+
+struct rchan *kvm_relay_open(const char *base_filename,
+struct dentry *parent,
+size_t subbuf_size,
+size_t n_subbufs,
+struct rchan_callbacks *cb,
+void *private_data)
+{
+   struct rchan *chan = relay_open(base_filename, parent,
+   subbuf_size, n_subbufs,
+   cb);
+   if (chan)
+   chan-private_data = private_data;
+   return chan;
+}
+
+#endif
diff --git a/kernel/x86/hack-module.awk b/kernel/x86/hack-module.awk
index 7f59483..c3fa51e 100644
--- a/kernel/x86/hack-module.awk
+++ b/kernel/x86/hack-module.awk
@@ -1,5 +1,5 @@
 BEGIN { split(INIT_WORK tsc_khz desc_struct ldttss_desc64 desc_ptr  \
- on_each_cpu , compat_apis); }
+ on_each_cpu relay_open , compat_apis); }
 
 /^int kvm_init\(/ { anon_inodes = 1 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm: external module: spin_needbreak() compat

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/kernel/external-module-compat-comm.h 
b/kernel/external-module-compat-comm.h
index 30d17fa..76ddca3 100644
--- a/kernel/external-module-compat-comm.h
+++ b/kernel/external-module-compat-comm.h
@@ -567,3 +567,11 @@ static inline int get_user_pages_fast(unsigned long start, 
int nr_pages,
 }
 
 #endif
+
+/* spin_needbreak() was called something else in 2.6.24 */
+#if LINUX_VERSION_CODE = KERNEL_VERSION(2,6,24)
+
+#define spin_needbreak need_lockbreak
+
+#endif
+
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm: external module: Hack DEFINE_SIMPLE_ATTRIBUTE for lost_records_get() also

2008-09-25 Thread Avi Kivity

From: Eduardo Habkost [EMAIL PROTECTED]

Change the DEFINE_SIMPLE_ATTRIBUTE pattern to match the lost_records_get()
definition also, so kvm_trace can compile on older kernels.

Signed-off-by: Eduardo Habkost [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/kernel/x86/hack-module.awk b/kernel/x86/hack-module.awk
index 3f364f8..7f59483 100644
--- a/kernel/x86/hack-module.awk
+++ b/kernel/x86/hack-module.awk
@@ -42,11 +42,11 @@ BEGIN { split(INIT_WORK tsc_khz desc_struct ldttss_desc64 
desc_ptr  \
 $0 = \t.VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC( fcn ),
 }
 
-/^static int .*_stat_get/ {
+/^static int (.*_stat_get|lost_records_get)/ {
 $3 = __ $3
 }
 
-/DEFINE_SIMPLE_ATTRIBUTE.*_stat_get/ {
+/DEFINE_SIMPLE_ATTRIBUTE.*(_stat_get|lost_records_get)/ {
 name = gensub(/,/, , g, $2);
 print MAKE_SIMPLE_ATTRIBUTE_GETTER( name )
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm: external module: add --with-kvm-trace arg to configure

2008-09-25 Thread Avi Kivity

From: Eduardo Habkost [EMAIL PROTECTED]

./configure --with-kvm-trace will set a CONFIG_KVM_TRACE option in a
kernel/config.kbuild file. This file can be included from the Kbuild
files to get kvm-specific CONFIG_* definitions.

Signed-off-by: Eduardo Habkost [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/.gitignore b/.gitignore
index bb35cca..0cabc17 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,7 @@ vgabios/*.txt
 extboot/extboot.bin
 extboot/extboot.img
 extboot/signrom
+kernel/config.kbuild
 kernel/modules.order
 kernel/Module.symvers
 kernel/Modules.symvers
diff --git a/configure b/configure
index 3bb10ce..78c2f9c 100755
--- a/configure
+++ b/configure
@@ -8,6 +8,7 @@ objcopy=objcopy
 want_module=1
 qemu_cflags=
 qemu_ldflags=
+kvm_trace=
 qemu_opts=()
 cross_prefix=
 arch=`uname -m`
@@ -22,6 +23,7 @@ usage() {
--cross-prefix=PREFIX  prefix for cross compile
--prefix=PREFIXwhere to install things ($prefix)
--with-patched-kernel  don't use external module
+   --with-kvm-trace   Enable kvm_trace
--kerneldir=DIRkernel build directory ($kerneldir)
--qemu-cflags=CFLAGS   CFLAGS to add to qemu configuration
--qemu-ldflags=LDFLAGS LDFLAGS to add to qemu configuration
@@ -56,6 +58,9 @@ while [[ $1 = -* ]]; do
--with-patched-kernel)
want_module=
;;
+   --with-kvm-trace)
+   kvm_trace=y
+   ;;
--qemu-cflags)
qemu_cflags=$arg
;;
@@ -131,3 +136,9 @@ CC=$cross_prefix$cc
 LD=$cross_prefix$ld
 OBJCOPY=$cross_prefix$objcopy
 EOF
+
+if [ -n $want_module ];then
+cat EOF  kernel/config.kbuild
+CONFIG_KVM_TRACE=$kvm_trace
+EOF
+fi
diff --git a/kernel/Makefile b/kernel/Makefile
index 151183a..9842dab 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -1,4 +1,5 @@
 include ../config.mak
+include config.kbuild
 
 ARCH_DIR = $(if $(filter $(ARCH),x86_64 i386),x86,$(ARCH))
 ARCH_CONFIG := $(shell echo $(ARCH_DIR) | tr '[:lower:]' '[:upper:]')
@@ -33,12 +34,16 @@ hack-files-ia64 = kvm_main.c kvm_fw.c
 
 hack-files = $(hack-files-$(ARCH_DIR))
 
+ifeq ($(CONFIG_KVM_TRACE),y)
+module_defines += -DCONFIG_KVM_TRACE=y
+endif
+
 all:: header-link prerequisite
 #  include header priority 1) $LINUX 2) $KERNELDIR 3) include-compat
$(MAKE) -C $(KERNELDIR) M=`pwd` \
LINUXINCLUDE=-I`pwd`/include -Iinclude 
-Iarch/${ARCH_DIR}/include -I`pwd`/include-compat \
-include include/linux/autoconf.h \
-   -include `pwd`/$(ARCH_DIR)/external-module-compat.h
+   -include `pwd`/$(ARCH_DIR)/external-module-compat.h 
$(module_defines)
$$@
 
 sync: header-sync source-sync header-link
diff --git a/kernel/x86/Kbuild b/kernel/x86/Kbuild
index 8dc0483..bc584f9 100644
--- a/kernel/x86/Kbuild
+++ b/kernel/x86/Kbuild
@@ -1,3 +1,7 @@
+# trick to get the kvm-specific CONFIG_KVM_* definitions,
+# because the kernel source tree won't have them
+include $(obj)/../config.kbuild
+
 obj-m := kvm.o kvm-intel.o kvm-amd.o
 kvm-objs := kvm_main.o x86.o mmu.o x86_emulate.o ../anon_inodes.o irq.o 
i8259.o \
 lapic.o ioapic.o preempt.o i8254.o coalesced_mmio.o \
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm: external module: add kvm_trace.c to hack-files-x86

2008-09-25 Thread Avi Kivity

From: Eduardo Habkost [EMAIL PROTECTED]

We will add some hacks for kvm_trace.c on hack-module.awk

Signed-off-by: Eduardo Habkost [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/kernel/Makefile b/kernel/Makefile
index 3f5f6da..151183a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -28,7 +28,7 @@ unifdef = mv $1 $1.orig  \
 
 hack = $(call _hack,$T/$(strip $1))
 
-hack-files-x86 = kvm_main.c mmu.c vmx.c svm.c x86.c irq.h lapic.c i8254.c
+hack-files-x86 = kvm_main.c mmu.c vmx.c svm.c x86.c irq.h lapic.c i8254.c 
kvm_trace.c
 hack-files-ia64 = kvm_main.c kvm_fw.c
 
 hack-files = $(hack-files-$(ARCH_DIR))
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm: external module: always generate config.kbuild

2008-09-25 Thread Avi Kivity

From: Eduardo Habkost [EMAIL PROTECTED]

When implementing --with-kvm-trace, I supposed make would never enter
the 'kernel' directory when compiling with --with-patched-kernel. I was
wrong and broke --with-patched-kernel.

Change configure to always generate config.kbuild on the kernel
directory. Otherwise make will explode on 'make header-sync', that runs
even when --with-patched-kernel was used.

Signed-off-by: Eduardo Habkost [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/configure b/configure
index 78c2f9c..3b27364 100755
--- a/configure
+++ b/configure
@@ -137,8 +137,6 @@ LD=$cross_prefix$ld
 OBJCOPY=$cross_prefix$objcopy
 EOF
 
-if [ -n $want_module ];then
 cat EOF  kernel/config.kbuild
 CONFIG_KVM_TRACE=$kvm_trace
 EOF
-fi
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kvm: qemu: avoid allocating codegen buffer if kvm is enabled

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Suggested by Anthony Liguori.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/qemu/exec.c b/qemu/exec.c
index cf9c99c..5a2554d 100644
--- a/qemu/exec.c
+++ b/qemu/exec.c
@@ -407,9 +407,8 @@ static uint8_t 
static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE];
 
 static void code_gen_alloc(unsigned long tb_size)
 {
-#ifdef TARGET_IA64
-return;
-#endif
+if (kvm_enabled())
+return;
 
 #ifdef USE_STATIC_CODE_GEN_BUFFER
 code_gen_buffer = static_code_gen_buffer;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: MMU: do not write-protect large mappings

2008-09-25 Thread Avi Kivity

From: Marcelo Tosatti [EMAIL PROTECTED]

There is not much point in write protecting large mappings. This
can only happen when a page is shadowed during the window between
is_largepage_backed and mmu_lock acquisition. Zap the entry instead, so
the next pagefault will find a shadowed page via is_largepage_backed and
fallback to 4k translations.

Simplifies out of sync shadow.

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 23752ef..731e6fe 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1180,11 +1180,16 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 
*shadow_pte,
|| (write_fault  !is_write_protection(vcpu)  !user_fault)) {
struct kvm_mmu_page *shadow;
 
+   if (largepage  has_wrprotected_page(vcpu-kvm, gfn)) {
+   ret = 1;
+   spte = shadow_trap_nonpresent_pte;
+   goto set_pte;
+   }
+
spte |= PT_WRITABLE_MASK;
 
shadow = kvm_mmu_lookup_page(vcpu-kvm, gfn);
-   if (shadow ||
-  (largepage  has_wrprotected_page(vcpu-kvm, gfn))) {
+   if (shadow) {
pgprintk(%s: found shadow page for %lx, marking ro\n,
 __func__, gfn);
ret = 1;
@@ -1197,6 +1202,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 
*shadow_pte,
if (pte_access  ACC_WRITE_MASK)
mark_page_dirty(vcpu-kvm, gfn);
 
+set_pte:
set_shadow_pte(shadow_pte, spte);
return ret;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: PIC: enhance IPI avoidance

2008-09-25 Thread Avi Kivity

From: Marcelo Tosatti [EMAIL PROTECTED]

The PIC code makes little effort to avoid kvm_vcpu_kick(), resulting in
unnecessary guest exits in some conditions.

For example, if the timer interrupt is routed through the IOAPIC, IRR
for IRQ 0 will get set but not cleared, since the APIC is handling the
acks.

This means that every time an interrupt < 16 is triggered, the priority
logic will find IRQ0 pending and send an IPI to vcpu0 (in case IRQ0 is
not masked, which is Linux's case).

Introduce a new variable isr_ack to represent the IRQ's for which the
guest has been signalled / cleared the ISR. Use it to avoid more than
one IPI per trigger-ack cycle, in addition to the avoidance when ISR is
set in get_priority().

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 71e3eee..17e41e1 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -33,6 +33,14 @@
 static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 {
s-isr = ~(1  irq);
+   s-isr_ack |= (1  irq);
+}
+
+void kvm_pic_clear_isr_ack(struct kvm *kvm)
+{
+   struct kvm_pic *s = pic_irqchip(kvm);
+   s-pics[0].isr_ack = 0xff;
+   s-pics[1].isr_ack = 0xff;
 }
 
 /*
@@ -213,6 +221,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
s-irr = 0;
s-imr = 0;
s-isr = 0;
+   s-isr_ack = 0xff;
s-priority_add = 0;
s-irq_base = 0;
s-read_reg_select = 0;
@@ -444,10 +453,14 @@ static void pic_irq_request(void *opaque, int level)
 {
struct kvm *kvm = opaque;
struct kvm_vcpu *vcpu = kvm-vcpus[0];
+   struct kvm_pic *s = pic_irqchip(kvm);
+   int irq = pic_get_irq(s-pics[0]);
 
-   pic_irqchip(kvm)-output = level;
-   if (vcpu)
+   s-output = level;
+   if (vcpu  level  (s-pics[0].isr_ack  (1  irq))) {
+   s-pics[0].isr_ack = ~(1  irq);
kvm_vcpu_kick(vcpu);
+   }
 }
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm)
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 479a3d2..4748532 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -42,6 +42,7 @@ struct kvm_kpic_state {
u8 irr; /* interrupt request register */
u8 imr; /* interrupt mask register */
u8 isr; /* interrupt service register */
+   u8 isr_ack; /* interrupt ack detection */
u8 priority_add;/* highest irq priority */
u8 irq_base;
u8 read_reg_select;
@@ -70,6 +71,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm);
 void kvm_pic_set_irq(void *opaque, int irq, int level);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
+void kvm_pic_clear_isr_ack(struct kvm *kvm);
 
 static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1c5864a..4cfdd1b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3963,6 +3963,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
pr_debug(Set back pending irq %d\n,
 pending_vec);
}
+   kvm_pic_clear_isr_ack(vcpu-kvm);
}
 
kvm_set_segment(vcpu, sregs-cs, VCPU_SREG_CS);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: MMU: add oos_shadow parameter to disable oos

2008-09-25 Thread Avi Kivity

From: Marcelo Tosatti [EMAIL PROTECTED]

Subject says it all.

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6ddb16c..9ff8d90 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -70,6 +70,9 @@ static int dbg = 0;
 module_param(dbg, bool, 0644);
 #endif
 
+static int oos_shadow = 1;
+module_param(oos_shadow, bool, 0644);
+
 #ifndef MMU_DEBUG
 #define ASSERT(x) do { } while (0)
 #else
@@ -1424,7 +1427,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, 
gfn_t gfn,
return 1;
if (shadow-unsync)
return 0;
-   if (can_unsync)
+   if (can_unsync  oos_shadow)
return kvm_unsync_page(vcpu, shadow);
return 1;
}
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/9] coalesce mmio regions with an explicit call

2008-09-25 Thread Avi Kivity

Glauber Costa wrote:

 Any ideas about what's up for the other hypervisors that may (we hope) be 
 integrated
 in the future? Xen?
   

Xen should benefit even more (much more).  IIRC Windows wouldn't boot
since it was spending all its time context switching when the splash
screen with its KITT bar was displayed, so they hacked something for
vga, but nothing generic.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM Migration fails

2008-09-25 Thread jd

Hi Chris, 
 
Thanks for the response.

It errors out a bit after the command is given, so I guess it must have
transferred some pages/bits.

What kind of information do you want me to collect ?
Is there a way to turn on logging..debugging for kvm process ?

Will also try tap option.

thanks.
/Jd

--- On Wed, 9/24/08, Chris Lalancette [EMAIL PROTECTED] wrote:

 From: Chris Lalancette [EMAIL PROTECTED]
 Subject: Re: KVM Migration fails
 To: [EMAIL PROTECTED]
 Cc: KVM List kvm@vger.kernel.org
 Date: Wednesday, September 24, 2008, 10:56 PM
 jd wrote:
  Hi I have a setup using shared nfs disks. When
 migration is attempted, it
  fails... any ideas on how to debug this..?
  
  /Jd
  
  Details ===
  
  migration: write failed (Connection reset by peer)^M
 Migration failed! ret=0
  error=9
 
 Not a lot of detail here, but when does it fail during the
 process?
 Immediately?  After transferring some of the image?  Right
 at the end?
 Connection reset by peer sort of sounds like there is a
 firewall or something in
 the way, although if it transfers some data before
 crashing, then it's probably
 something else.  The only other thing I can think of based
 on your command-line
 below is something to do with your -net options; I've
 only successfully migrated
 with -net tap before, I've never tried -net user.
 
 Chris Lalancette
 
  
  Source : KVM-73, Cent OS 5.2, 64 bit.
  
  qemu-system-x86_64 -net
 nic,vlan=0,macaddr=00:16:3e:16:f4:f0 -net user,vlan=0
  -hda /mnt/nfs/vmdisks/XPSP2-KVM.disk.xm -boot c -m
 1024 -no-acpi  -vnc :22
  -name XPSP2-KVM -smp 2 -monitor
  unix:/var/run/kvm/monitors/XPSP2-KVM,server,nowait
 -pidfile
  /var/run/kvm/pids/XPSP2-KVM -daemonize
  
  
  
  Dest   : KVM-70, Fedora 8, 64bit
  
  qemu-system-x86_64 -net
 nic,vlan=0,macaddr=00:16:3e:16:f4:f0 -net user,vlan=0
  -hda /mnt/nfs/vmdisks/XPSP2-KVM.disk.xm -boot c -m
 1024 -no-acpi  -vnc :23
  -incoming tcp://0:8002 -name XPSP2-KVM -smp 2 -monitor
  unix:/var/run/kvm/monitors/XPSP2-KVM,server,nowait
 -pidfile
  /var/run/kvm/pids/XPSP2-KVM -daemonize
 --
 To unsubscribe from this list: send the line
 unsubscribe kvm in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at 
 http://vger.kernel.org/majordomo-info.html


  
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM Migration fails

2008-09-25 Thread jd

The error code and the messages seem a bit different here.

/Jd


--- On Wed, 9/24/08, Yang, Sheng [EMAIL PROTECTED] wrote:

 From: Yang, Sheng [EMAIL PROTECTED]
 Subject: Re: KVM Migration fails
 To: kvm@vger.kernel.org, [EMAIL PROTECTED] [EMAIL PROTECTED]
 Date: Wednesday, September 24, 2008, 11:23 PM
 On Thursday 25 September 2008 12:22:42 jd wrote:
  Hi
I have a setup using shared nfs disks. When
 migration is attempted, it
  fails... any ideas on how to debug this..?
 
 It's a regression bug recently. 
 
 Please refer to 
 
 https://sourceforge.net/tracker/index.php?func=detail&aid=2106661&group_id=180599&atid=893831
 
 I think a git bisect can also help. 
 --
 regards
 Yang, Sheng
 
  /Jd
 
  Details
  ===
 
  migration: write failed (Connection reset by peer)^M
  Migration failed! ret=0 error=9
 
  Source : KVM-73, Cent OS 5.2, 64 bit.
 
  qemu-system-x86_64 -net
 nic,vlan=0,macaddr=00:16:3e:16:f4:f0 -net
  user,vlan=0 -hda /mnt/nfs/vmdisks/XPSP2-KVM.disk.xm
 -boot c -m 1024
  -no-acpi  -vnc :22 -name XPSP2-KVM -smp 2 -monitor
  unix:/var/run/kvm/monitors/XPSP2-KVM,server,nowait
 -pidfile 
  /var/run/kvm/pids/XPSP2-KVM -daemonize
 
 
 
  Dest   : KVM-70, Fedora 8, 64bit
 
  qemu-system-x86_64 -net
 nic,vlan=0,macaddr=00:16:3e:16:f4:f0 -net
  user,vlan=0 -hda /mnt/nfs/vmdisks/XPSP2-KVM.disk.xm
 -boot c -m 1024
  -no-acpi  -vnc :23 -incoming tcp://0:8002 -name
 XPSP2-KVM -smp 2 -monitor
  unix:/var/run/kvm/monitors/XPSP2-KVM,server,nowait
 -pidfile 
  /var/run/kvm/pids/XPSP2-KVM -daemonize
 
 
 
 
 
 
 
  --
  To unsubscribe from this list: send the line
 unsubscribe kvm in
  the body of a message to [EMAIL PROTECTED]
  More majordomo info at 
 http://vger.kernel.org/majordomo-info.html
 
 
 --
 To unsubscribe from this list: send the line
 unsubscribe kvm in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at 
 http://vger.kernel.org/majordomo-info.html


  
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM Migration fails

2008-09-25 Thread Yang, Sheng

On Thursday 25 September 2008 15:23:22 jd wrote:
 The error code and the messages seem bit different here.


Um... At least I believe it's a regression, and our migration test has never
succeeded since then. So I think it's worth looking into. If you are lucky
enough, you will get two bugs. :)

--
regards
Yang, Sheng

 /Jd

 --- On Wed, 9/24/08, Yang, Sheng [EMAIL PROTECTED] wrote:
  From: Yang, Sheng [EMAIL PROTECTED]
  Subject: Re: KVM Migration fails
  To: kvm@vger.kernel.org, [EMAIL PROTECTED] [EMAIL PROTECTED]
  Date: Wednesday, September 24, 2008, 11:23 PM
 
  On Thursday 25 September 2008 12:22:42 jd wrote:
   Hi
 I have a setup using shared nfs disks. When
 
  migration is attempted, it
 
   fails... any ideas on how to debug this..?
 
  It's a regression bug recently.
 
  Please refer to
 
  https://sourceforge.net/tracker/index.php?func=detailaid=2106661group_i
 d=180599atid=893831
 
  I think a git bisect can also help.
  --
  regards
  Yang, Sheng
 
   /Jd
  
   Details
   ===
  
   migration: write failed (Connection reset by peer)^M
   Migration failed! ret=0 error=9
  
   Source : KVM-73, Cent OS 5.2, 64 bit.
  
   qemu-system-x86_64 -net
 
  nic,vlan=0,macaddr=00:16:3e:16:f4:f0 -net
 
   user,vlan=0 -hda /mnt/nfs/vmdisks/XPSP2-KVM.disk.xm
 
  -boot c -m 1024
 
   -no-acpi  -vnc :22 -name XPSP2-KVM -smp 2 -monitor
   unix:/var/run/kvm/monitors/XPSP2-KVM,server,nowait
 
  -pidfile
 
   /var/run/kvm/pids/XPSP2-KVM -daemonize
  
  
  
   Dest   : KVM-70, Fedora 8, 64bit
  
   qemu-system-x86_64 -net
 
  nic,vlan=0,macaddr=00:16:3e:16:f4:f0 -net
 
   user,vlan=0 -hda /mnt/nfs/vmdisks/XPSP2-KVM.disk.xm
 
  -boot c -m 1024
 
   -no-acpi  -vnc :23 -incoming tcp://0:8002 -name
 
  XPSP2-KVM -smp 2 -monitor
 
   unix:/var/run/kvm/monitors/XPSP2-KVM,server,nowait
 
  -pidfile
 
   /var/run/kvm/pids/XPSP2-KVM -daemonize
  
  
  
  
  
  
  
   --
   To unsubscribe from this list: send the line
 
  unsubscribe kvm in
 
   the body of a message to [EMAIL PROTECTED]
   More majordomo info at
 
  http://vger.kernel.org/majordomo-info.html
 
 
  --
  To unsubscribe from this list: send the line
  unsubscribe kvm in
  the body of a message to [EMAIL PROTECTED]
  More majordomo info at
  http://vger.kernel.org/majordomo-info.html

 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to [EMAIL PROTECTED]
 More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM Migration fails

2008-09-25 Thread Uri Lublin


jd wrote:

Hi
  I have a setup using shared nfs disks. When migration is attempted, it fails... any ideas on how to debug this..? 
  
/Jd


Details
===

migration: write failed (Connection reset by peer)^M
Migration failed! ret=0 error=9

Source : KVM-73, Cent OS 5.2, 64 bit.

qemu-system-x86_64 -net nic,vlan=0,macaddr=00:16:3e:16:f4:f0 -net user,vlan=0 
-hda /mnt/nfs/vmdisks/XPSP2-KVM.disk.xm -boot c -m 1024 -no-acpi  -vnc :22 
-name XPSP2-KVM -smp 2 -monitor 
unix:/var/run/kvm/monitors/XPSP2-KVM,server,nowait -pidfile  
/var/run/kvm/pids/XPSP2-KVM -daemonize



Dest   : KVM-70, Fedora 8, 64bit

qemu-system-x86_64 -net nic,vlan=0,macaddr=00:16:3e:16:f4:f0 -net user,vlan=0 -hda /mnt/nfs/vmdisks/XPSP2-KVM.disk.xm -boot c -m 1024 -no-acpi  -vnc :23 -incoming tcp://0:8002 -name XPSP2-KVM -smp 2 -monitor unix:/var/run/kvm/monitors/XPSP2-KVM,server,nowait -pidfile  /var/run/kvm/pids/XPSP2-KVM -daemonize 




Hi,

The error on the source is "write failed". Many times that means the
destination has exited due to an error on its side. Do you see any error message 
on the destination ?


Uri.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-25 Thread Jan Kiszka

Jan Kiszka wrote:
...
 Index: b/arch/x86/kvm/vmx.c
 ===
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -90,6 +90,11 @@ struct vcpu_vmx {
   } rmode;
   int vpid;
   bool emulation_required;
 +
 + /* Support for vnmi-less CPUs */
 + int soft_vnmi_blocked;
 + ktime_t entry_time;
 + s64 vnmi_blocked_time;

I meanwhile realized that these states (except entry_time) and probably
also arch.nmi_pending/injected are things that should be considered when
the vcpu state is saved and restored, right? What is the right interface
for this? An extension of kvm_sregs?

BTW, via which channel is GUEST_INTERRUPTIBILITY_INFO from the vmcs
saved/restored? I'm currently not seeing any related, CPU-specific code.
For NMI code, the virtual blocking bit would be relevant (if the CPU
supports it, of course), but I guess the other bits are also important
enough to let them survive.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM: PIC: enhance IPI avoidance

2008-09-25 Thread Avi Kivity


Marcelo Tosatti wrote:

True. Any other potential problem you can think of?

  


No, so applied the patch.  Thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Avi Kivity


Han, Weidong wrote:

There is no need to map MMIO pages for the IOMMU. When MMIO pages are found in
kvm_iommu_map_pages(), don't map them, and don't return an error, because
it's not an error. If an error (such as -EINVAL) is returned, device assignment
will fail.

  



I don't understand.  Why don't we need to map mmio pages?  We certainly 
don't want them emulated.



@@ -36,14 +36,13 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 {
gfn_t gfn = base_gfn;
pfn_t pfn;
-   int i, r;
+   int i, r = 0;
struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
 
 	/* check if iommu exists and in use */

if (!domain)
return 0;
 
-	r = -EINVAL;

for (i = 0; i  npages; i++) {
/* check if already mapped */
pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
@@ -60,13 +59,14 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 DMA_PTE_READ |
 DMA_PTE_WRITE);
if (r) {
-   printk(KERN_DEBUG kvm_iommu_map_pages:
+   printk(KERN_ERR kvm_iommu_map_pages:
   iommu failed to map pfn=%lx\n,
pfn);
goto unmap_pages;
}
} else {
-   printk(KERN_DEBUG kvm_iommu_map_page:
-  invalid pfn=%lx\n, pfn);
+   printk(KERN_DEBUG kvm_iommu_map_pages:
+  invalid pfn=%lx, iommu needn't map 
+  MMIO pages!\n, pfn);
goto unmap_pages;
}


If a slot has a mix of mmio and non-mmio pages, you will unmap the 
non-mmio pages, yet return no error.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-25 Thread Avi Kivity


Jan Kiszka wrote:

Jan Kiszka wrote:
..
  

Index: b/arch/x86/kvm/vmx.c
===
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -90,6 +90,11 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
+
+   /* Support for vnmi-less CPUs */
+   int soft_vnmi_blocked;
+   ktime_t entry_time;
+   s64 vnmi_blocked_time;



I meanwhile realized that these states (except entry_time) and probably
also arch.nmi_pending/injected are things that should be considered when
the vcpu state is saved and restored, right? What is the right interface
for this? An extension of kvm_sregs?

  


kvm_sregs can't be extended because that would break the ABI, so we have 
to add a new ioctl.


I have some patches that allow ioctls to be extended, so if that's 
accepted, we can avoid the new ioctl.



BTW, via which channel is GUEST_INTERRUPTIBILITY_INFO from the vmcs
saved/restored? I'm currently not seeing any related, CPU-specific code.
  


Looks like it's missing.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/9] coalesce mmio regions with an explicit call

2008-09-25 Thread Glauber Costa

On Thu, Sep 25, 2008 at 10:08:53AM +0300, Avi Kivity wrote:
 Glauber Costa wrote:
 
  Any ideas about what's up for the other hypervisors that may (we hope) be 
  integrated
  in the future? Xen?

 
 Xen should benefit even more (much more).  IIRC Windows wouldn't boot
 since it was spending all its time context switching when the splash
 screen with its KITT bar was displayed, so they hacked something for
 vga, but nothing generic.
That's the point. It's not just wordplay between qemu and kvm, because
if we introduce generic hooks that kvm happens to fill but qemu does not, other
hypervisors
may (we hope) fill them in the future.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/4] Allow enabling kvm_trace on external module

2008-09-25 Thread Avi Kivity

Eduardo Habkost wrote:

From: Eduardo Habkost [EMAIL PROTECTED]
Date: Wed, 24 Sep 2008 14:11:42 -0300
Subject: Always generate config.kbuild

When implementing --with-kvm-trace, I supposed make would never enter
the 'kernel' directory when compiling with --with-patched-kernel. I was
wrong and broke --with-patched-kernel.

Change configure to always generate config.kbuild on the kernel
directory. Otherwise make will explode on 'make header-sync', that runs
even when --with-patched-kernel was used.

Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 01/39] KVM: VMX: Clean up magic number 0x66 in init_rmode_tss

2008-09-25 Thread Avi Kivity

From: Sheng Yang [EMAIL PROTECTED]

Signed-off-by: Sheng Yang [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/vmx.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ddb49e3..229e2d0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1732,7 +1732,8 @@ static int init_rmode_tss(struct kvm *kvm)
if (r  0)
goto out;
data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
-   r = kvm_write_guest_page(kvm, fn++, data, 0x66, sizeof(u16));
+   r = kvm_write_guest_page(kvm, fn++, data,
+   TSS_IOPB_BASE_OFFSET, sizeof(u16));
if (r  0)
goto out;
r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 03/39] KVM: set debug registers after schedulable section

2008-09-25 Thread Avi Kivity

From: Marcelo Tosatti [EMAIL PROTECTED]

The vcpu thread can be preempted after the guest_debug_pre() callback,
resulting in invalid debug registers on the new vcpu.

Move it inside the non-preemptable section.

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/x86.c |9 -
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1b0223..4a03375 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3113,10 +3113,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
down_read(vcpu-kvm-slots_lock);
vapic_enter(vcpu);
 
-preempted:
-   if (vcpu-guest_debug.enabled)
-   kvm_x86_ops-guest_debug_pre(vcpu);
-
 again:
if (vcpu-requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, vcpu-requests))
@@ -3170,6 +3166,9 @@ again:
goto out;
}
 
+   if (vcpu-guest_debug.enabled)
+   kvm_x86_ops-guest_debug_pre(vcpu);
+
vcpu-guest_mode = 1;
/*
 * Make sure that guest_mode assignment won't happen after
@@ -3244,7 +3243,7 @@ out:
if (r  0) {
kvm_resched(vcpu);
down_read(vcpu-kvm-slots_lock);
-   goto preempted;
+   goto again;
}
 
post_kvm_run_save(vcpu, kvm_run);
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 04/39] KVM: VMX: Use interrupt queue for !irqchip_in_kernel

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/vmx.c |   11 +--
 1 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 229e2d0..81db7d4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2173,7 +2173,7 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
clear_bit(bit_index, vcpu-arch.irq_pending[word_index]);
if (!vcpu-arch.irq_pending[word_index])
clear_bit(word_index, vcpu-arch.irq_summary);
-   vmx_inject_irq(vcpu, irq);
+   kvm_queue_interrupt(vcpu, irq);
 }
 
 
@@ -2187,13 +2187,12 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)  3) == 0);
 
if (vcpu-arch.interrupt_window_open 
-   vcpu-arch.irq_summary 
-   !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD)  INTR_INFO_VALID_MASK))
-   /*
-* If interrupts enabled, and not blocked by sti or mov ss. 
Good.
-*/
+   vcpu-arch.irq_summary  !vcpu-arch.interrupt.pending)
kvm_do_inject_irq(vcpu);
 
+   if (vcpu-arch.interrupt_window_open  vcpu-arch.interrupt.pending)
+   vmx_inject_irq(vcpu, vcpu-arch.interrupt.nr);
+
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
if (!vcpu-arch.interrupt_window_open 
(vcpu-arch.irq_summary || kvm_run-request_interrupt_window))
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 06/39] KVM: fix i8259 reset irq acking

2008-09-25 Thread Avi Kivity

From: Marcelo Tosatti [EMAIL PROTECTED]

The irq ack during pic reset has three problems:

- Ignores slave/master PIC, using gsi 0-8 for both.
- Generates an ACK even if the APIC is in control.
- Depends upon IMR being clear, which is broken if the irq was masked
at the time it was generated.

The last one causes the BIOS to hang after the first reboot of
Windows installation, since PIT interrupts stop.

[avi: fix check whether pic interrupts are seen by cpu]

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/i8259.c |   16 +++-
 1 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index de70499..71e3eee 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -195,13 +195,19 @@ int kvm_pic_read_irq(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-   int irq;
+   int irq, irqbase;
struct kvm *kvm = s-pics_state-irq_request_opaque;
+   struct kvm_vcpu *vcpu0 = kvm-vcpus[0];
 
-   for (irq = 0; irq  PIC_NUM_PINS; irq++) {
-   if (!(s-imr  (1  irq))  (s-irr  (1  irq) ||
-   s-isr  (1  irq)))
-   kvm_notify_acked_irq(kvm, irq);
+   if (s == s-pics_state-pics[0])
+   irqbase = 0;
+   else
+   irqbase = 8;
+
+   for (irq = 0; irq  PIC_NUM_PINS/2; irq++) {
+   if (vcpu0  kvm_apic_accept_pic_intr(vcpu0))
+   if (s-irr  (1  irq) || s-isr  (1  irq))
+   kvm_notify_acked_irq(kvm, irq+irqbase);
}
s-last_irr = 0;
s-irr = 0;
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 07/39] KVM: Handle spurious acks for PIT interrupts

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Spurious acks can be generated, for example if the PIC is being reset.
Handle those acks gracefully rather than flooding the log with warnings.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/i8254.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 7d04dd3..c842060 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -228,7 +228,7 @@ void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 irq_ack_notifier);
spin_lock(ps-inject_lock);
if (atomic_dec_return(ps-pit_timer.pending)  0)
-   WARN_ON(1);
+   atomic_inc(ps-pit_timer.pending);
ps-irq_ack = 1;
spin_unlock(ps-inject_lock);
 }
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 05/39] KVM: Simplify exception entries by using __ASM_SIZE and _ASM_PTR

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 include/asm-x86/kvm_host.h |   13 ++---
 1 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1161af1..982b6b2 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -734,15 +734,6 @@ enum {
TASK_SWITCH_GATE = 3,
 };
 
-
-#ifdef CONFIG_64BIT
-# define KVM_EX_ENTRY .quad
-# define KVM_EX_PUSH pushq
-#else
-# define KVM_EX_ENTRY .long
-# define KVM_EX_PUSH pushl
-#endif
-
 /*
  * Hardware virtualization extension instructions may fault if a
  * reboot turns off virtualization while processes are running.
@@ -754,11 +745,11 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
666:  insn \n\t \
.pushsection .fixup, \ax\ \n \
667: \n\t \
-   KVM_EX_PUSH  $666b \n\t \
+   __ASM_SIZE(push)  $666b \n\t\
jmp kvm_handle_fault_on_reboot \n\t \
.popsection \n\t \
.pushsection __ex_table, \a\ \n\t \
-   KVM_EX_ENTRY  666b, 667b \n\t \
+   _ASM_PTR  666b, 667b \n\t \
.popsection
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 10/39] KVM: VMX: Add module parameter and emulation flag.

2008-09-25 Thread Avi Kivity

From: Mohammed Gamal [EMAIL PROTECTED]

The patch adds the module parameter required to enable emulating invalid
guest state, as well as the emulation_required flag used to drive
emulation whenever needed.

Signed-off-by: Mohammed Gamal [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/vmx.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e889b76..7c5f611 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -49,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0);
 static int enable_ept = 1;
 module_param(enable_ept, bool, 0);
 
+static int emulate_invalid_guest_state = 0;
+module_param(emulate_invalid_guest_state, bool, 0);
+
 struct vmcs {
u32 revision_id;
u32 abort;
@@ -86,6 +89,7 @@ struct vcpu_vmx {
} irq;
} rmode;
int vpid;
+   bool emulation_required;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 11/39] KVM: VMX: Add invalid guest state handler

2008-09-25 Thread Avi Kivity

From: Mohammed Gamal [EMAIL PROTECTED]

This adds the invalid guest state handler function which invokes the x86
emulator until getting the guest to a VMX-friendly state.

[avi: leave atomic context if scheduling]
[guillaume: return to atomic context correctly]

Signed-off-by: Laurent Vivier [EMAIL PROTECTED]
Signed-off-by: Guillaume Thouvenin [EMAIL PROTECTED]
Signed-off-by: Mohammed Gamal [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/vmx.c |   37 +
 1 files changed, 37 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c5f611..eae1f2c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2892,6 +2892,43 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
return 1;
 }
 
+static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
+   struct kvm_run *kvm_run)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   int err;
+
+   preempt_enable();
+   local_irq_enable();
+
+   while (!guest_state_valid(vcpu)) {
+   err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+
+   switch (err) {
+   case EMULATE_DONE:
+   break;
+   case EMULATE_DO_MMIO:
+   kvm_report_emulation_failure(vcpu, mmio);
+   /* TODO: Handle MMIO */
+   return;
+   default:
+   kvm_report_emulation_failure(vcpu, emulation 
failure);
+   return;
+   }
+
+   if (signal_pending(current))
+   break;
+   if (need_resched())
+   schedule();
+   }
+
+   local_irq_disable();
+   preempt_disable();
+
+   /* Guest state should be valid now, no more emulation should be needed 
*/
+   vmx-emulation_required = 0;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 12/39] KVM: VMX: Modify mode switching and vmentry functions

2008-09-25 Thread Avi Kivity

From: Mohammed Gamal [EMAIL PROTECTED]

This patch modifies the mode switching and vmentry functions in order to
drive invalid guest state emulation.

Signed-off-by: Mohammed Gamal [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/vmx.c |   20 
 1 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index eae1f2c..9840f37 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1298,7 +1298,9 @@ static void fix_pmode_dataseg(int seg, struct 
kvm_save_segment *save)
 static void enter_pmode(struct kvm_vcpu *vcpu)
 {
unsigned long flags;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+   vmx-emulation_required = 1;
vcpu-arch.rmode.active = 0;
 
vmcs_writel(GUEST_TR_BASE, vcpu-arch.rmode.tr.base);
@@ -1315,6 +1317,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
update_exception_bitmap(vcpu);
 
+   if (emulate_invalid_guest_state)
+   return;
+
fix_pmode_dataseg(VCPU_SREG_ES, vcpu-arch.rmode.es);
fix_pmode_dataseg(VCPU_SREG_DS, vcpu-arch.rmode.ds);
fix_pmode_dataseg(VCPU_SREG_GS, vcpu-arch.rmode.gs);
@@ -1355,7 +1360,9 @@ static void fix_rmode_seg(int seg, struct 
kvm_save_segment *save)
 static void enter_rmode(struct kvm_vcpu *vcpu)
 {
unsigned long flags;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+   vmx-emulation_required = 1;
vcpu-arch.rmode.active = 1;
 
vcpu-arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
@@ -1377,6 +1384,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
update_exception_bitmap(vcpu);
 
+   if (emulate_invalid_guest_state)
+   goto continue_rmode;
+
vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE)  4);
vmcs_write32(GUEST_SS_LIMIT, 0x);
vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
@@ -1392,6 +1402,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
fix_rmode_seg(VCPU_SREG_GS, vcpu-arch.rmode.gs);
fix_rmode_seg(VCPU_SREG_FS, vcpu-arch.rmode.fs);
 
+continue_rmode:
kvm_mmu_reset_context(vcpu);
init_rmode(vcpu-kvm);
 }
@@ -2317,6 +2328,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
ret = 0;
 
+   /* HACK: Don't enable emulation on guest boot/reset */
+   vmx-emulation_required = 0;
+
 out:
up_read(vcpu-kvm-slots_lock);
return ret;
@@ -3190,6 +3204,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
 
+   /* Handle invalid guest state instead of entering VMX */
+   if (vmx-emulation_required  emulate_invalid_guest_state) {
+   handle_invalid_guest_state(vcpu, kvm_run);
+   return;
+   }
+
if (test_bit(VCPU_REGS_RSP, (unsigned long *)vcpu-arch.regs_dirty))
vmcs_writel(GUEST_RSP, vcpu-arch.regs[VCPU_REGS_RSP]);
if (test_bit(VCPU_REGS_RIP, (unsigned long *)vcpu-arch.regs_dirty))
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 15/39] KVM: make irq ack notifier functions static

2008-09-25 Thread Avi Kivity

From: Harvey Harrison [EMAIL PROTECTED]

sparse says:

arch/x86/kvm/x86.c:107:32: warning: symbol 'kvm_find_assigned_dev' was not 
declared. Should it be static?
arch/x86/kvm/i8254.c:225:6: warning: symbol 'kvm_pit_ack_irq' was not declared. 
Should it be static?

Signed-off-by: Harvey Harrison [EMAIL PROTECTED]
Signed-off-by: Andrew Morton [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/i8254.c |2 +-
 arch/x86/kvm/x86.c   |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index fdaa0f0..4cb4430 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -222,7 +222,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu)
return 0;
 }
 
-void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
+static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 irq_ack_notifier);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5b3c882..22edd95 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -104,7 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
 };
 
-struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head 
*head,
  int assigned_dev_id)
 {
struct list_head *ptr;
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 16/39] KVM: ia64: add a dummy irq ack notification

2008-09-25 Thread Avi Kivity

From: Xiantao Zhang [EMAIL PROTECTED]

Before enabling notify_acked_irq for ia64, leave the related APIs as
no-ops first.

Signed-off-by: Xiantao Zhang [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/ia64/kvm/irq.h |   32 
 virt/kvm/ioapic.c   |2 +-
 2 files changed, 33 insertions(+), 1 deletions(-)
 create mode 100644 arch/ia64/kvm/irq.h

diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
new file mode 100644
index 000..f2e6545
--- /dev/null
+++ b/arch/ia64/kvm/irq.h
@@ -0,0 +1,32 @@
+/*
+ * irq.h: In-kernel interrupt controller related definitions
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Authors:
+ *   Xiantao Zhang [EMAIL PROTECTED]
+ *
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+struct kvm;
+
+static inline void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+{
+}
+
+#endif
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 515cd7c..53772bb 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -386,7 +386,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, 
gpa_t addr, int len,
break;
 #ifdef CONFIG_IA64
case IOAPIC_REG_EOI:
-   kvm_ioapic_update_eoi(ioapic-kvm, data);
+   kvm_ioapic_update_eoi(ioapic-kvm, data, IOAPIC_LEVEL_TRIG);
break;
 #endif
 
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 17/39] KVM: VMX: Change cs reset state to be a data segment

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Real mode cs is a data segment, not a code segment.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/vmx.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9840f37..6aa305a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2239,6 +2239,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
fx_init(vmx-vcpu);
 
+   seg_setup(VCPU_SREG_CS);
/*
 * GUEST_CS_BASE should really be 0x, but VT vm86 mode
 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR  4.  Sigh.
@@ -2250,8 +2251,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write16(GUEST_CS_SELECTOR, vmx-vcpu.arch.sipi_vector  
8);
vmcs_writel(GUEST_CS_BASE, vmx-vcpu.arch.sipi_vector  12);
}
-   vmcs_write32(GUEST_CS_LIMIT, 0x);
-   vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
 
seg_setup(VCPU_SREG_DS);
seg_setup(VCPU_SREG_ES);
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 18/39] KVM: VMX: Change segment dpl at reset to 3

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

This is more emulation friendly, if not 100% correct.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/vmx.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6aa305a..71e57ae 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1991,7 +1991,7 @@ static void seg_setup(int seg)
vmcs_write16(sf-selector, 0);
vmcs_writel(sf-base, 0);
vmcs_write32(sf-limit, 0x);
-   vmcs_write32(sf-ar_bytes, 0x93);
+   vmcs_write32(sf-ar_bytes, 0xf3);
 }
 
 static int alloc_apic_access_page(struct kvm *kvm)
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 19/39] KVM: Load real mode segments correctly

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Real mode segments do not reference the GDT or LDT; they simply compute
base = selector * 16.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/x86.c |   22 ++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 22edd95..bfc7c33 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3588,11 +3588,33 @@ static int load_segment_descriptor_to_kvm_desct(struct 
kvm_vcpu *vcpu,
return 0;
 }
 
+int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
+{
+   struct kvm_segment segvar = {
+   .base = selector  4,
+   .limit = 0x,
+   .selector = selector,
+   .type = 3,
+   .present = 1,
+   .dpl = 3,
+   .db = 0,
+   .s = 1,
+   .l = 0,
+   .g = 0,
+   .avl = 0,
+   .unusable = 0,
+   };
+   kvm_x86_ops-set_segment(vcpu, segvar, seg);
+   return 0;
+}
+
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
int type_bits, int seg)
 {
struct kvm_segment kvm_seg;
 
+   if (!(vcpu-arch.cr0  X86_CR0_PE))
+   return kvm_load_realmode_segment(vcpu, selector, seg);
if (load_segment_descriptor_to_kvm_desct(vcpu, selector, kvm_seg))
return 1;
kvm_seg.type |= type_bits;
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 22/39] KVM: MMU: Move SHADOW_PT_INDEX to mmu.c

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

It is not specific to the paging mode, so can be made global (and reusable).

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/mmu.c |2 ++
 arch/x86/kvm/paging_tmpl.h |3 ---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 171bcea..51d4cd7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -135,6 +135,8 @@ module_param(dbg, bool, 0644);
 #define ACC_USER_MASKPT_USER_MASK
 #define ACC_ALL  (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
+#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
+
 struct kvm_rmap_desc {
u64 *shadow_ptes[RMAP_EXT];
struct kvm_rmap_desc *more;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4a814bf..ebb26a0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -29,7 +29,6 @@
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
-   #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
#ifdef CONFIG_X86_64
@@ -46,7 +45,6 @@
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
-   #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS
#define PT_MAX_FULL_LEVELS 2
@@ -504,7 +502,6 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
 #undef FNAME
 #undef PT_BASE_ADDR_MASK
 #undef PT_INDEX
-#undef SHADOW_PT_INDEX
 #undef PT_LEVEL_MASK
 #undef PT_DIR_BASE_ADDR_MASK
 #undef PT_LEVEL_BITS
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 25/39] KVM: MMU: Infer shadow root level in direct_map()

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

In all cases the shadow root level is available in mmu.shadow_root_level,
so there is no need to pass it as a parameter.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/mmu.c |9 -
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3ee856f..72f739a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1227,11 +1227,11 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-  int largepage, gfn_t gfn, pfn_t pfn,
-  int level)
+   int largepage, gfn_t gfn, pfn_t pfn)
 {
hpa_t table_addr = vcpu-arch.mmu.root_hpa;
int pt_write = 0;
+   int level = vcpu-arch.mmu.shadow_root_level;
 
for (; ; level--) {
u32 index = PT64_INDEX(v, level);
@@ -1299,8 +1299,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, 
int write, gfn_t gfn)
if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
-   r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
-PT32E_ROOT_LEVEL);
+   r = __direct_map(vcpu, v, write, largepage, gfn, pfn);
spin_unlock(vcpu-kvm-mmu_lock);
 
 
@@ -1455,7 +1454,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t 
gpa,
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, gpa, error_code  PFERR_WRITE_MASK,
-largepage, gfn, pfn, kvm_x86_ops-get_tdp_level());
+largepage, gfn, pfn);
spin_unlock(vcpu-kvm-mmu_lock);
 
return r;
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 24/39] KVM: ia64: Enable virtio driver for ia64 in Kconfig

2008-09-25 Thread Avi Kivity

From: Xiantao Zhang [EMAIL PROTECTED]

kvm/ia64 uses the virtio drivers to optimize its I/O subsystem.

Signed-off-by: Xiantao Zhang [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/ia64/kvm/Kconfig |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 7914e48..8e99fed 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -46,4 +46,6 @@ config KVM_INTEL
 config KVM_TRACE
bool
 
+source drivers/virtio/Kconfig
+
 endif # VIRTUALIZATION
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 28/39] KVM: MMU: Convert the paging mode shadow walk to use the generic walker

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/paging_tmpl.h |  158 
 1 files changed, 86 insertions(+), 72 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ebb26a0..b7064e1 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,6 +25,7 @@
 #if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
+   #define shadow_walker shadow_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -41,6 +42,7 @@
 #elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
+   #define shadow_walker shadow_walker32
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -71,6 +73,17 @@ struct guest_walker {
u32 error_code;
 };
 
+struct shadow_walker {
+   struct kvm_shadow_walk walker;
+   struct guest_walker *guest_walker;
+   int user_fault;
+   int write_fault;
+   int largepage;
+   int *ptwrite;
+   pfn_t pfn;
+   u64 *sptep;
+};
+
 static gfn_t gpte_to_gfn(pt_element_t gpte)
 {
return (gpte  PT_BASE_ADDR_MASK)  PAGE_SHIFT;
@@ -272,86 +285,86 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, 
struct kvm_mmu_page *page,
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
-struct guest_walker *walker,
-int user_fault, int write_fault, int largepage,
-int *ptwrite, pfn_t pfn)
+static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
+   struct kvm_vcpu *vcpu, gva_t addr,
+   u64 *sptep, int level)
 {
-   hpa_t shadow_addr;
-   int level;
-   u64 *shadow_ent;
-   unsigned access = walker-pt_access;
-
-   if (!is_present_pte(walker-ptes[walker-level - 1]))
-   return NULL;
-
-   shadow_addr = vcpu-arch.mmu.root_hpa;
-   level = vcpu-arch.mmu.shadow_root_level;
-   if (level == PT32E_ROOT_LEVEL) {
-   shadow_addr = vcpu-arch.mmu.pae_root[(addr  30)  3];
-   shadow_addr = PT64_BASE_ADDR_MASK;
-   --level;
+   struct shadow_walker *sw =
+   container_of(_sw, struct shadow_walker, walker);
+   struct guest_walker *gw = sw-guest_walker;
+   unsigned access = gw-pt_access;
+   struct kvm_mmu_page *shadow_page;
+   u64 spte;
+   int metaphysical;
+   gfn_t table_gfn;
+   int r;
+   pt_element_t curr_pte;
+
+   if (level == PT_PAGE_TABLE_LEVEL
+   || (sw-largepage  level == PT_DIRECTORY_LEVEL)) {
+   mmu_set_spte(vcpu, sptep, access, gw-pte_access  access,
+sw-user_fault, sw-write_fault,
+gw-ptes[gw-level-1]  PT_DIRTY_MASK,
+sw-ptwrite, sw-largepage, gw-gfn, sw-pfn,
+false);
+   sw-sptep = sptep;
+   return 1;
}
 
-   for (; ; level--) {
-   u32 index = SHADOW_PT_INDEX(addr, level);
-   struct kvm_mmu_page *shadow_page;
-   u64 shadow_pte;
-   int metaphysical;
-   gfn_t table_gfn;
-
-   shadow_ent = ((u64 *)__va(shadow_addr)) + index;
-   if (level == PT_PAGE_TABLE_LEVEL)
-   break;
-
-   if (largepage  level == PT_DIRECTORY_LEVEL)
-   break;
-
-   if (is_shadow_present_pte(*shadow_ent)
-!is_large_pte(*shadow_ent)) {
-   shadow_addr = *shadow_ent  PT64_BASE_ADDR_MASK;
-   continue;
-   }
+   if (is_shadow_present_pte(*sptep)  !is_large_pte(*sptep))
+   return 0;
 
-   if (is_large_pte(*shadow_ent))
-   rmap_remove(vcpu-kvm, shadow_ent);
-
-   if (level - 1 == PT_PAGE_TABLE_LEVEL
-walker-level == PT_DIRECTORY_LEVEL) {
-   metaphysical = 1;
-   if (!is_dirty_pte(walker-ptes[level - 1]))
-   access = ~ACC_WRITE_MASK;
-   table_gfn = gpte_to_gfn(walker-ptes[level - 1]);
-   } else {
-   metaphysical = 0;
-   table_gfn = walker-table_gfn[level - 2];
-   }
-   shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
-  metaphysical, access,
-

[PATCH 29/39] KVM: Allocate guest memory as MAP_PRIVATE, not MAP_SHARED

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

There is no reason to share internal memory slots with fork()ed instances.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/x86.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bfc7c33..675d010 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4296,7 +4296,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
userspace_addr = do_mmap(NULL, 0,
 npages * PAGE_SIZE,
 PROT_READ | PROT_WRITE,
-MAP_SHARED | MAP_ANONYMOUS,
+MAP_PRIVATE | MAP_ANONYMOUS,
 0);
up_write(current-mm-mmap_sem);
 
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 32/39] KVM: MMU: Account for npt/ept/realmode page faults

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Now that two-dimensional paging is becoming common, account for tdp page
faults.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/mmu.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a1ca4ff..a24da8f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1283,6 +1283,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
 0, walk-write, 1, walk-pt_write,
 walk-largepage, gfn, walk-pfn, false);
+   ++vcpu-stat.pf_fixed;
return 1;
}
 
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 30/39] KVM: Don't call get_user_pages(.force = 1)

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

This is esoteric and only needed to break COW on MAP_SHARED mappings.  Since
KVM no longer does these sorts of mappings, breaking COW on them is no longer
necessary.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 virt/kvm/kvm_main.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0309571..de3b029 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -734,7 +734,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
return page_to_pfn(bad_page);
}
 
-   npages = get_user_pages(current, current-mm, addr, 1, 1, 1, page,
+   npages = get_user_pages(current, current-mm, addr, 1, 1, 0, page,
NULL);
 
if (unlikely(npages != 1)) {
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 33/39] KVM: MMU: Add locking around kvm_mmu_slot_remove_write_access()

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

It was generally safe due to slots_lock being held for write, but it wasn't
very nice.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/mmu.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a24da8f..5052acd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2097,6 +2097,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
int slot)
 {
struct kvm_mmu_page *sp;
 
+   spin_lock(kvm-mmu_lock);
list_for_each_entry(sp, kvm-arch.active_mmu_pages, link) {
int i;
u64 *pt;
@@ -2110,6 +2111,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
int slot)
if (pt[i]  PT_WRITABLE_MASK)
pt[i] = ~PT_WRITABLE_MASK;
}
+   spin_unlock(kvm-mmu_lock);
 }
 
 void kvm_mmu_zap_all(struct kvm *kvm)
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 31/39] KVM: x86 emulator: Add mov r, imm instructions (opcodes 0xb0-0xbf)

2008-09-25 Thread Avi Kivity

From: Mohammed Gamal [EMAIL PROTECTED]

The emulator only supported one instance of the mov r, imm instruction
(opcode 0xb8); this adds the rest of these instructions.

Signed-off-by: Mohammed Gamal [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/x86_emulate.c |   15 +++
 1 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index ae30435..66e0bd6 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -154,9 +154,16 @@ static u16 opcode_table[256] = {
0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
ByteOp | ImplicitOps | String, ImplicitOps | String,
-   /* 0xB0 - 0xBF */
-   0, 0, 0, 0, 0, 0, 0, 0,
-   DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0,
+   /* 0xB0 - 0xB7 */
+   ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
+   ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
+   ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
+   ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
+   /* 0xB8 - 0xBF */
+   DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
+   DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
+   DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
+   DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
/* 0xC0 - 0xC7 */
ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
0, ImplicitOps | Stack, 0, 0,
@@ -1660,7 +1667,7 @@ special_insn:
case 0xae ... 0xaf: /* scas */
DPRINTF(Urk! I don't handle SCAS.\n);
goto cannot_emulate;
-   case 0xb8: /* mov r, imm */
+   case 0xb0 ... 0xbf: /* mov r, imm */
goto mov;
case 0xc0 ... 0xc1:
emulate_grp2(ctxt);
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 34/39] KVM: MMU: Flush tlbs after clearing write permission when accessing dirty log

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Otherwise, the cpu may allow writes to the tracked pages, and we lose
some display bits or fail to migrate correctly.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/mmu.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5052acd..853a288 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2111,6 +2111,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
int slot)
if (pt[i]  PT_WRITABLE_MASK)
pt[i] = ~PT_WRITABLE_MASK;
}
+   kvm_flush_remote_tlbs(kvm);
spin_unlock(kvm-mmu_lock);
 }
 
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 27/39] KVM: MMU: Convert direct maps to use the generic shadow walker

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/mmu.c |   93 ++-
 1 files changed, 55 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8b95cf7..a1ca4ff 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1260,49 +1260,66 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-   int largepage, gfn_t gfn, pfn_t pfn)
-{
-   hpa_t table_addr = vcpu-arch.mmu.root_hpa;
-   int pt_write = 0;
-   int level = vcpu-arch.mmu.shadow_root_level;
-
-   for (; ; level--) {
-   u32 index = PT64_INDEX(v, level);
-   u64 *table;
+struct direct_shadow_walk {
+   struct kvm_shadow_walk walker;
+   pfn_t pfn;
+   int write;
+   int largepage;
+   int pt_write;
+};
 
-   ASSERT(VALID_PAGE(table_addr));
-   table = __va(table_addr);
+static int direct_map_entry(struct kvm_shadow_walk *_walk,
+   struct kvm_vcpu *vcpu,
+   gva_t addr, u64 *sptep, int level)
+{
+   struct direct_shadow_walk *walk =
+   container_of(_walk, struct direct_shadow_walk, walker);
+   struct kvm_mmu_page *sp;
+   gfn_t pseudo_gfn;
+   gfn_t gfn = addr  PAGE_SHIFT;
+
+   if (level == PT_PAGE_TABLE_LEVEL
+   || (walk-largepage  level == PT_DIRECTORY_LEVEL)) {
+   mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
+0, walk-write, 1, walk-pt_write,
+walk-largepage, gfn, walk-pfn, false);
+   return 1;
+   }
 
-   if (level == 1 || (largepage  level == 2)) {
-   mmu_set_spte(vcpu, table[index], ACC_ALL, ACC_ALL,
-0, write, 1, pt_write, largepage,
-gfn, pfn, false);
-   return pt_write;
+   if (*sptep == shadow_trap_nonpresent_pte) {
+   pseudo_gfn = (addr  PT64_DIR_BASE_ADDR_MASK)  PAGE_SHIFT;
+   sp = kvm_mmu_get_page(vcpu, pseudo_gfn, addr, level - 1,
+ 1, ACC_ALL, sptep);
+   if (!sp) {
+   pgprintk(nonpaging_map: ENOMEM\n);
+   kvm_release_pfn_clean(walk-pfn);
+   return -ENOMEM;
}
 
-   if (table[index] == shadow_trap_nonpresent_pte) {
-   struct kvm_mmu_page *new_table;
-   gfn_t pseudo_gfn;
-
-   pseudo_gfn = (v  PT64_DIR_BASE_ADDR_MASK)
-PAGE_SHIFT;
-   new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
-v, level - 1,
-1, ACC_ALL, table[index]);
-   if (!new_table) {
-   pgprintk(nonpaging_map: ENOMEM\n);
-   kvm_release_pfn_clean(pfn);
-   return -ENOMEM;
-   }
-
-   set_shadow_pte(table[index],
-  __pa(new_table-spt)
-  | PT_PRESENT_MASK | PT_WRITABLE_MASK
-  | shadow_user_mask | shadow_x_mask);
-   }
-   table_addr = table[index]  PT64_BASE_ADDR_MASK;
+   set_shadow_pte(sptep,
+  __pa(sp-spt)
+  | PT_PRESENT_MASK | PT_WRITABLE_MASK
+  | shadow_user_mask | shadow_x_mask);
}
+   return 0;
+}
+
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+   int largepage, gfn_t gfn, pfn_t pfn)
+{
+   int r;
+   struct direct_shadow_walk walker = {
+   .walker = { .entry = direct_map_entry, },
+   .pfn = pfn,
+   .largepage = largepage,
+   .write = write,
+   .pt_write = 0,
+   };
+
+   r = walk_shadow(walker.walker, vcpu, (gva_t)gfn  PAGE_SHIFT);
+   if (r  0)
+   return r;
+   return walker.pt_write;
 }
 
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 35/39] KVM: MMU: Fix setting the accessed bit on non-speculative sptes

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

The accessed bit was accidentally turned on in a random flag word, rather
than the spte itself, which was lucky, since it used the non-EPT compatible
PT_ACCESSED_MASK.

Fix by turning the bit on in the spte and changing it to use the portable
accessed mask.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/mmu.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 853a288..866d713 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1192,7 +1192,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 
*shadow_pte,
 */
spte = shadow_base_present_pte | shadow_dirty_mask;
if (!speculative)
-   pte_access |= PT_ACCESSED_MASK;
+   spte |= shadow_accessed_mask;
if (!dirty)
pte_access = ~ACC_WRITE_MASK;
if (pte_access  ACC_EXEC_MASK)
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 23/39] KVM: MMU: Unify direct map 4K and large page paths

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

The two paths are equivalent except for one argument, which is already
available.  Merge the two codepaths.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/mmu.c |   11 +++
 1 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 51d4cd7..3ee856f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1240,15 +1240,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, 
int write,
ASSERT(VALID_PAGE(table_addr));
table = __va(table_addr);
 
-   if (level == 1) {
+   if (level == 1 || (largepage  level == 2)) {
mmu_set_spte(vcpu, table[index], ACC_ALL, ACC_ALL,
-0, write, 1, pt_write, 0, gfn, pfn, 
false);
-   return pt_write;
-   }
-
-   if (largepage  level == 2) {
-   mmu_set_spte(vcpu, table[index], ACC_ALL, ACC_ALL,
-0, write, 1, pt_write, 1, gfn, pfn, 
false);
+0, write, 1, pt_write, largepage,
+gfn, pfn, false);
return pt_write;
}
 
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 36/39] KVM: SVM: No need to unprotect memory during event injection when using npt

2008-09-25 Thread Avi Kivity

From: Avi Kivity [EMAIL PROTECTED]

No memory is protected anyway.

Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/svm.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index be86c09..6022888 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1021,7 +1021,7 @@ static int pf_interception(struct vcpu_svm *svm, struct 
kvm_run *kvm_run)
if (npt_enabled)
svm_flush_tlb(svm-vcpu);
 
-   if (event_injection)
+   if (!npt_enabled  event_injection)
kvm_mmu_unprotect_page_virt(svm-vcpu, fault_address);
return kvm_mmu_page_fault(svm-vcpu, fault_address, error_code);
 }
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 37/39] KVM: add MC5_MISC msr read support

2008-09-25 Thread Avi Kivity

From: Joerg Roedel [EMAIL PROTECTED]

Currently KVM implements MC0-MC4_MISC read support. When booting Linux this
results in KVM warnings in the kernel log when the guest tries to read
MC5_MISC. Fix these warnings with this patch.

Signed-off-by: Joerg Roedel [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/x86/kvm/x86.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 675d010..e3b8966 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -991,6 +991,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
*pdata)
case MSR_IA32_MC0_MISC+8:
case MSR_IA32_MC0_MISC+12:
case MSR_IA32_MC0_MISC+16:
+   case MSR_IA32_MC0_MISC+20:
case MSR_IA32_UCODE_REV:
case MSR_IA32_EBL_CR_POWERON:
case MSR_IA32_DEBUGCTLMSR:
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 02/39] KVM: remove unused field from the assigned dev struct

2008-09-25 Thread Avi Kivity

From: Ben-Ami Yassour [EMAIL PROTECTED]

Remove unused field: struct kvm_assigned_pci_dev assigned_dev
from struct: struct kvm_assigned_dev_kernel

Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 include/asm-x86/kvm_host.h |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index fb7d7b7..1161af1 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -341,7 +341,6 @@ struct kvm_assigned_dev_kernel {
struct kvm_irq_ack_notifier ack_notifier;
struct work_struct interrupt_work;
struct list_head list;
-   struct kvm_assigned_pci_dev assigned_dev;
int assigned_dev_id;
int host_busnr;
int host_devfn;
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 38/39] KVM: s390: Make facility bits future-proof

2008-09-25 Thread Avi Kivity

From: Christian Borntraeger [EMAIL PROTECTED]

Heiko Carstens pointed out that it's safer to activate working facilities
instead of disabling problematic facilities. The new code uses the host
facility bits and masks them with known good ones.

Signed-off-by: Christian Borntraeger [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/s390/kvm/priv.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d1faf5c..cce40ff 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -157,8 +157,8 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
int rc;
 
vcpu-stat.instruction_stfl++;
-   facility_list = ~(1UL24); /* no stfle */
-   facility_list = ~(1UL23); /* no large pages */
+   /* only pass the facility bits, which we can handle */
+   facility_list = 0xfe00fff3;
 
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
   facility_list, sizeof(facility_list));
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 39/39] KVM: s390: change help text of guest Kconfig

2008-09-25 Thread Avi Kivity

From: Christian Borntraeger [EMAIL PROTECTED]

The current help text for CONFIG_S390_GUEST is not very helpful.
Let's add more text.

Signed-off-by: Christian Borntraeger [EMAIL PROTECTED]
Signed-off-by: Avi Kivity [EMAIL PROTECTED]
---
 arch/s390/Kconfig |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8d41908..c9bfed9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -564,13 +564,16 @@ config ZFCPDUMP
  Refer to file:Documentation/s390/zfcpdump.txt for more details on 
this.
 
 config S390_GUEST
-bool s390 guest support (EXPERIMENTAL)
+bool s390 guest support for KVM (EXPERIMENTAL)
depends on 64BIT  EXPERIMENTAL
select VIRTIO
select VIRTIO_RING
select VIRTIO_CONSOLE
help
- Select this option if you want to run the kernel under s390 linux
+ Select this option if you want to run the kernel as a guest under
+ the KVM hypervisor. This will add detection for KVM as well  as a
+ virtio transport. If KVM is detected, the virtio console will be
+ the default console.
 endmenu
 
 source net/Kconfig
-- 
1.6.0.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Han, Weidong

Avi Kivity wrote:
 Han, Weidong wrote:
 Don't need to map mmio pages for iommu. When find mmio pages in
 kvm_iommu_map_pages(), don't map them, and shouldn't return error
 due to it's not an error. If return error (such as -EINVAL), device
 assignment will fail. 
 
 
 
 
 I don't understand.  Why don't we need to map mmio pages?  We
 certainly don't want them emulated.

MMIO pages need not be mapped in the VT-d page table, which only
translates DMA addresses. Amit's userspace patch registers a memslot for
the MMIO regions of assigned devices; it doesn't emulate them.

 
 @@ -36,14 +36,13 @@ int kvm_iommu_map_pages(struct kvm *kvm,  {
  gfn_t gfn = base_gfn;
  pfn_t pfn;
 -int i, r;
 +int i, r = 0;
  struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
 
  /* check if iommu exists and in use */
  if (!domain)
  return 0;
 
 -r = -EINVAL;
  for (i = 0; i  npages; i++) {
  /* check if already mapped */
  pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
 @@ -60,13 +59,14 @@ int kvm_iommu_map_pages(struct kvm *kvm,

   DMA_PTE_READ |
DMA_PTE_WRITE);
  if (r) {
 -printk(KERN_DEBUG kvm_iommu_map_pages:
 +printk(KERN_ERR kvm_iommu_map_pages:
 iommu failed to map pfn=%lx\n,
 pfn);
  goto unmap_pages;
  }
  } else {
 -printk(KERN_DEBUG kvm_iommu_map_page:
 -   invalid pfn=%lx\n, pfn);
 +printk(KERN_DEBUG kvm_iommu_map_pages:
 +   invalid pfn=%lx, iommu needn't map 
 +   MMIO pages!\n, pfn);
  goto unmap_pages;
  }
 
 If a slot has a mix of mmio and non-mmio pages, you will unmap the
 non-mmio pages, yet return no error.
 

I didn't consider this mixed case. In the mixed case we shouldn't goto
unmap_pages; we should actually remove the else {} block. That way non-mmio
pages are mapped while mmio pages are not. I will resend the patch.

Randy (Weidong)

 --
 error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Han, Weidong

Avi Kivity wrote:
 Han, Weidong wrote:
 Avi Kivity wrote:
 
 Han, Weidong wrote:
 
 Don't need to map mmio pages for iommu. When find mmio pages in
 kvm_iommu_map_pages(), don't map them, and shouldn't return error
 due to it's not an error. If return error (such as -EINVAL),
 device assignment will fail. 
 
 
 
 I don't understand.  Why don't we need to map mmio pages?  We
 certainly don't want them emulated.
 
 
 mmio pages need not to be mapped in VT-d page table, which only
 translate DMA addresses.
 
 Right, I forgot the iommu is only for dma, not cpu accesses.
 
 I suppose one could DMA into an mmio page.  Is there a reason not to
 map? 

Is it possible to DMA into an mmio page? If yes, we also need to map mmio
pages, and the is_mmio_pfn() check is not necessary here.

Randy (Weidong)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 10/11] VMX: work around lacking VNMI support

2008-09-25 Thread Jan Kiszka

Avi Kivity wrote:
 Jan Kiszka wrote:
 Jan Kiszka wrote:
 ..
  
 Index: b/arch/x86/kvm/vmx.c
 ===
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -90,6 +90,11 @@ struct vcpu_vmx {
  } rmode;
  int vpid;
  bool emulation_required;
 +
 +/* Support for vnmi-less CPUs */
 +int soft_vnmi_blocked;
 +ktime_t entry_time;
 +s64 vnmi_blocked_time;
 

 I meanwhile realized that these states (except entry_time) and probably
 also arch.nmi_pending/injected are things that should be considered when
 the vcpu state is saved and restored, right? What is the right interface
 for this? An extension of kvm_sregs?

   
 
 kvm_sregs can't be extended because that would break the ABI, so we have
 to add a new ioctl.
 
 I have some patches that allow ioctls to be extended, so if that's
 accepted, we can avoid the new ioctl.

OK.

 
 BTW, via which channel is GUEST_INTERRUPTIBILITY_INFO from the vmcs
 saved/restored? I'm currently not seeing any related, CPU-specific code.
   
 
 Looks like it's missing.

As a workaround (or safety bag), is it imaginable to delay or deny VCPU
snapshots at not yet fully restorable points (like
GUEST_INTERRUPTIBILITY_INFO != 0)? Or stick-your-head-into-the-sand for now?

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Avi Kivity


Han, Weidong wrote:
Is it possible DMA into an mmio page? 


I don't see why not.


If yes, we also need to map mmio
pages, and is_mmio_pfn() check is not necessary here.
  


So we get simpler code as well.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Anthony Liguori


Avi Kivity wrote:

Han, Weidong wrote:
Is it possible DMA into an mmio page? 


I don't see why not.


Yeah, it is.  I mentioned this a long time ago.  We definitely need to 
map mmio pages into the VT-d mapping.


Regards,

Anthony Liguori


If yes, we also need to map mmio
pages, and is_mmio_pfn() check is not necessary here.
  


So we get simpler code as well.



--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Han, Weidong

Avi and Anthony,

I will resend the patch soon. Thanks.

Randy (Weidong)

Anthony Liguori wrote:
 Avi Kivity wrote:
 Han, Weidong wrote:
 Is it possible DMA into an mmio page?
 
 I don't see why not.
 
 Yeah, it is.  I mentioned this a long time ago.  We definitely need to
 map mmio pages into the VT-d mapping.
 
 Regards,
 
 Anthony Liguori
 
 If yes, we also need to map mmio
 pages, and is_mmio_pfn() check is not necessary here.
 
 
 So we get simpler code as well.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] [RESEND] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Han, Weidong

From 61028d958dc7c57ee02de32ea89b025dccb9650d Mon Sep 17 00:00:00 2001
From: Weidong Han [EMAIL PROTECTED]
Date: Thu, 25 Sep 2008 23:32:02 +0800
Subject: [PATCH] Map mmio pages into VT-d page table

An assigned device could DMA to mmio pages, so we also need to map mmio
pages into the VT-d page table.

Signed-off-by: Weidong Han [EMAIL PROTECTED]
---
 arch/x86/kvm/vtd.c |   29 +++--
 include/asm-x86/kvm_host.h |2 --
 virt/kvm/kvm_main.c|2 +-
 3 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
index 667bf3f..a770874 100644
--- a/arch/x86/kvm/vtd.c
+++ b/arch/x86/kvm/vtd.c
@@ -36,37 +36,30 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 {
gfn_t gfn = base_gfn;
pfn_t pfn;
-   int i, r;
+   int i, r = 0;
struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
 
/* check if iommu exists and in use */
if (!domain)
return 0;
 
-   r = -EINVAL;
for (i = 0; i  npages; i++) {
/* check if already mapped */
pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
 gfn_to_gpa(gfn));
-   if (pfn  !is_mmio_pfn(pfn))
+   if (pfn)
continue;
 
pfn = gfn_to_pfn(kvm, gfn);
-   if (!is_mmio_pfn(pfn)) {
-   r = intel_iommu_page_mapping(domain,
-gfn_to_gpa(gfn),
-pfn_to_hpa(pfn),
-PAGE_SIZE,
-DMA_PTE_READ |
-DMA_PTE_WRITE);
-   if (r) {
-   printk(KERN_DEBUG kvm_iommu_map_pages:
-  iommu failed to map pfn=%lx\n,
pfn);
-   goto unmap_pages;
-   }
-   } else {
-   printk(KERN_DEBUG kvm_iommu_map_page:
-  invalid pfn=%lx\n, pfn);
+   r = intel_iommu_page_mapping(domain,
+gfn_to_gpa(gfn),
+pfn_to_hpa(pfn),
+PAGE_SIZE,
+DMA_PTE_READ |
+DMA_PTE_WRITE);
+   if (r) {
+   printk(KERN_ERR kvm_iommu_map_pages:
+  iommu failed to map pfn=%lx\n, pfn);
goto unmap_pages;
}
gfn++;
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index d1175b8..357dd20 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -495,8 +495,6 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t
gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
  gpa_t addr, unsigned long *ret);
 
-int is_mmio_pfn(pfn_t pfn);
-
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6cf0427..98cd916 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -76,7 +76,7 @@ static inline int valid_vcpu(int n)
return likely(n = 0  n  KVM_MAX_VCPUS);
 }
 
-inline int is_mmio_pfn(pfn_t pfn)
+static inline int is_mmio_pfn(pfn_t pfn)
 {
if (pfn_valid(pfn))
return PageReserved(pfn_to_page(pfn));
-- 
1.5.1


0001-Map-mmio-pages-into-VT-d-page-table.patch
Description: 0001-Map-mmio-pages-into-VT-d-page-table.patch

Re: [kvm] Re: [PATCH 0/5] bios: 4G updates

2008-09-25 Thread Alex Williamson

On Wed, 2008-09-24 at 10:17 -0600, Alex Williamson wrote:
 On Wed, 2008-09-24 at 14:07 +0300, Avi Kivity wrote:
  
  The patches all look good, however renaming and reformatting will lead 
  to merge headaches later on.  We haven't been good at working with bochs 
  bios upstream.
  
  Can you peek in bochs upstream and see if it's worth merging?  If not, 
  I'll just merge these patches.
 
 I'll take a look.  It seemed like they added support for putting the
 ACPI processor objects in an SSDT last I checked, but the AML for their
 processors is fairly trivial.  I'll see if there's anything else
 worthwhile.  Thanks,

I guess the SSDT support was prior to the last merge, and appropriately
dropped.  The most interesting new feature is a boot menu to allow the
user to override the boot device.  That seems pretty useful.  Other
things like better printing of the devices and support for PIIX4 could
come in handy too.  So yeah, it looks worth merging.  I can drop my
first two patches and rework the others so we don't cause unnecessary
merge problems.

Alex

-- 
Alex Williamson HP Open Source  Linux Org.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM for Sparc?

2008-09-25 Thread Blue Swirl

On 9/24/08, David Miller [EMAIL PROTECTED] wrote:
 From: Blue Swirl [EMAIL PROTECTED]

 Date: Wed, 24 Sep 2008 21:06:21 +0300

   Now I found the relevant part in the manuals. The extra sun4v bit is
   not taken into account from user mode, so we can't catch privileged to
   hyperprivileged mode traps easily.

 That's right, the top bit is ignored in user mode.

The hypervisor uses traps 0x80, 0x83, 0x84, 0x85, and 0xff. Looking at
how these alias to low number traps: first four are unused or used for
resets (SIR, RED, XIR), so they are not in the fast path. 0xff aliases
to 0x7f, which is part of Fill 7 otherwin trap. Maybe that is not
performance critical? The Fill 7 trap entry could be shortened with
off-table jumps.

I'm thinking that we could disassemble the calling instruction on
entry to the lower traps and detect what was the true cause of the
trap.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 7/9] Add VMRUN handler v3

2008-09-25 Thread Alexander Graf






Am 19.09.2008 um 17:59 schrieb Joerg Roedel [EMAIL PROTECTED]:


On Wed, Sep 17, 2008 at 03:41:24PM +0200, Alexander Graf wrote:

This patch implements VMRUN. VMRUN enters a virtual CPU and runs that
in the same context as the normal guest CPU would run.
So basically it is implemented the same way a normal CPU would do it.


We also prepare all intercepts that get OR'ed with the original
intercepts, as we do not allow a level 2 guest to be intercepted less
than the first level guest.

v2 implements the following improvements:

- fixes the CPL check
- does not allocate iopm when not used
- remembers the host's IF in the HIF bit in the hflags

v3:

- make use of the new permission checking
- add support for V_INTR_MASKING_MASK

Signed-off-by: Alexander Graf [EMAIL PROTECTED]
---
arch/x86/kvm/kvm_svm.h |9 ++
arch/x86/kvm/svm.c |  198 ++ 
+-

include/asm-x86/kvm_host.h |2 +
3 files changed, 207 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 76ad107..2afe0ce 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -43,6 +43,15 @@ struct vcpu_svm {
   u32 *msrpm;

   u64 nested_hsave;
+u64 nested_vmcb;
+
+/* These are the merged vectors */
+u32 *nested_msrpm;
+u32 *nested_iopm;
+
+/* gpa pointers to the real vectors */
+u64 nested_vmcb_msrpm;
+u64 nested_vmcb_iopm;
};

#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0aa22e5..3601e75 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -51,6 +51,9 @@ MODULE_LICENSE(GPL);
/* Turn on to get debugging output*/
/* #define NESTED_DEBUG */

+/* Not needed until device passthrough */
+/* #define NESTED_KVM_MERGE_IOPM */
+
#ifdef NESTED_DEBUG
#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
#else
@@ -76,6 +79,11 @@ static inline struct vcpu_svm *to_svm(struct  
kvm_vcpu *vcpu)

   return container_of(vcpu, struct vcpu_svm, vcpu);
}

+static inline bool is_nested(struct vcpu_svm *svm)
+{
+return svm-nested_vmcb;
+}
+
static unsigned long iopm_base;

struct kvm_ldttss_desc {
@@ -614,6 +622,7 @@ static void init_vmcb(struct vcpu_svm *svm)
   force_new_asid(svm-vcpu);

   svm-nested_hsave = 0;
+svm-nested_vmcb = 0;
   svm-vcpu.arch.hflags = HF_GIF_MASK;
}

@@ -639,6 +648,10 @@ static struct kvm_vcpu *svm_create_vcpu(struct  
kvm *kvm, unsigned int id)

   struct vcpu_svm *svm;
   struct page *page;
   struct page *msrpm_pages;
+struct page *nested_msrpm_pages;
+#ifdef NESTED_KVM_MERGE_IOPM
+struct page *nested_iopm_pages;
+#endif
   int err;

   svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -661,9 +674,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct  
kvm *kvm, unsigned int id)

   msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
   if (!msrpm_pages)
   goto uninit;
+
+nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+if (!nested_msrpm_pages)
+goto uninit;
+
+#ifdef NESTED_KVM_MERGE_IOPM
+nested_iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
+if (!nested_iopm_pages)
+goto uninit;
+#endif
+
   svm-msrpm = page_address(msrpm_pages);
   svm_vcpu_init_msrpm(svm-msrpm);

+svm-nested_msrpm = page_address(nested_msrpm_pages);
+#ifdef NESTED_KVM_MERGE_IOPM
+svm-nested_iopm = page_address(nested_iopm_pages);
+#endif
+
   svm-vmcb = page_address(page);
   clear_page(svm-vmcb);
   svm-vmcb_pa = page_to_pfn(page)  PAGE_SHIFT;
@@ -693,6 +722,10 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)

   __free_page(pfn_to_page(svm-vmcb_pa  PAGE_SHIFT));
   __free_pages(virt_to_page(svm-msrpm), MSRPM_ALLOC_ORDER);
+__free_pages(virt_to_page(svm-nested_msrpm),  
MSRPM_ALLOC_ORDER);

+#ifdef NESTED_KVM_MERGE_IOPM
+__free_pages(virt_to_page(svm-nested_iopm), IOPM_ALLOC_ORDER);
+#endif
   kvm_vcpu_uninit(vcpu);
   kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -1230,6 +1263,138 @@ static int nested_svm_do(struct vcpu_svm  
*svm,

   return retval;
}

+
+static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
+  void *arg2, void *opaque)
+{
+int i;
+u32 *nested_msrpm = (u32*)arg1;
+for (i=0; i PAGE_SIZE * (1  MSRPM_ALLOC_ORDER) / 4; i++)
+svm-nested_msrpm[i] = svm-msrpm[i] | nested_msrpm[i];
+svm-vmcb-control.msrpm_base_pa = __pa(svm-nested_msrpm);
+
+return 0;
+}
+
+#ifdef NESTED_KVM_MERGE_IOPM
+static int nested_svm_vmrun_iopm(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+int i;
+u32 *nested_iopm = (u32*)arg1;
+u32 *iopm = (u32*)__va(iopm_base);
+for (i=0; i PAGE_SIZE * (1  IOPM_ALLOC_ORDER) / 4; i++)
+svm-nested_iopm[i] = iopm[i] | nested_iopm[i];
+svm-vmcb-control.iopm_base_pa = __pa(svm-nested_iopm);
+
+return 0;
+}
+#endif
+
+static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
+void *arg2, void *opaque)
+{
+struct vmcb *nested_vmcb =

Re: [PATCH 7/9] Add VMRUN handler v3

2008-09-25 Thread Joerg Roedel

On Thu, Sep 25, 2008 at 07:32:55PM +0200, Alexander Graf wrote:
 This is a big security hole. With this we give the guest access to its
 own VMCB. The guest can take over or crash the whole host machine by
 rewriting its VMCB. We should be more selective what we save in the
 hsave area.
 
 Oh, right. I didn't even think of a case where the nested guest would
 have access to the hsave of itself. Since the hsave can never be used
 twice on one vcpu, we could just allocate our own memory for the hsave
 in the vcpu context and leave the nested hsave empty.

I think we could also gain performance by only saving the important
parts of the VMCB and not the whole page.

Joerg

-- 
   |   AMD Saxony Limited Liability Company  Co. KG
 Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System|  Register Court Dresden: HRA 4896
 Research  |  General Partner authorized to represent:
 Center| AMD Saxony LLC (Wilmington, Delaware, US)
   | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Add USB sys file-system support (v6)

2008-09-25 Thread Anthony Liguori


TJ wrote:

This patch adds support for host USB devices discovered via:

/sys/bus/usb/devices/* and opened from /dev/bus/usb/*/*
/dev/bus/usb/devices and opened from /dev/bus/usb/*/*

in addition to the existing discovery via:

/proc/bus/usb/devices and opened from /proc/bus/usb/*/*

Signed-off-by: TJ [EMAIL PROTECTED]
---
--- a/usb-linux.c   2008-09-17 22:39:38.0 +0100
+++ b/usb-linux.c   2008-09-23 02:28:48.0 +0100
@@ -7,6 +7,10 @@
  *  Support for host device auto connect  disconnect
  *  Major rewrite to support fully async operation
  *
+ * Copyright 2008 TJ [EMAIL PROTECTED]
+ *  Added flexible support for /dev/bus/usb /sys/bus/usb/devices in 
addition
+ *  to the legacy /proc/bus/usb USB device discovery and handling
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the Software), to 
deal
  * in the Software without restriction, including without limitation the rights
@@ -72,9 +76,20 @@
 #define dprintf(...)
 #endif
 
-#define USBDEVFS_PATH /proc/bus/usb

+#define USBPROCBUS_PATH /proc/bus/usb
 #define PRODUCT_NAME_SZ 32
 #define MAX_ENDPOINTS 16
+#define USBDEVBUS_PATH /dev/bus/usb
+#define USBSYSBUS_PATH /sys/bus/usb
+
+static char *usb_host_device_path;
+
+#define USB_FS_NONE 0
+#define USB_FS_PROC 1
+#define USB_FS_DEV 2
+#define USB_FS_SYS 3
+
+static int usb_fs_type = 0;
 
 /* endpoint association data */

 struct endp_data {
@@ -890,13 +905,18 @@
 
 printf(husb: open device %d.%d\n, bus_num, addr);
 
-snprintf(buf, sizeof(buf), USBDEVFS_PATH /%03d/%03d,

+   if (!usb_host_device_path) {
+   perror(husb: USB Host Device Path not set);
+   goto fail;
+   }
+snprintf(buf, sizeof(buf), %s/%03d/%03d, usb_host_device_path,
  bus_num, addr);
  


You have tabs here.


 fd = open(buf, O_RDWR | O_NONBLOCK);
 if (fd  0) {
 perror(buf);
 goto fail;
 }
+dprintf(husb: opened %s\n, buf);
 
 /* read the device description */

 dev-descr_len = read(fd, dev-descr, sizeof(dev-descr));
@@ -1038,23 +1058,29 @@
 return q - buf;
 }
 
-static int usb_host_scan(void *opaque, USBScanFunc *func)

+/*
+ Use /proc/bus/usb/devices or /dev/bus/usb/devices file to determine
+ host's USB devices. This is legacy support since many distributions
+ are moving to /sys/bus/usb
+*/
+static int usb_host_scan_dev(void *opaque, USBScanFunc *func)
 {
-FILE *f;
+FILE *f = 0;
 char line[1024];
 char buf[1024];
 int bus_num, addr, speed, device_count, class_id, product_id, vendor_id;
-int ret;
 char product_name[512];
+int ret = 0;
 
-f = fopen(USBDEVFS_PATH /devices, r);

+snprintf(line, sizeof(line), %s/devices, usb_host_device_path);
+f = fopen(line, r);
 if (!f) {
-term_printf(husb: could not open %s\n, USBDEVFS_PATH /devices);
-return 0;
+   perror(husb: cannot open devices file);
+   goto the_end;
 }
  


And here and almost everywhere.


+
 device_count = 0;
 bus_num = addr = speed = class_id = product_id = vendor_id = 0;
-ret = 0;
 for(;;) {
 if (fgets(line, sizeof(line), f) == NULL)
 break;
@@ -1106,12 +1132,191 @@
 fail: ;
 }
 if (device_count  (vendor_id || product_id)) {
-/* Add the last device.  */
-ret = func(opaque, bus_num, addr, class_id, vendor_id,
-   product_id, product_name, speed);
+   /* Add the last device.  */
+   ret = func(opaque, bus_num, addr, class_id, vendor_id,
+   product_id, product_name, speed);
+}
+ the_end:
+if (f) fclose(f);
+return ret;
+}
+
+/*
+ Use /sys/bus/usb/devices/ directory to determine host's USB devices.
+
+ This code is taken from Robert Schiele's original patches posted to the
+ Novell bug-tracker https://bugzilla.novell.com/show_bug.cgi?id=241950
+*/
+static int usb_host_scan_sys(void *opaque, USBScanFunc *func)
+{
+FILE *f;
+DIR *dir = 0;
+char line[1024];
+int bus_num, addr, speed, class_id, product_id, vendor_id;
+int ret = 0;
+char product_name[512];
+struct dirent* de;
+
+dir = opendir(USBSYSBUS_PATH /devices);
+if (!dir) {
+   perror(husb: cannot open devices directory);
+   goto the_end;
+}
+
+while ((de = readdir(dir))) {
+   if (de-d_name[0] != '.'  ! strchr(de-d_name, ':')) {
+   char filename[PATH_MAX];
+   char* tmpstr = de-d_name;
+   if (!strncmp(de-d_name, usb, 3))
+   tmpstr += 3;
  


This is indented wrong.


+
+   bus_num = atoi(tmpstr);
+   snprintf(filename, PATH_MAX, USBSYSBUS_PATH /devices/%s/devnum, 
de-d_name);
+   f = fopen(filename, r);
+   if (!f) {
+   term_printf(Could not open %s\n, filename);
+   goto the_end;
+

[PATCH 0/4] bios: 4G updates (take2)

2008-09-25 Thread Alex Williamson


This set removes the code churn so we don't make it extra difficult to
sync with bochs.

[1/4] Cleanup the previous kvm patch to add above 4G MTRRs
[2/4] Add SMBIOS info for memory above 4G
[3/4] Fix SMBIOS type 19  20 range end
[4/4] Switch default MTRR type to WB and only specify an MTRR for MMIO

Thanks,

Alex


-- 
Alex Williamson HP Open Source  Linux Org.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/4] kvm: bios: cleanup/consolidate above 4G memory parsing

2008-09-25 Thread Alex Williamson

kvm: bios: cleanup/consolidate above 4G memory parsing

Signed-off-by: Alex Williamson [EMAIL PROTECTED]
---

 bios/rombios32.c |   17 +
 1 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/bios/rombios32.c b/bios/rombios32.c
index c57e967..07c858c 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -416,7 +416,7 @@ uint32_t cpuid_signature;
 uint32_t cpuid_features;
 uint32_t cpuid_ext_features;
 unsigned long ram_size;
-uint64_t above4g_ram_size;
+uint64_t ram_end;
 uint8_t bios_uuid[16];
 #ifdef BX_USE_EBDA_TABLES
 unsigned long ebda_cur_addr;
@@ -531,9 +531,9 @@ void setup_mtrr(void)
 wrmsr_smp(MTRRphysMask_MSR(i), (~vmask  0xfff000ull) | 0x800);
 vbase += vmask + 1;
 }
-for (vbase = 1ull  32; i  vcnt  vbase  above4g_ram_size; ++i) {
+for (vbase = 1ull  32; i  vcnt  vbase  ram_end; ++i) {
 vmask = (1ull  40) - 1;
-while (vbase + vmask + 1  above4g_ram_size)
+while (vbase + vmask + 1  ram_end)
 vmask = 1;
 wrmsr_smp(MTRRphysBase_MSR(i), vbase | 6);
 wrmsr_smp(MTRRphysMask_MSR(i), (~vmask  0xfff000ull) | 0x800);
@@ -551,17 +551,18 @@ void ram_probe(void)
 ram_size = (cmos_readb(0x17) | (cmos_readb(0x18)  8)) * 1024;
 
   if (cmos_readb(0x5b) | cmos_readb(0x5c) | cmos_readb(0x5d))
-above4g_ram_size = ((uint64_t)cmos_readb(0x5b)  16) |
-((uint64_t)cmos_readb(0x5c)  24) | ((uint64_t)cmos_readb(0x5d)  
32);
+ram_end = (((uint64_t)cmos_readb(0x5b)  16) |
+   ((uint64_t)cmos_readb(0x5c)  24) |
+   ((uint64_t)cmos_readb(0x5d)  32)) + (1ull  32);
+  else
+ram_end = ram_size;
 
-  if (above4g_ram_size)
-above4g_ram_size += 1ull  32;
+  BX_INFO(end of ram=%ldMB\n, ram_end  20);
 
 #ifdef BX_USE_EBDA_TABLES
 ebda_cur_addr = ((*(uint16_t *)(0x40e))  4) + 0x380;
 #endif
 BX_INFO(ram_size=0x%08lx\n, ram_size);
-BX_INFO(top of ram %ldMB\n, above4g_ram_size  20);
   setup_mtrr();
 }
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/4] kvm: bios: fix SMBIOS end address range reporting

2008-09-25 Thread Alex Williamson

kvm: bios: fix SMBIOS end address range reporting

The -1 seems to be in the wrong place here.

Signed-off-by: Alex Williamson [EMAIL PROTECTED]
---

 bios/rombios32.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bios/rombios32.c b/bios/rombios32.c
index be4c25f..f8edf18 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -1950,7 +1950,7 @@ smbios_type_19_init(void *start, uint32_t memory_size_mb)
 p-header.handle = 0x1300;
 
 p-starting_address = 0;
-p-ending_address = (memory_size_mb-1) * 1024;
+p-ending_address = (memory_size_mb * 1024) - 1;
 p-memory_array_handle = 0x1000;
 p-partition_width = 1;
 
@@ -1971,7 +1971,7 @@ smbios_type_20_init(void *start, uint32_t memory_size_mb)
 p-header.handle = 0x1400;
 
 p-starting_address = 0;
-p-ending_address = (memory_size_mb-1)*1024;
+p-ending_address = (memory_size_mb * 1024) - 1;
 p-memory_device_handle = 0x1100;
 p-memory_array_mapped_address_handle = 0x1300;
 p-partition_row_position = 1;


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 4/4] kvm: bios: switch MTRRs to cover only the PCI range and default to WB

2008-09-25 Thread Alex Williamson

kvm: bios: switch MTRRs to cover only the PCI range and default to WB

This matches how some bare metal machines report MTRRs and avoids
the problem of running out of MTRRs to cover all of RAM.

Signed-off-by: Alex Williamson [EMAIL PROTECTED]
---

 bios/rombios32.c |   24 
 1 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/bios/rombios32.c b/bios/rombios32.c
index f8edf18..592abf9 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -494,7 +494,6 @@ void setup_mtrr(void)
 uint8_t valb[8];
 uint64_t val;
 } u;
-uint64_t vbase, vmask;
 
 mtrr_cap = rdmsr(MSR_MTRRcap);
 vcnt = mtrr_cap  0xff;
@@ -521,25 +520,10 @@ void setup_mtrr(void)
 wrmsr_smp(MSR_MTRRfix4K_E8000, 0);
 wrmsr_smp(MSR_MTRRfix4K_F, 0);
 wrmsr_smp(MSR_MTRRfix4K_F8000, 0);
-vbase = 0;
---vcnt; /* leave one mtrr for VRAM */
-for (i = 0; i  vcnt  vbase  ram_size; ++i) {
-vmask = (1ull  40) - 1;
-while (vbase + vmask + 1  ram_size)
-vmask = 1;
-wrmsr_smp(MTRRphysBase_MSR(i), vbase | 6);
-wrmsr_smp(MTRRphysMask_MSR(i), (~vmask  0xfff000ull) | 0x800);
-vbase += vmask + 1;
-}
-for (vbase = 1ull  32; i  vcnt  vbase  ram_end; ++i) {
-vmask = (1ull  40) - 1;
-while (vbase + vmask + 1  ram_end)
-vmask = 1;
-wrmsr_smp(MTRRphysBase_MSR(i), vbase | 6);
-wrmsr_smp(MTRRphysMask_MSR(i), (~vmask  0xfff000ull) | 0x800);
-vbase += vmask + 1;
-}
-wrmsr_smp(MSR_MTRRdefType, 0xc00);
+/* Mark 3.5-4GB as UC, anything not specified defaults to WB */
+wrmsr_smp(MTRRphysBase_MSR(0), 0xe000ull | 0);
+wrmsr_smp(MTRRphysMask_MSR(0), ~(0x2000ull - 1) | 0x800);
+wrmsr_smp(MSR_MTRRdefType, 0xc06);
 }
 
 void ram_probe(void)


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

KVM: MMU: fix largepage shadow accounting with oos

2008-09-25 Thread Marcelo Tosatti


There's no need to increase the largepage shadow count when syncing
since there's no count decrement on unsync, only on destruction.

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]

Index: kvm/arch/x86/kvm/mmu.c
===
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -661,8 +661,6 @@ static void rmap_write_protect(struct kv
 
if (write_protected)
kvm_flush_remote_tlbs(kvm);
-
-   account_shadowed(kvm, gfn);
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -1130,8 +1128,10 @@ static struct kvm_mmu_page *kvm_mmu_get_
sp-gfn = gfn;
sp-role = role;
hlist_add_head(sp-hash_link, bucket);
-   if (!metaphysical)
+   if (!metaphysical) {
rmap_write_protect(vcpu-kvm, gfn);
+   account_shadowed(vcpu-kvm, gfn);
+   }
if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
vcpu-arch.mmu.prefetch_page(vcpu, sp);
else
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/9] Implement GIF, clgi and stgi v3

2008-09-25 Thread Joerg Roedel

I had another possible idea for performance improvement here. Since we
only inject normal interrupts and exceptions (and not NMI and such) we
can patch clgi to cli and stgi to sti to save these two intercepts in
the guest's vmrun path.
Any objections/problems with this?

On Wed, Sep 17, 2008 at 03:41:21PM +0200, Alexander Graf wrote:
 This patch implements the GIF flag and the clgi and stgi instructions that
 set this flag. Only if the flag is set (default), interrupts can be received 
 by
 the CPU.
 
 To keep the information about that somewhere, this patch adds a new hidden
 flags vector. that is used to store information that does not go into the
 vmcb, but is SVM specific.
 
 v2 moves the hflags to x86 generic code
 v3 makes use of the new permission helper
 
 Signed-off-by: Alexander Graf [EMAIL PROTECTED]
 ---
  arch/x86/kvm/svm.c |   42 +++---
  include/asm-x86/kvm_host.h |3 +++
  2 files changed, 42 insertions(+), 3 deletions(-)
 
 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
 index c72e728..469ecc5 100644
 --- a/arch/x86/kvm/svm.c
 +++ b/arch/x86/kvm/svm.c
 @@ -612,6 +612,8 @@ static void init_vmcb(struct vcpu_svm *svm)
   save-cr4 = 0;
   }
   force_new_asid(svm-vcpu);
 +
 + svm-vcpu.arch.hflags = HF_GIF_MASK;
  }
  
  static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 @@ -1227,6 +1229,36 @@ static int nested_svm_do(struct vcpu_svm *svm,
   return retval;
  }
  
 +static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +{
 + if (nested_svm_check_permissions(svm))
 + return 1;
 +
 + svm-next_rip = kvm_rip_read(svm-vcpu) + 3;
 + skip_emulated_instruction(svm-vcpu);
 +
 + svm-vcpu.arch.hflags |= HF_GIF_MASK;
 +
 + return 1;
 +}
 +
 +static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 +{
 + if (nested_svm_check_permissions(svm))
 + return 1;
 +
 + svm-next_rip = kvm_rip_read(svm-vcpu) + 3;
 + skip_emulated_instruction(svm-vcpu);
 +
 + svm-vcpu.arch.hflags = ~HF_GIF_MASK;
 +
 + /* After a CLGI no interrupts should come */
 + svm_clear_vintr(svm);
 + svm-vmcb-control.int_ctl = ~V_IRQ_MASK;
 +
 + return 1;
 +}
 +
  static int invalid_op_interception(struct vcpu_svm *svm,
  struct kvm_run *kvm_run)
  {
 @@ -1521,8 +1553,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
   [SVM_EXIT_VMMCALL]  = vmmcall_interception,
   [SVM_EXIT_VMLOAD]   = invalid_op_interception,
   [SVM_EXIT_VMSAVE]   = invalid_op_interception,
 - [SVM_EXIT_STGI] = invalid_op_interception,
 - [SVM_EXIT_CLGI] = invalid_op_interception,
 + [SVM_EXIT_STGI] = stgi_interception,
 + [SVM_EXIT_CLGI] = clgi_interception,
   [SVM_EXIT_SKINIT]   = invalid_op_interception,
   [SVM_EXIT_WBINVD]   = emulate_on_interception,
   [SVM_EXIT_MONITOR]  = invalid_op_interception,
 @@ -1669,6 +1701,9 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
   if (!kvm_cpu_has_interrupt(vcpu))
   goto out;
  
 + if (!(svm-vcpu.arch.hflags  HF_GIF_MASK))
 + goto out;
 +
   if (!(vmcb-save.rflags  X86_EFLAGS_IF) ||
   (vmcb-control.int_state  SVM_INTERRUPT_SHADOW_MASK) ||
   (vmcb-control.event_inj  SVM_EVTINJ_VALID)) {
 @@ -1720,7 +1755,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
  
   svm-vcpu.arch.interrupt_window_open =
   (!(control-int_state  SVM_INTERRUPT_SHADOW_MASK) 
 -  (svm-vmcb-save.rflags  X86_EFLAGS_IF));
 +  (svm-vmcb-save.rflags  X86_EFLAGS_IF) 
 +  (svm-vcpu.arch.hflags  HF_GIF_MASK));
  
   if (svm-vcpu.arch.interrupt_window_open  svm-vcpu.arch.irq_summary)
   /*
 diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
 index 982b6b2..3e25004 100644
 --- a/include/asm-x86/kvm_host.h
 +++ b/include/asm-x86/kvm_host.h
 @@ -245,6 +245,7 @@ struct kvm_vcpu_arch {
   unsigned long cr3;
   unsigned long cr4;
   unsigned long cr8;
 + u32 hflags;
   u64 pdptrs[4]; /* pae */
   u64 shadow_efer;
   u64 apic_base;
 @@ -734,6 +735,8 @@ enum {
   TASK_SWITCH_GATE = 3,
  };
  
 +#define HF_GIF_MASK  (1  0)
 +
  /*
   * Hardware virtualization extension instructions may fault if a
   * reboot turns off virtualization while processes are running.
 -- 
 1.5.6
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Muli Ben-Yehuda

On Thu, Sep 25, 2008 at 05:45:30PM +0300, Avi Kivity wrote:
 Han, Weidong wrote:
 Is it possible DMA into an mmio page? 

 I don't see why not.

Two reasons. First it makes no sense. MMIO pages don't have RAM
backing them, they have another device's register window. So the
effect of DMA'ing into an MMIO page would be for one device to DMA
into the register window of another device, which sounds to me insane.

Second, and more importantly, I've seen systems where doing the above
caused a nice, immediate, reboot. So I think that unless someone comes
with a valid scenario where we need to support it or something breaks,
we'd better err on the side of caution and not map pages that should
not be DMA targets.

Cheers,
Muli
-- 
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
  xxx
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 7/9] Add VMRUN handler v3

2008-09-25 Thread [EMAIL PROTECTED]

On Thu, Sep 25, 2008 at 10:00:17PM +0200, Alexander Graf wrote:
 
 On 25.09.2008, at 19:37, Joerg Roedel wrote:
 
 On Thu, Sep 25, 2008 at 07:32:55PM +0200, Alexander Graf wrote:
 This is a big security hole. With this we give the guest access to  
 its
 own VMCB. The guest can take over or crash the whole host machine by
 rewriting its VMCB. We should be more selective what we save in the
 hsave area.
 
 Oh, right. I didn't even think of a case where the nested guest would
 have access to the hsave of itself. Since the hsave can never be used
 twice on one vcpu, we could just allocate our own memory for the  
 hsave
 in the vcpu context and leave the nested hsave empty.
 
 I think we could also gain performance by only saving the important
 parts of the VMCB and not the whole page.
 
 Is copying one page really that expensive? Is there any accelerated  
 function available for that that copies it with SSE or so? :-)

Copying data in memory is always expensive because the accesses may miss
in the caches and data must be fetched from memory. As far as I know
this can be around 150 cycles per cache line.

Joerg

 -- 
   |   AMD Saxony Limited Liability Company  Co. KG
 Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System|  Register Court Dresden: HRA 4896
 Research  |  General Partner authorized to represent:
 Center| AMD Saxony LLC (Wilmington, Delaware, US)
   | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe,  
 Thomas McCoy
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-25 Thread Anthony Liguori


Muli Ben-Yehuda wrote:

On Thu, Sep 25, 2008 at 05:45:30PM +0300, Avi Kivity wrote:
  

Han, Weidong wrote:

Is it possible DMA into an mmio page? 
  

I don't see why not.



Two reasons. First it makes no sense. MMIO pages don't have RAM
backing them, they have another device's register window. So the
effect of DMA'ing into an MMIO page would be for one device to DMA
into the register window of another device, which sounds to me insane.
  


MMIO isn't just a register window.  It may be an on-device buffer.  For 
instance, all packets are stored in a buffer on the ne2k that's mapped 
via mmio.  It would seem entirely reasonable to me to program an IDE 
driver to DMA directly into the device's packet buffer.



Second, and more importantly, I've seen systems where doing the above
caused a nice, immediate, reboot. So I think that unless someone comes
with a valid scenario where we need to support it or something breaks,
we'd better err on the side of caution and not map pages that should
not be DMA targets.
  


Xen maps the MMIO pages into the VT-d table.  The system you were using 
could have just been busted.  I think the burden is to prove that this 
is illegal (via the architecture specification).


Regards,

Anthony Liguori


Cheers,
Muli
  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] unalias rework

2008-09-25 Thread Marcelo Tosatti


Hi Izik,

On Thu, Sep 04, 2008 at 05:13:20PM +0300, izik eidus wrote:

 + struct kvm_memory_slot *alias_slot = kvm-memslots[i];
 +
 + if (alias_slot-base_gfn == slot-base_gfn)
 + return 1;
 + }
 + return 0;
 +}
 +
 +static void update_alias_slots(struct kvm *kvm, struct kvm_memory_slot *free)
 +{
 + int i;
 +
 + if (is_aliased_slot(kvm, free))
 + return;
 +
 + for (i = KVM_MEMORY_SLOTS; i  KVM_MEMORY_SLOTS + KVM_ALIAS_SLOTS;
 +  ++i) {
 + struct kvm_memory_slot *alias_memslot = kvm-memslots[i];
 + unsigned long size = free-npages  PAGE_SHIFT;
 +
 + if (alias_memslot-userspace_addr = free-userspace_addr 
 + alias_memslot-userspace_addr  free-userspace_addr +
 + size) {
 + alias_memslot-flags = free-flags;
 + if (free-dirty_bitmap) {
 + unsigned long offset =
 + alias_memslot-userspace_addr -
 + free-userspace_addr;
 + unsigned dirty_offset;
 + unsigned long bitmap_addr;
 +
 + offset = offset  PAGE_SHIFT;
 + dirty_offset = ALIGN(offset, BITS_PER_LONG) / 8;
 + bitmap_addr = (unsigned long) 
 free-dirty_bitmap;
 + bitmap_addr += dirty_offset;
 + alias_memslot-dirty_bitmap = (unsigned long *) 
 bitmap_addr;
 + } else
 + alias_memslot-dirty_bitmap = NULL;
 + }
 + }
 +}
 +
  /*
   * Free any memory in @free but not in @dont.
   */
 -static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 +static void kvm_free_physmem_slot(struct kvm *kvm,
 +   struct kvm_memory_slot *free,
 struct kvm_memory_slot *dont)
  {
   if (!dont || free-rmap != dont-rmap)
 @@ -385,10 +433,16 @@ static void kvm_free_physmem_slot(struct 
 kvm_memory_slot *free,
   if (!dont || free-lpage_info != dont-lpage_info)
   vfree(free-lpage_info);
  
 - free-npages = 0;
   free-dirty_bitmap = NULL;
   free-rmap = NULL;
   free-lpage_info = NULL;
 +
 +#ifdef CONFIG_X86
 + update_alias_slots(kvm, free);
 + if (dont)
 + update_alias_slots(kvm, dont);
 +#endif
 + free-npages = 0;

Why is this needed? I don't understand when you would free a memslot
without first freeing any aliases that map it.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 5/7] KVM/userspace: Device Assignment: Support for assigning PCI devices to guests

2008-09-25 Thread Yang, Sheng

On Tuesday 23 September 2008 22:54:53 Amit Shah wrote:
 +static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t
 address, +int len)
 +{
 +   uint32_t val = 0;
 +   int fd, r;
 +
 +   if ((address = 0x10  address = 0x24) || address == 0x34 ||
 +   address == 0x3c || address == 0x3d) {
 +   val = pci_default_read_config(d, address, len);
 +   DEBUG((%x.%x): address=%04x val=0x%08x len=%d\n,
 + (d-devfn  3)  0x1F, (d-devfn  0x7), address,
 val, + len);
 +   return val;
 +   }
 +
 +   /* vga specific, remove later */
 +   if (address == 0xFC)
 +   goto do_log;
 +
 +   fd = ((AssignedDevice *)d)-real_device.config_fd;
 +   r = lseek(fd, address, SEEK_SET);
 +   if (r  0) {
 +   fprintf(stderr, %s: bad seek, errno = %d\n,
 +   __func__, errno);
 +   return val;
 +   }

This method of reading from configuration space has a problem: the vendor ID
and device ID are read directly from configuration space rather than from the
vendor and device files in sysfs. That causes trouble with devices whose
configuration space is inconsistent with the vendor and device files, e.g.
because of a fixup applied by the host PCI subsystem in the kernel.

Maybe this can be deferred to a follow-up patch, but we should address this
issue... Maybe we can use libpci? More fields than vendor and device have
this problem, such as irq.

--
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/4] Separate update irq to a single function

2008-09-25 Thread Sheng Yang


Signed-off-by: Sheng Yang [EMAIL PROTECTED]
---
 arch/x86/kvm/x86.c |   78 
 1 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b9d15f7..43fb049 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -205,6 +205,42 @@ static void kvm_free_all_assigned_devices(struct kvm *kvm)
}
 }
 
+static int assigned_device_update_irq(struct kvm *kvm,
+   struct kvm_assigned_dev_kernel *assigned_dev,
+   struct kvm_assigned_irq *assigned_irq)
+{
+   if (assigned_dev-irq_requested) {
+   assigned_dev-guest_irq = assigned_irq-guest_irq;
+   assigned_dev-ack_notifier.gsi = assigned_irq-guest_irq;
+   return 0;
+   }
+   if (irqchip_in_kernel(kvm)) {
+   if (!capable(CAP_SYS_RAWIO))
+   return -EPERM;
+
+   if (assigned_irq-host_irq)
+   assigned_dev-host_irq = assigned_irq-host_irq;
+   else
+   assigned_dev-host_irq = assigned_dev-dev-irq;
+   assigned_dev-guest_irq = assigned_irq-guest_irq;
+   assigned_dev-ack_notifier.gsi = assigned_irq-guest_irq;
+   assigned_dev-ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+   kvm_register_irq_ack_notifier(kvm, assigned_dev-ack_notifier);
+
+   /* Even though this is PCI, we don't want to use shared
+* interrupts. Sharing host devices with guest-assigned devices
+* on the same interrupt line is not a happy situation: there
+* are going to be long delays in accepting, acking, etc.
+*/
+   if (request_irq(assigned_dev-host_irq, kvm_assigned_dev_intr,
+   0, kvm_assigned_device, (void *)assigned_dev))
+   return -EIO;
+   }
+   assigned_dev-irq_requested = true;
+
+   return 0;
+}
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
   struct kvm_assigned_irq
   *assigned_irq)
@@ -221,44 +257,14 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
return -EINVAL;
}
 
-   if (match-irq_requested) {
-   match-guest_irq = assigned_irq-guest_irq;
-   match-ack_notifier.gsi = assigned_irq-guest_irq;
-   mutex_unlock(kvm-lock);
-   return 0;
-   }
-
-   INIT_WORK(match-interrupt_work,
- kvm_assigned_dev_interrupt_work_handler);
-
-   if (irqchip_in_kernel(kvm)) {
-   if (!capable(CAP_SYS_RAWIO)) {
-   r = -EPERM;
-   goto out_release;
-   }
-
-   if (assigned_irq-host_irq)
-   match-host_irq = assigned_irq-host_irq;
-   else
-   match-host_irq = match-dev-irq;
-   match-guest_irq = assigned_irq-guest_irq;
-   match-ack_notifier.gsi = assigned_irq-guest_irq;
-   match-ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-   kvm_register_irq_ack_notifier(kvm, match-ack_notifier);
+   if (!match-irq_requested)
+   INIT_WORK(match-interrupt_work,
+   kvm_assigned_dev_interrupt_work_handler);
 
-   /* Even though this is PCI, we don't want to use shared
-* interrupts. Sharing host devices with guest-assigned devices
-* on the same interrupt line is not a happy situation: there
-* are going to be long delays in accepting, acking, etc.
-*/
-   if (request_irq(match-host_irq, kvm_assigned_dev_intr, 0,
-   kvm_assigned_device, (void *)match)) {
-   r = -EIO;
-   goto out_release;
-   }
-   }
+   r = assigned_device_update_irq(kvm, match, assigned_irq);
+   if (r)
+   goto out_release;
 
-   match-irq_requested = true;
mutex_unlock(kvm-lock);
return r;
 out_release:
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/4] Enable MSI support for KVM VT-d

2008-09-25 Thread Sheng Yang

Hi, Avi

This patchset enables MSI support for KVM VT-d.

These are only the kernel-space patches. The third patch should also go to
the x86 upstream tree.

The userspace code would look like this:

assigned_irq_data.guest_msi_addr = *(uint32_t *)(d-msi_cap + 4);
assigned_irq_data.guest_msi_data = *(uint16_t *)(d-msi_cap + 8);
assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
r = kvm_assign_irq(kvm_context, assigned_irq_data);

I've tested the patchset with some userspace hacks; it works well.

Thanks!
--
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/4] KVM: x86: Replace irq_requested with guest_intr_type

2008-09-25 Thread Sheng Yang


Signed-off-by: Sheng Yang [EMAIL PROTECTED]
---
 arch/x86/kvm/x86.c   |8 
 include/linux/kvm_host.h |3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 43fb049..4836323 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -170,7 +170,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 struct kvm_assigned_dev_kernel
 *assigned_dev)
 {
-   if (irqchip_in_kernel(kvm)  assigned_dev-irq_requested)
+   if (irqchip_in_kernel(kvm)  assigned_dev-guest_intr_type)
free_irq(assigned_dev-host_irq, (void *)assigned_dev);
 
kvm_unregister_irq_ack_notifier(kvm, assigned_dev-ack_notifier);
@@ -209,7 +209,7 @@ static int assigned_device_update_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev,
struct kvm_assigned_irq *assigned_irq)
 {
-   if (assigned_dev->irq_requested) {
+   if (assigned_dev->guest_intr_type == KVM_ASSIGNED_DEV_INTR) {
assigned_dev->guest_irq = assigned_irq->guest_irq;
assigned_dev->ack_notifier.gsi = assigned_irq->guest_irq;
return 0;
@@ -236,7 +236,7 @@ static int assigned_device_update_irq(struct kvm *kvm,
0, kvm_assigned_device, (void *)assigned_dev))
return -EIO;
}
-   assigned_dev-irq_requested = true;
+   assigned_dev-guest_intr_type = KVM_ASSIGNED_DEV_INTR;
 
return 0;
 }
@@ -257,7 +257,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
return -EINVAL;
}
 
-   if (!match->irq_requested)
+   if (!match->guest_intr_type)
INIT_WORK(&match->interrupt_work,
kvm_assigned_dev_interrupt_work_handler);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6252802..e24280b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -301,7 +301,8 @@ struct kvm_assigned_dev_kernel {
int host_devfn;
int host_irq;
int guest_irq;
-   int irq_requested;
+#define KVM_ASSIGNED_DEV_INTR  1
+   int guest_intr_type;
struct pci_dev *dev;
struct kvm *kvm;
 };
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/4] x86: Add MSI delivery mode mask

2008-09-25 Thread Sheng Yang


Signed-off-by: Sheng Yang [EMAIL PROTECTED]
---
 include/asm-x86/msidef.h |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 296f29c..fdeebbb 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h
@@ -15,8 +15,11 @@
 MSI_DATA_VECTOR_MASK)
 
 #define MSI_DATA_DELIVERY_MODE_SHIFT   8
+#define MSI_DATA_DELIVERY_MODE_MASK    0x700
 #define  MSI_DATA_DELIVERY_FIXED   (0 << MSI_DATA_DELIVERY_MODE_SHIFT)
 #define  MSI_DATA_DELIVERY_LOWPRI  (1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define  MSI_DATA_DELIVERY_FIXED_VAL   0
+#define  MSI_DATA_DELIVERY_LOWPRI_VAL  1
 
 #define MSI_DATA_LEVEL_SHIFT   14
 #define MSI_DATA_LEVEL_DEASSERT    (0 << MSI_DATA_LEVEL_SHIFT)
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

80 matches

Mail list logo