date:20090701

[COMMIT master] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

2009-07-01 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Conflicts:
fs/eventfd.c

Signed-off-by: Avi Kivity a...@redhat.com
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Disable msix save/load if msix is not supported

2009-07-01 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/hw/msix.c b/hw/msix.c
index 4ab6da6..754531f 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -286,6 +286,10 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
 {
 unsigned nentries = (pci_get_word(dev-config + PCI_MSIX_FLAGS) 
  PCI_MSIX_FLAGS_QSIZE) + 1;
+
+if (!msix_supported)
+return;
+
 qemu_put_buffer(f, dev-msix_table_page, nentries * MSIX_ENTRY_SIZE);
 qemu_put_buffer(f, dev-msix_table_page + MSIX_PAGE_PENDING,
 (nentries + 7) / 8);
@@ -296,6 +300,9 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
 {
 unsigned n = dev-msix_entries_nr;
 
+if (!msix_supported)
+return;
+
 if (!dev-cap_present  QEMU_PCI_CAP_MSIX)
 return;
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] Fix KVMs GET_SUPPORTED_CPUID feature usage

2009-07-01 Thread Avi Kivity

From: Andre Przywara andre.przyw...@amd.com

If we want to trim the user provided CPUID bits for KVM to be not greater
than that of the host, we should

a) not remove the bits _after_ we sent them to the kernel  and
b) not do the bitmangling on the original values while sending a copy

This fixes the masking of features that are not present on the host and
helps to use -cpu host.

Signed-off-by: Andre Przywara andre.przyw...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 1eb147e..a5c72e9 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -1191,7 +1191,6 @@ int kvm_arch_qemu_init_env(CPUState *cenv)
 
 qemu_kvm_load_lapic(cenv);
 
-copy = *cenv;
 
 #ifdef KVM_CPUID_SIGNATURE
 /* Paravirtualization CPUIDs */
@@ -1210,6 +1209,17 @@ int kvm_arch_qemu_init_env(CPUState *cenv)
 pv_ent-eax = get_para_features(kvm_context);
 #endif
 
+kvm_trim_features(cenv-cpuid_features,
+  kvm_arch_get_supported_cpuid(cenv, 1, R_EDX));
+kvm_trim_features(cenv-cpuid_ext_features,
+  kvm_arch_get_supported_cpuid(cenv, 1, R_ECX));
+kvm_trim_features(cenv-cpuid_ext2_features,
+  kvm_arch_get_supported_cpuid(cenv, 0x8001, R_EDX));
+kvm_trim_features(cenv-cpuid_ext3_features,
+  kvm_arch_get_supported_cpuid(cenv, 0x8001, R_ECX));
+
+copy = *cenv;
+
 copy.regs[R_EAX] = 0;
 qemu_kvm_cpuid_on_env(copy);
 limit = copy.regs[R_EAX];
@@ -1244,15 +1254,6 @@ int kvm_arch_qemu_init_env(CPUState *cenv)
 
 kvm_setup_cpuid2(cenv-kvm_cpu_state.vcpu_ctx, cpuid_nent, cpuid_ent);
 
-kvm_trim_features(cenv-cpuid_features,
-  kvm_arch_get_supported_cpuid(cenv, 1, R_EDX));
-kvm_trim_features(cenv-cpuid_ext_features,
-  kvm_arch_get_supported_cpuid(cenv, 1, R_ECX));
-kvm_trim_features(cenv-cpuid_ext2_features,
-  kvm_arch_get_supported_cpuid(cenv, 0x8001, R_EDX));
-kvm_trim_features(cenv-cpuid_ext3_features,
-  kvm_arch_get_supported_cpuid(cenv, 0x8001, R_ECX));
-
 return 0;
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: switch coalesced mmio changes to slots_lock

2009-07-01 Thread Avi Kivity

From: Michael S. Tsirkin m...@redhat.com

Switch coalesced mmio to slots_lock.  slots_lock is already taken for read
everywhere, so we only need to take it for write when changing zones.
This is in preparation for removing in_range and the kvm->lock around it.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 397f419..7b7cc9f 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -102,7 +102,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
kvm_iodevice_init(dev-dev, coalesced_mmio_ops);
dev-kvm = kvm;
kvm-coalesced_mmio_dev = dev;
-   kvm_io_bus_register_dev(kvm-mmio_bus, dev-dev);
+   kvm_io_bus_register_dev(kvm, kvm-mmio_bus, dev-dev);
 
return 0;
 }
@@ -115,16 +115,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
if (dev == NULL)
return -EINVAL;
 
-   mutex_lock(kvm-lock);
+   down_write(kvm-slots_lock);
if (dev-nb_zones = KVM_COALESCED_MMIO_ZONE_MAX) {
-   mutex_unlock(kvm-lock);
+   up_write(kvm-slots_lock);
return -ENOBUFS;
}
 
dev-zone[dev-nb_zones] = *zone;
dev-nb_zones++;
 
-   mutex_unlock(kvm-lock);
+   up_write(kvm-slots_lock);
return 0;
 }
 
@@ -138,7 +138,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
if (dev == NULL)
return -EINVAL;
 
-   mutex_lock(kvm-lock);
+   down_write(kvm-slots_lock);
 
i = dev-nb_zones;
while(i) {
@@ -156,7 +156,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
i--;
}
 
-   mutex_unlock(kvm-lock);
+   up_write(kvm-slots_lock);
 
return 0;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: document locking for kvm_io_device_ops

2009-07-01 Thread Avi Kivity

From: Michael S. Tsirkin m...@redhat.com

slots_lock is taken everywhere when device ops are called.
Document this as we will use this to rework locking for io.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index 2c67f5a..06e38b2 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -20,6 +20,9 @@
 
 struct kvm_io_device;
 
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ **/
 struct kvm_io_device_ops {
void (*read)(struct kvm_io_device *this,
 gpa_t addr,
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: switch pit creation to slots_lock

2009-07-01 Thread Avi Kivity

From: Michael S. Tsirkin m...@redhat.com

Switch pit creation to slots_lock. slots_lock is already taken for read
everywhere, so we only need to take it for write when creating the pit.
This is in preparation for removing in_range and the kvm->lock around it.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index af53f64..65d0bc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2189,7 +2189,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
   sizeof(struct kvm_pit_config)))
goto out;
create_pit:
-   mutex_lock(kvm-lock);
+   down_write(kvm-slots_lock);
r = -EEXIST;
if (kvm-arch.vpit)
goto create_pit_unlock;
@@ -2198,7 +2198,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (kvm-arch.vpit)
r = 0;
create_pit_unlock:
-   mutex_unlock(kvm-lock);
+   up_write(kvm-slots_lock);
break;
case KVM_IRQ_LINE_STATUS:
case KVM_IRQ_LINE: {
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: remove in_range from io devices

2009-07-01 Thread Avi Kivity

From: Michael S. Tsirkin m...@redhat.com

This changes bus accesses to use high-level kvm_io_bus_read/kvm_io_bus_write
functions. in_range now becomes unused so it is removed from device ops in
favor of read/write callbacks performing range checks internally.

This allows aliasing (mostly for in-kernel virtio), as well as better error
handling by making it possible to pass errors up to userspace.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index c1c5cb6..8054d7b 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 
 }
 
-static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-   gpa_t addr, int len, int is_write)
-{
-   struct kvm_io_device *dev;
-
-   dev = kvm_io_bus_find_dev(vcpu-kvm-mmio_bus, addr, len, is_write);
-
-   return dev;
-}
-
 static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
kvm_run-exit_reason = KVM_EXIT_UNKNOWN;
@@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
 {
struct kvm_mmio_req *p;
struct kvm_io_device *mmio_dev;
+   int r;
 
p = kvm_get_vcpu_ioreq(vcpu);
 
@@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
kvm_run-exit_reason = KVM_EXIT_MMIO;
return 0;
 mmio:
-   mmio_dev = vcpu_find_mmio_dev(vcpu, p-addr, p-size, !p-dir);
-   if (mmio_dev) {
-   if (!p-dir)
-   kvm_iodevice_write(mmio_dev, p-addr, p-size,
-   p-data);
-   else
-   kvm_iodevice_read(mmio_dev, p-addr, p-size,
-   p-data);
-
-   } else
+   if (p-dir)
+   r = kvm_io_bus_read(vcpu-kvm-mmio_bus, p-addr,
+   p-size, p-data);
+   else
+   r = kvm_io_bus_write(vcpu-kvm-mmio_bus, p-addr,
+p-size, p-data);
+   if (r)
printk(KERN_ERRkvm: No iodevice found! addr:%lx\n, p-addr);
p-state = STATE_IORESP_READY;
 
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c0528f1..14bbaae 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -355,8 +355,14 @@ static inline struct kvm_pit *speaker_to_pit(struct 
kvm_io_device *dev)
return container_of(dev, struct kvm_pit, speaker_dev);
 }
 
-static void pit_ioport_write(struct kvm_io_device *this,
-gpa_t addr, int len, const void *data)
+static inline int pit_in_range(gpa_t addr)
+{
+   return ((addr = KVM_PIT_BASE_ADDRESS) 
+   (addr  KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static int pit_ioport_write(struct kvm_io_device *this,
+   gpa_t addr, int len, const void *data)
 {
struct kvm_pit *pit = dev_to_pit(this);
struct kvm_kpit_state *pit_state = pit-pit_state;
@@ -364,6 +370,8 @@ static void pit_ioport_write(struct kvm_io_device *this,
int channel, access;
struct kvm_kpit_channel_state *s;
u32 val = *(u32 *) data;
+   if (!pit_in_range(addr))
+   return -EOPNOTSUPP;
 
val  = 0xff;
addr = KVM_PIT_CHANNEL_MASK;
@@ -426,16 +434,19 @@ static void pit_ioport_write(struct kvm_io_device *this,
}
 
mutex_unlock(pit_state-lock);
+   return 0;
 }
 
-static void pit_ioport_read(struct kvm_io_device *this,
-   gpa_t addr, int len, void *data)
+static int pit_ioport_read(struct kvm_io_device *this,
+  gpa_t addr, int len, void *data)
 {
struct kvm_pit *pit = dev_to_pit(this);
struct kvm_kpit_state *pit_state = pit-pit_state;
struct kvm *kvm = pit-kvm;
int ret, count;
struct kvm_kpit_channel_state *s;
+   if (!pit_in_range(addr))
+   return -EOPNOTSUPP;
 
addr = KVM_PIT_CHANNEL_MASK;
s = pit_state-channels[addr];
@@ -490,37 +501,36 @@ static void pit_ioport_read(struct kvm_io_device *this,
memcpy(data, (char *)ret, len);
 
mutex_unlock(pit_state-lock);
+   return 0;
 }
 
-static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
-   int len, int is_write)
-{
-   return ((addr = KVM_PIT_BASE_ADDRESS) 
-   (addr  KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
-}
-
-static void speaker_ioport_write(struct kvm_io_device *this,
-gpa_t addr, int len, const void *data)
+static int speaker_ioport_write(struct kvm_io_device *this,
+   gpa_t addr, int len, const void *data)
 {
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state =

[COMMIT master] KVM: document lock nesting rule

2009-07-01 Thread Avi Kivity

From: Michael S. Tsirkin m...@redhat.com

Document kvm->lock nesting within kvm->slots_lock

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index db2a116..05b6bc7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -68,7 +68,7 @@ MODULE_LICENSE(GPL);
 /*
  * Ordering of locks:
  *
- * kvm-lock -- kvm-irq_lock
+ * kvm-slots_lock -- kvm-lock -- kvm-irq_lock
  */
 
 DEFINE_SPINLOCK(kvm_lock);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Fix IA32e Windows can't boot issue

2009-07-01 Thread Avi Kivity

From: Sheng Yang sh...@linux.intel.com

One typo in commit: KVM: convert custom marker based tracing to
event traces caused:

set_cr8: #GP, reserved bits 0x7f

when trying to boot an IA32e Windows guest.

The CR8 value was written to R8 instead of the register specified by the
instruction, so the value left in that register could be invalid as CR8.
The guest then did a read-modify-write of CR8 with it and ran into trouble.

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 119fa47..3a75db3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2903,7 +2903,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
return 1;
case 8:
val = kvm_get_cr8(vcpu);
-   kvm_register_write(vcpu, cr, val);
+   kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
skip_emulated_instruction(vcpu);
return 1;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: fix lock imbalance

2009-07-01 Thread Avi Kivity

From: Jiri Slaby jirisl...@gmail.com

There is a missing unlock on one fail path in ioapic_mmio_write,
fix that.

Signed-off-by: Jiri Slaby jirisl...@gmail.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e4deae0..124ecf7 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -292,7 +292,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, 
gpa_t addr, int len,
data = *(u32 *) val;
else {
printk(KERN_WARNING ioapic: Unsupported size %d\n, len);
-   return 0;
+   goto unlock;
}
 
addr = 0xff;
@@ -313,6 +313,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, 
gpa_t addr, int len,
default:
break;
}
+unlock:
mutex_unlock(ioapic-kvm-irq_lock);
return 0;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Trace mmio

2009-07-01 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 18f5c93..6263991 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -37,6 +37,8 @@
 #include linux/iommu.h
 #include linux/intel-iommu.h
 #include linux/cpufreq.h
+#include trace/events/kvm.h
+#undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
 #include trace.h
 
@@ -2426,6 +2428,8 @@ static int emulator_read_emulated(unsigned long addr,
 
if (vcpu-mmio_read_completed) {
memcpy(val, vcpu-mmio_data, bytes);
+   trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+  vcpu-mmio_phys_addr, *(u64 *)val);
vcpu-mmio_read_completed = 0;
return X86EMUL_CONTINUE;
}
@@ -2446,8 +2450,12 @@ mmio:
/*
 * Is this MMIO handled locally?
 */
-   if (!vcpu_mmio_read(vcpu, gpa, bytes, val))
+   if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
+   trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
return X86EMUL_CONTINUE;
+   }
+
+   trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
 
vcpu-mmio_needed = 1;
vcpu-mmio_phys_addr = gpa;
@@ -2491,6 +2499,7 @@ static int emulator_write_emulated_onepage(unsigned long 
addr,
return X86EMUL_CONTINUE;
 
 mmio:
+   trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
/*
 * Is this MMIO handled locally?
 */
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 0735b56..2c29235 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -56,6 +56,39 @@ TRACE_EVENT(kvm_ack_irq,
 
 
 #endif /* defined(__KVM_HAVE_IOAPIC)  defined(__KVM_HAVE_PIT) */
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+#define kvm_trace_symbol_mmio \
+   { KVM_TRACE_MMIO_READ_UNSATISFIED, unsatisfied-read }, \
+   { KVM_TRACE_MMIO_READ, read }, \
+   { KVM_TRACE_MMIO_WRITE, write }
+
+TRACE_EVENT(kvm_mmio,
+   TP_PROTO(int type, int len, u64 gpa, u64 val),
+   TP_ARGS(type, len, gpa, val),
+
+   TP_STRUCT__entry(
+   __field(u32,type)
+   __field(u32,len )
+   __field(u64,gpa )
+   __field(u64,val )
+   ),
+
+   TP_fast_assign(
+   __entry-type   = type;
+   __entry-len= len;
+   __entry-gpa= gpa;
+   __entry-val= val;
+   ),
+
+   TP_printk(mmio %s len %u gpa 0x%llx val 0x%llx,
+ __print_symbolic(__entry-type, kvm_trace_symbol_mmio),
+ __entry-len, __entry-gpa, __entry-val)
+);
+
 #endif /* _TRACE_KVM_MAIN_H */
 
 /* This part must be outside protection */
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Trace irq level and source id

2009-07-01 Thread Avi Kivity

From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 5e69489..0735b56 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -9,18 +9,23 @@
 
 #if defined(__KVM_HAVE_IOAPIC)  defined(__KVM_HAVE_PIT)
 TRACE_EVENT(kvm_set_irq,
-   TP_PROTO(unsigned int gsi),
-   TP_ARGS(gsi),
+   TP_PROTO(unsigned int gsi, int level, int irq_source_id),
+   TP_ARGS(gsi, level, irq_source_id),
 
TP_STRUCT__entry(
__field(unsigned int,   gsi )
+   __field(int,level   )
+   __field(int,irq_source_id   )
),
 
TP_fast_assign(
__entry-gsi= gsi;
+   __entry-level  = level;
+   __entry-irq_source_id  = irq_source_id;
),
 
-   TP_printk(gsi %u, __entry-gsi)
+   TP_printk(gsi %u level %d source %d,
+ __entry-gsi, __entry-level, __entry-irq_source_id)
 );
 
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 1711777..c380bf0 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -126,7 +126,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int 
irq, int level)
unsigned long *irq_state, sig_level;
int ret = -1;
 
-   trace_kvm_set_irq(irq);
+   trace_kvm_set_irq(irq, level, irq_source_id);
 
WARN_ON(!mutex_is_locked(kvm-irq_lock));
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[COMMIT master] KVM: Ignore PCI ECS I/O enablement

2009-07-01 Thread Avi Kivity

From: Andre Przywara andre.przyw...@amd.com

Linux guests will try to enable access to the extended PCI config space
via the I/O ports 0xCF8/0xCFC on AMD Fam10h CPU. Since we (currently?)
don't use ECS, simply ignore write and read attempts.

Signed-off-by: Andre Przywara andre.przyw...@amd.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4e57c48..18f5c93 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -844,6 +844,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
data)
return 1;
}
break;
+   case MSR_AMD64_NB_CFG:
+   break;
case MSR_IA32_DEBUGCTLMSR:
if (!data) {
/* We support the non-activated case already */
@@ -1049,6 +1051,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, 
u64 *pdata)
case MSR_P6_EVNTSEL1:
case MSR_K7_EVNTSEL0:
case MSR_K8_INT_PENDING_MSG:
+   case MSR_AMD64_NB_CFG:
data = 0;
break;
case MSR_MTRRcap:
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Exception handling between QEMU and KVM

2009-07-01 Thread Alexander Graf



On 01.07.2009, at 05:28, Christoffer Dall wrote:


Hi all.

We are still working on our ARM port of KVM and we are managing to let
the guest decompress the kernel image and run from relocated address
and we can support enabling MMU before this.

However, to debug relocated micro-hypervisor for exception handling, we
are trying to implement some NOT_IMPLEMENTED() macros and ASSERT()
macros.

What we have done so far is simply to exit QEMU roughly after
returning -EINVAL from the KVM_RUN system call, but before we start
supporting an interrupt cycle we have to improve on this. Setting
kvm_run->exit_reason = KVM_EXIT_EXCEPTION or kvm_run->exit_reason =
KVM_EXIT_SHUTDOWN just results in QEMU looping in the cpu execution
loop.

Can someone point us in the direction of what we're missing here in
order to exit the whole QEMU process from an exception occurring
inside the guest?


I'm still having a hard time understanding what exactly you're trying  
to do.


If you need to have your machine quit, just set exit_reason to some  
value you want to use for that purpose and make it behave accordingly  
in target-arm/kvm.c.


But I really can't think of any scenarios where you'd need it.  
Shouldn't all exceptions be handled in the kernel module? What exactly  
are you trying to implement here?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

KVM and 32-bit hosts -- still supposed to work?

2009-07-01 Thread Duck

I'm on 32-bit Linux, kernel 2.6.27.7-smp. When I moved from kvm-83 to 
kvm-87 plus kvm-kmod-devel-87, my Linux host VMs ran fine. But my XP0 host 
simply ran too slowly to be useable at all, and my Windows 7 host wouldn't 
boot -- just crashed and restarted early in the boot process.


Seeing the module version -devel-87 I tried kvm-87 with the old kvm-83 
kernel module (I have no idea whether this is supposed to work) and my XP 
image worked better, but a task such as opening Device Manager (to see if 
any hardware has changed) took several _minutes_ (it should take a second 
or so) and still didn't work correctly.


Kvm-87 with the -83 kernel module also persuaded Windows 7 to boot, to 
report 'new devices installed', and thereafter to work with kvm-87 plus 
kvm-kmod-devel-87. (Don't you love Windows's driver inflexibility :-)


So I presume that this is all down to virtual hardware changes since -83.

Before I rebuild my tired old XP0 image, however, and adopt kvm-87 for 
evermore, I just want to know if it's _supposed_ to work on 32-bit (Avi's 
post about the broken 32-bit compile of -87 due to no 32-bit test build 
system seemed to imply that 32-bit hosts are considered passe).


Or should I stick with the older kvm until I upgrade my OS to 64-bit?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] qemu-kvm: remove CPUID host hacks

2009-07-01 Thread Andre Przywara

KVM provides an in-kernel feature to disable CPUID bits that are not
present in the current host. So there is no need here to duplicate this
work. Additionally allows 3DNow! on capable processors, since the
restriction seems to apply to QEMU/TCG only.

Signed-off-by: Andre Przywara andre.przyw...@amd.com
---
 target-i386/helper.c |   22 +-
 1 files changed, 1 insertions(+), 21 deletions(-)

This is a rebased version of the qemu.git patch, which caused merge conflicts.
Please apply!

Regards,
Andre.

diff --git a/target-i386/helper.c b/target-i386/helper.c
index d76c224..30e1d77 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1645,29 +1645,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 *edx = env-cpuid_ext2_features;
 
 if (kvm_enabled()) {
-uint32_t h_eax, h_edx;
-
-host_cpuid(index, 0, h_eax, NULL, NULL, h_edx);
-
-/* disable CPU features that the host does not support */
-
-/* long mode */
-if ((h_edx  0x2000) == 0 /* || !lm_capable_kernel */)
-*edx = ~0x2000;
-/* syscall */
-if ((h_edx  0x0800) == 0)
-*edx = ~0x0800;
-/* nx */
-if ((h_edx  0x0010) == 0)
-*edx = ~0x0010;
-
-/* disable CPU features that KVM cannot support */
-
-/* svm */
+/* disable nested svm if not explicitly requested */
 if (!kvm_nested)
 *ecx = ~CPUID_EXT3_SVM;
-/* 3dnow */
-*edx = ~0xc000;
 } else {
 /* AMD 3DNow! is not supported in QEMU */
 *edx = ~(CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT);
-- 
1.6.1.3


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] qemu-kvm: remove CPUID host hacks

2009-07-01 Thread Alexander Graf



On 01.07.2009, at 09:22, Andre Przywara wrote:


KVM provides an in-kernel feature to disable CPUID bits that are not
present in the current host. So there is no need here to duplicate  
this

work. Additionally allows 3DNow! on capable processors, since the
restriction seems to apply to QEMU/TCG only.

Signed-off-by: Andre Przywara andre.przyw...@amd.com
---
target-i386/helper.c |   22 +-
1 files changed, 1 insertions(+), 21 deletions(-)

This is a rebased version of the qemu.git patch, which caused merge  
conflicts.

Please apply!

Regards,
Andre.

diff --git a/target-i386/helper.c b/target-i386/helper.c
index d76c224..30e1d77 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1645,29 +1645,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t  
index, uint32_t count,

*edx = env-cpuid_ext2_features;

if (kvm_enabled()) {
-uint32_t h_eax, h_edx;
-
-host_cpuid(index, 0, h_eax, NULL, NULL, h_edx);
-
-/* disable CPU features that the host does not support */
-
-/* long mode */
-if ((h_edx  0x2000) == 0 /* || !lm_capable_kernel  
*/)

-*edx = ~0x2000;
-/* syscall */
-if ((h_edx  0x0800) == 0)
-*edx = ~0x0800;
-/* nx */
-if ((h_edx  0x0010) == 0)
-*edx = ~0x0010;
-
-/* disable CPU features that KVM cannot support */
-
-/* svm */
+/* disable nested svm if not explicitly requested */
if (!kvm_nested)
*ecx = ~CPUID_EXT3_SVM;
-/* 3dnow */
-*edx = ~0xc000;


I don't know why this is in, but it's definitely in a kvm_enabled() if  
statement, so it's not because of TCG :-).
Also kvm_nested should control the svm bit in the qemu description  
already. Preferably before the +/-feature bits are interpreted.


... this would lead to total removal of that hack function :-).

Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: pci_stub and kvm

2009-07-01 Thread Avi Kivity


On 07/01/2009 07:18 AM, Yinghai Lu wrote:

[ 1966.343286]
[ 1966.343288] ===
[ 1966.356756] [ INFO: possible circular locking dependency detected ]
[ 1966.356759] 2.6.31-rc1-tip-00978-g99123e5-dirty #438
[ 1966.356761] ---
[ 1966.356764] events/0/387 is trying to acquire lock:
[ 1966.356766]  (kvm-lock){+.+.+.}, at: [8100af27]
kvm_assigned_dev_interrupt_work_handler+0x42/0x13a
[ 1966.356786]
[ 1966.356787] but task is already holding lock:
[ 1966.356789]  (match-interrupt_work){+.+...}, at:
[810986e9] worker_thread+0x175/0x2f6
[ 1966.356797]
[ 1966.356798] which lock already depends on the new lock.
[ 1966.356799]
[ 1966.356800]
[ 1966.356801] the existing dependency chain (in reverse order) is:
[ 1966.356803]
[ 1966.356803] -  #1 (match-interrupt_work){+.+...}:
[ 1966.356809][810b3bf6] __lock_acquire+0x1396/0x1710
[ 1966.356817][810b403c] lock_acquire+0xcc/0x104
[ 1966.356821][810994a8] __cancel_work_timer+0x121/0x247
[ 1966.356825][8109962c] cancel_work_sync+0x23/0x39
[ 1966.356828][8100b280] kvm_deassign_irq+0xf1/0x183
[ 1966.356832][8100db6c] kvm_vm_ioctl+0x8c8/0xc1a
[ 1966.356837][81156e56] vfs_ioctl+0x3e/0xa3
[ 1966.356846][8115741c] do_vfs_ioctl+0x4be/0x511
[ 1966.356850][811574c5] sys_ioctl+0x56/0x8d
[ 1966.356854][81034fdb] system_call_fastpath+0x16/0x1b
[ 1966.356860][] 0x
[ 1966.356869]
[ 1966.356870] -  #0 (kvm-lock){+.+.+.}:
[ 1966.356872][810b392b] __lock_acquire+0x10cb/0x1710
[ 1966.356875][810b403c] lock_acquire+0xcc/0x104
[ 1966.356878][81cde487] mutex_lock_nested+0x75/0x2fa
[ 1966.356886][8100af27]
kvm_assigned_dev_interrupt_work_handler+0x42/0x13a
[ 1966.356890][81098743] worker_thread+0x1cf/0x2f6
[ 1966.356892][8109e335] kthread+0xa8/0xb0
[ 1966.356899][8103609a] child_rip+0xa/0x20
[ 1966.356906][] 0x
   


This is already fixed in kvm.git.  I'm not sure about merging it to 
2.6.30 since the race is very rare and involves device assignment (which 
is not very mainstream), while the fix touches the core kvm parts.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] KVM: Fix IA32e Windows can't boot issue

2009-07-01 Thread Sheng Yang

One typo in commit: KVM: convert custom marker based tracing to
event traces caused:

set_cr8: #GP, reserved bits 0x7f

when trying to boot an IA32e Windows guest.

The CR8 value was written to R8 instead of the register specified by the
instruction, so the value left in that register could be invalid as CR8.
The guest then did a read-modify-write of CR8 with it and ran into trouble.

Signed-off-by: Sheng Yang sh...@linux.intel.com
---
 arch/x86/kvm/vmx.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 119fa47..3a75db3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2903,7 +2903,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
return 1;
case 8:
val = kvm_get_cr8(vcpu);
-   kvm_register_write(vcpu, cr, val);
+   kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
skip_emulated_instruction(vcpu);
return 1;
-- 
1.5.4.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHv5 0/6] kvm: locking and API rework for iosignalfd

2009-07-01 Thread Avi Kivity


On 06/29/2009 10:23 PM, Michael S. Tsirkin wrote:

This series switches kvm_io_device to slots_lock from kvm->lock mutex,
and uses that to rework io bus API. This takes less locks on data path,
and uses less lines of code. These changes will also be useful to serve
as basis for Greg's iosignalfd work.
   


Applied all, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: kvm-87 static build failure: cannot find -lpci

2009-07-01 Thread Antoine Martin

-BEGIN PGP SIGNED MESSAGE-
Hash: SHA512

Michael Tokarev wrote:
 Antoine Martin wrote:
 []
   LINK  x86_64-softmmu/qemu-system-x86_64
 /usr/lib/gcc/x86_64-pc-linux-gnu/4.3.2/../../../../x86_64-pc-linux-gnu/bin/ld:

 cannot find -lpci

 What gives?
 
 Is it your system or who's?  Why the system does not have a
 usable libpci.a is a question for you, not for kvm...  I guess.
It seems like an old bug re-surfacing (.so present, .a missing):
http://bugs.gentoo.org/138035
Sorry for the line noise.

Antoine
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.9 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iEYEAREKAAYFAkpLGfYACgkQGK2zHPGK1rtFRwCePaqg0l3Oy9XAQwhwhDeeClLr
NS8An20azMQ9dbwR1wxOCNRjcDZYU61m
=8tgM
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] KVM: fix lock imbalance

2009-07-01 Thread Avi Kivity


On 06/29/2009 07:05 PM, Jiri Slaby wrote:

There is a missing unlock on one fail path in ioapic_mmio_write,
fix that.
   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] KVM: Fix IA32e Windows can't boot issue

2009-07-01 Thread Avi Kivity


On 07/01/2009 11:07 AM, Sheng Yang wrote:

One typo in commit: KVM: convert custom marker based tracing to
event traces caused:

set_cr8: #GP, reserved bits 0x7f

when try to boot up IA32e Windows guest.

One value has been written to R8 instead of specific register, then the
value of specific register can be invalid as CR8. Guest RMW it, then got
trouble.
   


Applied, thanks.  Strange how I didn't see it - I have both Windows XP 
x64 and Vista x64 in my test suite.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] KVM: Fix IA32e Windows can't boot issue

2009-07-01 Thread Sheng Yang

On Wednesday 01 July 2009 16:21:13 Avi Kivity wrote:
 On 07/01/2009 11:07 AM, Sheng Yang wrote:
  One typo in commit: KVM: convert custom marker based tracing to
  event traces caused:
 
  set_cr8: #GP, reserved bits 0x7f
 
  when try to boot up IA32e Windows guest.
 
  One value has been written to R8 instead of specific register, then the
  value of specific register can be invalid as CR8. Guest RMW it, then got
  trouble.

 Applied, thanks.  Strange how I didn't see it - I have both Windows XP
 x64 and Vista x64 in my test suite.

Forgot to mention, it only happened in some of our machines here...

-- 
regards
Yang, Sheng

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v3] enable x2APIC without interrupt remapping under KVM

2009-07-01 Thread Avi Kivity


On 06/30/2009 10:36 PM, Eric W. Biederman wrote:

The short version is I don't know what work arounds we will ultimately
decide to deploy to work with real hardware.

I have been seriously contemplating causing a cpu hot-unplug request
to fail if we are in ioapic mode and we have irqs routed to the cpu
that is being unplugged.

   

Well, obviously we need to disassociate any irqs from such a cpu.  Could be done
from the kernel or only enforced by the kernel.
 


Using the normal irq migration path we can move irqs off of a cpu reliably
there just aren't any progress guarantees.
   


Program the ioapic to the new cpu.  Wait a few milliseconds.  If it 
takes more than that to get an interrupt from the ioapic to the local 
apic, the machine has much bigger problems.



Even with perfectly working hardware it is not possible in the general
case to migrate an ioapic irq from one cpu to another outside of an
interrupt handler without risking dropping an interrupt.

   

Can't you generate a spurious interrupt immediately after the migration?  An
extra interrupt shouldn't hurt.
 


Nope.  The ioapics can't be told to send an interrupt.
   


You can program the local apic ICR to generate an interrupt with the 
same vector.



There is no general way to know you have seen the last interrupt
floating around your system.  PCI ordering rules don't help because
the ioapics can potentially take an out of band channel.

   

Can you describe the problem scenario? an ioapic-lapic message delivered to a
dead cpu?
 


Dropped irqs..  Driver hangs because it is waiting for an irq.  Hardware
hangs because it is waiting for the cpu to process the irq.

Potentially we get a level triggered irq that is never acked by
the cpu that won't arm until the cpu sends an ack, and we can't
send an ack from another cpu.

   


I think a spurious interrupt generated through the local apic solves 
that problem.  For level-triggered interrupts, mask them before 
offlining the cpu.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Newbie, struggling with graphics and qemu monitor

2009-07-01 Thread Avi Kivity


On 06/30/2009 09:33 PM, Michael Jinks wrote:

In an attempt to get past unrelated interference I just set up an X
session with only an xterm, used that to launch VNC connecting to my
KVM test host, and tried to launch my test guest again.  Same
behavior: Ctl-Alt patterns are ignored, as are any other keystrokes I
try to send to the qemu console window, which won't release my mouse.

I've looked but haven't found any way to attach a monitor process to
an already-running guest.  Is this possible?  Or do we only get a
monitor as a child process of the initial kvm invocation?
   


Look up the -monitor option.


Given the way qemu and screen interact, using the same keystrokes to
initiate commands, it would be really useful to have some other way to
detach and re-attach to a kvm guest on the fly.

   


If you run qemu in screen, use -vnc.  X and screen don't mix.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [KVM PATCH v7 0/5] irqfd fixes and enhancements

2009-07-01 Thread Avi Kivity


On 06/29/2009 09:28 PM, Gregory Haskins wrote:

(Applies to kvm.git/master:4631e094)

The following is the latest attempt to fix the races in irqfd/eventfd, as
well as restore DEASSIGN support.  For more details, please read the patch
headers.  I've restored the slow-work variant of the logic as a separate
patch (5/5) so we can get a better idea of the true difference between
the workqueue approach and the slow-work style.  I am personally in favor
of the slow-work approach since it doesn't require a mostly-idle+dedicated
thread to hang around.  But that is just me.  5/5 can be ignored, folded
in to 3/5+4/5 as appropriate, or merged as is per the whim of Avi et al.
   


I think a good compromise would be to create the workqueue when the 
first VM is launched.



As always, this series has been tested against the kvm-eventfd unit test
with both 5/5 applied and unapplied, and everything appears to be
functioning properly. You can download this test here:

ftp://ftp.novell.com/dev/ghaskins/kvm-eventfd.tar.bz2

I've included version 4 of Davide's eventfd patch (ported to kvm.git) so
that it's a complete reviewable series.  Note, however, that there may be
later versions of his patch to consider for merging, so we should
coordinate with him.
   


Davide's patch was merged upstream.  Please take a look to make sure 
this patchset is compatible with what was merged.


Patchset looks good to me, but I'd appreciate an ack from Michael (or 
anyone else who's interested).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Exception handling between QEMU and KVM

2009-07-01 Thread Avi Kivity


On 07/01/2009 06:28 AM, Christoffer Dall wrote:

Hi all.

We are still working on our ARM port of KVM and we are managing to let
the guest decompress the kernel image and run from relocated address
and we can support enabling MMU before this.

However, to debug relocated micro-hypervisor for exception handling, we
are trying to implement some NOT_IMPLEMENTED() macros and ASSERT()
macros.

What we have done so far is simply to exit QEMU roughly after
returning -EINVAL from the KVM_RUN system call, but before we start
supporting an interrupt cycle we have to improve on this. Setting
kvm_run->exit_reason = KVM_EXIT_EXCEPTION or kvm_run->exit_reason =
KVM_EXIT_SHUTDOWN just results in QEMU looping in the cpu execution
loop.

Can someone point us in the direction of what we're missing here in
order to exit the whole QEMU process from an exception occurring
inside the guest?
   


Not sure I understand.  Can't you call exit() or abort() after kvm 
returns an error?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM and 32-bit hosts -- still supposed to work?

2009-07-01 Thread Avi Kivity


On 07/01/2009 08:57 AM, Duck wrote:
I'm on 32-bit Linux, kernel 2.6.27.7-smp. When I moved from kvm-83 to 
kvm-87 plus kvm-kmod-devel-87, my Linux host VMs ran fine. But my XP0 
host simply ran too slowly to be useable at all, and my Windows 7 host 
wouldn't boot -- just crashed and restarted early in the boot process.


Are you sure kvm was loaded?  "too slowly" often means qemu is emulating.

Check 'info kvm' in the monitor.

Seeing the module version -devel-87 I tried kvm-87 with the old 
kvm-83 kernel module (I have no idea whether this is supposed to work) 
and my XP image worked better, but a task such as opening Device 
Manager (to see if any hardware has changed) took several _minutes_ 
(it should take a second or so)) and still didn't work correctly.


Kvm-87 with the -83 kernel module also persuaded Windows 7 to boot, to 
report 'new devices installed', and thereafter to work with kvm-87 
plus kvm-kmod-devel-87. (Don't you love Windows's driver inflexibility 
:-)


So I presume that this is all down to virtual hardware changes since -83.

Before I rebuild my tired old XP0 image, however, and adopt kvm-87 for 
evermore, I just want to know if it's _supposed_ to work on 32-bit 
(Avi's post about the broken 32-bit compile of -87 due to no 32-bit 
test build system seemed to imply that 32-bit hosts are considered 
passe).


Or should I stick with the older kvm until I upgrade my OS to 64-bit?


kvm is supported on 32-bit hosts.  Unfortunately since moving to 
kvm-autotest I no longer test on 32-bit, I'll try to improve the 
situation there.


If someone has spare cycles and can run kvm-autotest on their hardware, 
that would improve kvm quality measurably.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [KVM PATCH v7 0/5] irqfd fixes and enhancements

2009-07-01 Thread Michael S. Tsirkin

On Wed, Jul 01, 2009 at 11:53:26AM +0300, Avi Kivity wrote:
 On 06/29/2009 09:28 PM, Gregory Haskins wrote:
 (Applies to kvm.git/master:4631e094)

 The following is the latest attempt to fix the races in irqfd/eventfd, as
 well as restore DEASSIGN support.  For more details, please read the patch
 headers.  I've restored the slow-work variant of the logic as a separate
 patch (5/5) so we can get a better idea of the true difference between
 the workqueue approach and the slow-work style.  I am personally in favor
 of the slow-work approach since it doesnt require a mostly-idle+dedicated
 thread to hang around.  But that is just me.  5/5 can be ignored, folded
 in to 3/5+4/5 as appropriate, or merged as is per the whim of Avi et. al.


 I think a good compromise would be to create the workqueue when the  
 first VM is launched.

 As always, this series has been tested against the kvm-eventfd unit test
 with both 5/5 applied and unapplied, and everything appears to be
 functioning properly. You can download this test here:

 ftp://ftp.novell.com/dev/ghaskins/kvm-eventfd.tar.bz2

 I've included version 4 of Davide's eventfd patch (ported to kvm.git) so
 that its a complete reviewable series.  Note, however, that there may be
 later versions of his patch to consider for merging, so we should
 coordinate with him.


 Davide's patch was merged upstream.  Please take a look to make sure  
 this patchset is compatible with what was merged.

 Patchset looks good to me, but I'd appreciate an ack from Michael (or  
 anyone else who's interested).

For patches 1,3,4:
Acked-by: Michael S. Tsirkin m...@redhat.com

As for 5: IMHO, let's keep it simple meanwhile.


-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: BUG Report: config KVM selects PREEMPT_NOTIFIERS, but it should also select PREEMPT (Vanilla Kernel 2.6.30)

2009-07-01 Thread Avi Kivity


On 06/29/2009 06:18 PM, Walther Maldonado wrote:

Good day,

Today when attempting to compile and install the KVM module (kernel 2.6.30), 
inserting the module would error with a number of unresolved symbols such as 
preempt_notifier_register() not found.

Some code-browsing revealed that, even though the kvm KConfig setting KVM 
does select PREEMPT_NOTIFIERS, in include/linux/preempt.h, said define will not cause the 
expected functions to be defined unless CONFIG_PREEMPT is also selected.

   


Not in my copy of v2.6.30.  preempt.h only guards the preempt notifier 
symbols with CONFIG_PREEMPT_NOTIFIERS.


I built sched.o with CONFIG_PREEMPT_NONE=y and see:

[...@cleopatra linux-2.6 (v2.6.30)]$ nm kernel/sched.o  | grep 
preempt_notifier

c559bf4c A __crc_preempt_notifier_register
8be61cbb A __crc_preempt_notifier_unregister
0028 r __kcrctab_preempt_notifier_register
0020 r __kcrctab_preempt_notifier_unregister
0232 r __kstrtab_preempt_notifier_register
0216 r __kstrtab_preempt_notifier_unregister
0050 r __ksymtab_preempt_notifier_register
0040 r __ksymtab_preempt_notifier_unregister
0c77 T preempt_notifier_register
0caf T preempt_notifier_unregister

Perhaps you have a stale kernel installed?  Try reinstalling the kernel 
and modules and rebooting.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [KVM PATCH v6 3/4] KVM: Fix races in irqfd using new eventfd_kref_get interface

2009-07-01 Thread Avi Kivity


On 06/29/2009 07:52 PM, Gregory Haskins wrote:



   One idea I had to make it even clearer was to have a shutdown list
of irqfds per-kvm, together with the items list, and make work_struct for
shutdown global, not per-irqfd.  We can then unconditionally do
list_move + schedule_work to shut down an irqfd, and it's safe to do
even if it is already on the shutdown list - it just gets moved to tail.

 


Hmm..I'm not sure that churn really buys us anything, tho.  Technically
the active bit is redundant with list_del_init()+list_empty() that I
employed in previous versions.  However, I made it explicit with the
active bit to be more self-documenting.  IMO, the latest code is pretty
clear, and the change you are proposing is moving towards a slightly
trickier variant like I originally had.  I'd say lets leave this as is.
   


Could retain self documentation by introducing a helper irqfd_active() 
which does the list_blah() magic.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] qemu-kvm: fix KVMs GET_SUPPORTED_CPUID feature usage

2009-07-01 Thread Avi Kivity


On 06/25/2009 04:12 PM, Andre Przywara wrote:

If we want to trim the user provided CPUID bits for KVM to be not greater
than that of the host, we should
a) not remove the bits _after_ we sent them to the kernel  and
b) not do the bitmangling on the original values while sending a copy
This fixes the masking of features that are not present on the host and
helps to use -cpu host.

   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 3/4] ignore PCI ECS I/O enablement

2009-07-01 Thread Avi Kivity


On 06/24/2009 04:37 PM, Andre Przywara wrote:

Linux guests will try to enable access to the extended PCI config space
via the I/O ports 0xCF8/0xCFC on AMD Fam10h CPU. Since we (currently?)
don't use ECS, simply ignore write and read attempts.
   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] qemu-kvm: fix typo in configure

2009-07-01 Thread Michael S. Tsirkin

Kill extra \). Also escape ! for clarity and Bourne shell
compatibility.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---

This patch fixes configure on next for me

 configure |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index f76f511..b62e3d7 100755
--- a/configure
+++ b/configure
@@ -2141,10 +2141,10 @@ configure_kvm() {
 }
 
 # Make sure the target and host cpus are compatible
-if test ! \( $target_cpu = $cpu -o \
+if test \! \( $target_cpu = $cpu -o \
   \( $target_cpu = ppcemb -a $cpu = ppc \) -o \
-  \( $target_cpu = x86_64 -a $cpu = i386   \) -o \
-  \( $target_cpu = i386   -a $cpu = x86_64 \) \) -o \
+  \( $target_cpu = x86_64 -a $cpu = i386 \) -o \
+  \( $target_cpu = i386   -a $cpu = x86_64 \) -o \
   \( $target_cpu = ia64   -a $cpu = ia64 \) \) ; then
   target_kvm=no
 fi
-- 
1.6.2.2
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM and 32-bit hosts -- still supposed to work?

2009-07-01 Thread Alexander Graf






On 01.07.2009, at 11:05, Avi Kivity a...@redhat.com wrote:


On 07/01/2009 08:57 AM, Duck wrote:
I'm on 32-bit Linux, kernel 2.6.27.7-smp. When I moved from kvm-83  
to kvm-87 plus kvm-kmod-devel-87, my Linux host VMs ran fine. But  
my XP0 host simply ran too slowly to be useable at all, and my  
Windows 7 host wouldn't boot -- just crashed and restarted early in  
the boot process.


Are you sure kvm was loaded?  "too slowly" often means qemu is  
emulating.


Check 'info kvm' in the monitor.

Seeing the module version -devel-87 I tried kvm-87 with the old  
kvm-83 kernel module (I have no idea whether this is supposed to  
work) and my XP image worked better, but a task such as opening  
Device Manager (to see if any hardware has changed) took several  
_minutes_ (it should take a second or so)) and still didn't work  
correctly.


Kvm-87 with the -83 kernel module also persuaded Windows 7 to boot,  
to report 'new devices installed', and thereafter to work with  
kvm-87 plus kvm-kmod-devel-87. (Don't you love Windows's driver  
inflexibility :-)


So I presume that this is all down to virtual hardware changes  
since -83.


Before I rebuild my tired old XP0 image, however, and adopt kvm-87  
for evermore, I just want to know if it's _supposed_ to work on 32- 
bit (Avi's post about the broken 32-bit compile of -87 due to no 32- 
bit test build system seemed to imply that 32-bit hosts are  
considered passe).


Or should I stick with the older kvm until I upgrade my OS to 64-bit?


kvm is supported on 32-bit hosts.  Unfortunately since moving to kvm- 
autotest I no longer test on 32-bit, I'll try to improve the  
situation there.


If someone has spare cycles and can run kvm-autotest on their  
hardware, that would improve kvm quality measurably.


Can't you just run the tests in a 32 bit VM? :)

Alex



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [KVM-AUTOTEST PATCH] Adding iperf test

2009-07-01 Thread Alexey Eremenko

On Tue, Jun 30, 2009 at 11:52 AM, sudhir kumarsmalik...@gmail.com wrote:
 On Wed, Jun 17, 2009 at 2:59 AM, Lucas Meneghel Rodriguesl...@redhat.com 
 wrote:
 Adding iperf network performance test. Basically it tests
 networking functionality, stability and performance of guest OSes.
 This test is cross-platform -- i.e. it works on both Linux and
 Windows VMs.


 I have a question here. Why are we adding iperf in a way different
 than other tests ? We have client/tests/different_tests directory
 for each test which contains the python modules and the test tarball.
 Then why in case of iperf we are putting it under client/tests/kvm and
 modifying kvm.py instead of putting the test suite as part of
 autotest(run_autotest is not enough?)? Even if we do not want to touch
 the existing iperf test in autotest we can use a separate name like
 kvm_iperf. Somehow I have a feeling that there was a discussion on the
 list for keeping tests under a particular directory. But still I feel
 that should be only for tests specific to KVM and not the guest. Is
 there any disadvantage of using the current approach of executing
 these test suites?

The reason to put my test under kvm/ test, is because it depends on
KVM-Autotest framework, not just on generic Autotest framework.

In addition, the test is cross-platform on the guest side, currently
supporting Windows and Linux guests, with possibility to support
Solaris and BSD in future.

LMR: me too, hate putting binaries in source tree, but the alternative
option is to provide separate *.tar.bz2 for all the binary utils, and
I'm not sure which way is better.

-- 
-Alexey Eromenko
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/2] qemu: fix up load for msi-x

2009-07-01 Thread Michael S. Tsirkin

Add missing load of control vector in virtio

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index 843be86..e9dd7a0 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -665,6 +665,9 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 vdev->config_len = qemu_get_be32(f);
 qemu_get_buffer(f, vdev->config, vdev->config_len);
 
+if (vdev->nvectors)
+qemu_get_be16s(f, &vdev->config_vector);
+
 num = qemu_get_be32(f);
 
 for (i = 0; i < num; i++) {
-- 
1.6.2.2
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2] fix segfault in msix_save

2009-07-01 Thread Michael S. Tsirkin

This fixes segfault reported by Kevin Wolf,
and simplifies the code in msix_save.

Reported-by: Kevin Wolf kw...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/msix.c |   12 +++-
 1 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 4ab6da6..98c62a5 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -284,11 +284,13 @@ int msix_uninit(PCIDevice *dev)
 
 void msix_save(PCIDevice *dev, QEMUFile *f)
 {
-unsigned nentries = (pci_get_word(dev->config + PCI_MSIX_FLAGS) &
- PCI_MSIX_FLAGS_QSIZE) + 1;
-qemu_put_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
-qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
-(nentries + 7) / 8);
+unsigned n = dev->msix_entries_nr;
+
+if (!dev->cap_present & QEMU_PCI_CAP_MSIX)
+return;
+
+qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
+qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
 }
 
 /* Should be called after restoring the config space. */
-- 
1.6.2.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/2] save/load fixes

2009-07-01 Thread Michael S. Tsirkin

Fixes a couple of save/load issues with qemu.git

Michael S. Tsirkin (2):
  fix segfault in msix_save
  qemu: fix up load for msi-x

 hw/msix.c   |   12 +++-
 hw/virtio.c |3 +++
 2 files changed, 10 insertions(+), 5 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCHv5 0/6] kvm: locking and API rework for iosignalfd

2009-07-01 Thread Gregory Haskins

Avi Kivity wrote:
 On 06/29/2009 10:23 PM, Michael S. Tsirkin wrote:
 This series switches kvm_io_device to slots_lock from kvm->lock mutex,
 and uses that to rework io bus API. This takes less locks on data path,
 and uses less lines of code. These changes will also be useful to serve
 as basis for Greg's iosignalfd work.


 Applied all, thanks.

I'll get to work rebasing iosignalfd now.  Thanks guys!

-Greg



signature.asc
Description: OpenPGP digital signature

Re: [PATCH 1/2] fix segfault in msix_save

2009-07-01 Thread Michael S. Tsirkin

On Wed, Jul 01, 2009 at 02:50:14PM +0300, Michael S. Tsirkin wrote:
 This fixes segfault reported by Kevin Wolf,
 and simplifies the code in msix_save.
 
 Reported-by: Kevin Wolf kw...@redhat.com
 Signed-off-by: Michael S. Tsirkin m...@redhat.com

For qemu-kvm, this supersedes e347f89a3d4773dfc22d8874c9906453d54768c7.

-- 
MST
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] fix segfault in msix_save

2009-07-01 Thread Kevin Wolf

Michael S. Tsirkin schrieb:
 This fixes segfault reported by Kevin Wolf,
 and simplifies the code in msix_save.
 
 Reported-by: Kevin Wolf kw...@redhat.com
 Signed-off-by: Michael S. Tsirkin m...@redhat.com

I can confirm that this fixes the segfault for me.

Kevin
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCHv2 0/2] save/load fixes

2009-07-01 Thread Michael S. Tsirkin

Fixes a couple of save/load issues with qemu.git
Changes since v1:
  Includes a better fix for virtio.

Michael S. Tsirkin (2):
  fix segfault in msix_save
  qemu: remove control vector save

 hw/msix.c   |   12 +++-
 hw/virtio.c |3 ---
 2 files changed, 7 insertions(+), 8 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCHv2 2/2] qemu: remove control vector save

2009-07-01 Thread Michael S. Tsirkin

control vector is saved/restored by virtio-pci,
it does not belong in virtio.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/virtio.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index 843be86..41e7ca2 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -626,9 +626,6 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 qemu_put_be32(f, vdev->config_len);
 qemu_put_buffer(f, vdev->config, vdev->config_len);
 
-if (vdev->nvectors)
-qemu_put_be16s(f, &vdev->config_vector);
-
 for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
 if (vdev->vq[i].vring.num == 0)
 break;
-- 
1.6.2.2
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCHv2 1/2] fix segfault in msix_save

2009-07-01 Thread Michael S. Tsirkin

This fixes segfault reported by Kevin Wolf,
and simplifies the code in msix_save.

Reported-by: Kevin Wolf kw...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/msix.c |   12 +++-
 1 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 4ab6da6..98c62a5 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -284,11 +284,13 @@ int msix_uninit(PCIDevice *dev)
 
 void msix_save(PCIDevice *dev, QEMUFile *f)
 {
-unsigned nentries = (pci_get_word(dev-config + PCI_MSIX_FLAGS) 
- PCI_MSIX_FLAGS_QSIZE) + 1;
-qemu_put_buffer(f, dev-msix_table_page, nentries * MSIX_ENTRY_SIZE);
-qemu_put_buffer(f, dev-msix_table_page + MSIX_PAGE_PENDING,
-(nentries + 7) / 8);
+unsigned n = dev-msix_entries_nr;
+
+if (!dev-cap_present  QEMU_PCI_CAP_MSIX)
+return;
+
+qemu_put_buffer(f, dev-msix_table_page, n * MSIX_ENTRY_SIZE);
+qemu_put_buffer(f, dev-msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
 }
 
 /* Should be called after restoring the config space. */
-- 
1.6.2.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: BUG Report: config KVM selects PREEMPT_NOTIFIERS, but it should also select PREEMPT (Vanilla Kernel 2.6.30)

2009-07-01 Thread Walther Maldonado

Good day,

Alright, my bad. After studying it more closely I realized I had misread where the 
defines were delimiting each function. :/ Sorry for the invalid report (and 
yes, I had other issues setting up and running the modules, but I was pretty 
sure the missing symbols were caused by the define, which turned out to 
be false).

Cheers~
Walther.

On Wed, 1 Jul 2009 11:41:38 +0200
Avi Kivity a...@redhat.com wrote:

 On 06/29/2009 06:18 PM, Walther Maldonado wrote:
  Good day,
 
  Today when attempting to compile and install the KVM module (kernel 
  2.6.30), inserting the module would error with a number of unresolved 
  symbols such as preempt_notifier_register() not found.
 
  Some code-browsing revealed that, even though the kvm KConfig setting KVM 
  does select PREEMPT_NOTIFIERS, in include/linux/preempt.h, said define will 
  not cause the expected functions to be defined unless CONFIG_PREEMPT is 
  also selected.
 
 
 
 Not in my copy of v2.6.30.  preempt.h only guards the preempt notifier 
 symbols with CONFIG_PREEMPT_NOTIFIER.
 
 I built sched.o with CONFIG_PREEMPT_NONE=y and see:
 
 [...@cleopatra linux-2.6 (v2.6.30)]$ nm kernel/sched.o  | grep 
 preempt_notifier
 c559bf4c A __crc_preempt_notifier_register
 8be61cbb A __crc_preempt_notifier_unregister
 0028 r __kcrctab_preempt_notifier_register
 0020 r __kcrctab_preempt_notifier_unregister
 0232 r __kstrtab_preempt_notifier_register
 0216 r __kstrtab_preempt_notifier_unregister
 0050 r __ksymtab_preempt_notifier_register
 0040 r __ksymtab_preempt_notifier_unregister
 0c77 T preempt_notifier_register
 0caf T preempt_notifier_unregister
 
 Perhaps you have a stale kernel installed?  Try reinstalling the kernel 
 and modules and rebooting.
 
 -- 
 error compiling committee.c: too many arguments to function
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v5] enable x2APIC without interrupt remapping under KVM

2009-07-01 Thread Gleb Natapov

KVM would like to provide x2APIC interface to a guest without emulating
interrupt remapping device. The reason KVM prefers guest to use x2APIC
is that x2APIC interface is better virtualizable and provides better
performance than mmio xAPIC interface:

- msr exits are faster than mmio (no page table walk, emulation)
- no need to read back ICR to look at the busy bit
- one 64 bit ICR write instead of two 32 bit writes
- shared code with the Hyper-V paravirt interface

The included patch changes the x2APIC enabling logic to enable it even if IR
initialization failed, provided the kernel runs under KVM and no APIC id is
greater than 255 (if there is one, the spec requires the BIOS to move to x2apic
mode before starting an OS).

Signed-off-by: Gleb Natapov g...@redhat.com
---

This version does not rely on x2apic_preenabled to determine max APIC id
since this logic will break after kexec. Use max_physical_apicid instead.
Also allow overriding apic ops after the call to enable_IR_x2apic() since
we may decide to use physical mode instead of cluster.

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 8c7c042..db0c07f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -49,6 +49,7 @@
 #include asm/mtrr.h
 #include asm/smp.h
 #include asm/mce.h
+#include asm/kvm_para.h
 
 unsigned int num_processors;
 
@@ -1363,52 +1364,74 @@ void enable_x2apic(void)
 }
 #endif /* CONFIG_X86_X2APIC */
 
-void __init enable_IR_x2apic(void)
+int __init enable_IR(void)
 {
 #ifdef CONFIG_INTR_REMAP
int ret;
-   unsigned long flags;
-   struct IO_APIC_route_entry **ioapic_entries = NULL;
 
ret = dmar_table_init();
if (ret) {
pr_debug(dmar_table_init() failed with %d:\n, ret);
-   goto ir_failed;
+   return 0;
}
 
if (!intr_remapping_supported()) {
pr_debug(intr-remapping not supported\n);
-   goto ir_failed;
+   return 0;
}
 
-
if (!x2apic_preenabled  skip_ioapic_setup) {
pr_info(Skipped enabling intr-remap because of skipping 
io-apic setup\n);
-   return;
+   return 0;
}
 
+   if (enable_intr_remapping(x2apic_supported()))
+   return 0;
+
+   pr_info(Enabled Interrupt-remapping\n);
+
+   return 1;
+
+#endif
+   return 0;
+}
+
+void __init enable_IR_x2apic(void)
+{
+   unsigned long flags;
+   struct IO_APIC_route_entry **ioapic_entries = NULL;
+   int ret, x2apic_enabled = 0;
+
ioapic_entries = alloc_ioapic_entries();
if (!ioapic_entries) {
-   pr_info(Allocate ioapic_entries failed: %d\n, ret);
-   goto end;
+pr_info(Allocate ioapic_entries failed\n);
+goto out;
}
 
ret = save_IO_APIC_setup(ioapic_entries);
if (ret) {
pr_info(Saving IO-APIC state failed: %d\n, ret);
-   goto end;
+   goto out;
}
 
local_irq_save(flags);
-   mask_IO_APIC_setup(ioapic_entries);
mask_8259A();
+   mask_IO_APIC_setup(ioapic_entries);
 
-   ret = enable_intr_remapping(x2apic_supported());
-   if (ret)
-   goto end_restore;
+   ret = enable_IR();
+   if (!ret) {
+   /* IR is required if x2apic is enabled by BIOS even
+* when running in kvm since this indicates present
+* of APIC ID  255 */
+   if (max_physical_apicid  255 || !kvm_para_available())
+   goto nox2apic;
+   /* without IR all CPUs can be addressed by IOAPIC/MSI
+* only in physical mode */
+   x2apic_phys = 1;
+   }
 
-   pr_info(Enabled Interrupt-remapping\n);
+   x2apic_enabled = 1;
 
if (x2apic_supported()  !x2apic_mode) {
x2apic_mode = 1;
@@ -1416,41 +1439,30 @@ void __init enable_IR_x2apic(void)
pr_info(Enabled x2apic\n);
}
 
-end_restore:
-   if (ret)
-   /*
-* IR enabling failed
-*/
+nox2apic:
+   if (!ret) /* IR enabling failed */
restore_IO_APIC_setup(ioapic_entries);
-
unmask_8259A();
local_irq_restore(flags);
 
-end:
+out:
if (ioapic_entries)
free_ioapic_entries(ioapic_entries);
 
-   if (!ret)
+   if (x2apic_enabled)
return;
 
-ir_failed:
-   if (x2apic_preenabled)
+   if (x2apic_preenabled) {
+#ifdef CONFIG_INTR_REMAP
panic(x2apic enabled by bios. But IR enabling failed);
-   else if (cpu_has_x2apic)
-   pr_info(Not enabling x2apic,Intr-remapping\n);
 #else
-   if (!cpu_has_x2apic)
-   return;
-
-   if (x2apic_preenabled)
panic(x2apic enabled prior OS handover,
-  enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP);
+

Re: [PATCH v3] enable x2APIC without interrupt remapping under KVM

2009-07-01 Thread Eric W. Biederman

Avi Kivity a...@redhat.com writes:

 On 06/30/2009 10:36 PM, Eric W. Biederman wrote:
 The short version is I don't know what work arounds we will ultimately
 decide to deploy to work with real hardware.

 I have been seriously contemplating causing a cpu hot-unplug request
 to fail if we are in ioapic mode and we have irqs routed to the cpu
 that is being unplugged.


 Well, obviously we need to disassociate any irqs from such a cpu.  Could be 
 done
 from the kernel or only enforced by the kernel.
  

 Using the normal irq migration path we can move irqs off of a cpu reliably
 there just aren't any progress guarantees.


 Program the ioapic to the new cpu.  Wait a few milliseconds.  If it takes more
 than that to get an interrupt from the ioapic to the local apic, the machine 
 has
 much bigger problems.

In general you can not reprogram an ioapic safely unless the interrupt
is blocked at the source.  Which is why you either need the originating
device disabled or you have to do it in interrupt context.

I forget all of the details.  I just know in real hardware I experimented
with it a lot, and wound up hanging the ioapic state machine of both
intel and amd ioapics.

Migrating ioapic irqs in interrupt context sucks.  It just happens to
be what works reliably.

I do think the "wait an eternity in computer time, a short while in human
time" approach is a valid technique when you can do nothing better.  If flushing the
interrupt was my only problem that would solve it.

 Even with perfectly working hardware it is not possible in the general
 case to migrate an ioapic irq from one cpu to another outside of an
 interrupt handler without risking dropping an interrupt.


 Can't you generate a spurious interrupt immediately after the migration?  An
 extra interrupt shouldn't hurt.
  

 Nope.  The ioapics can't be told to send an interrupt.


 You can program the local apic ICR to generate an interrupt with the same
 vector.

But you can not program the apic ICR to generate a level triggered
interrupt with the same vector.  So you don't get the broadcast
behavior when you ack the apic.

 There is no general way to know you have seen the last interrupt
 floating around your system.  PCI ordering rules don't help because
 the ioapics can potentially take an out of band channel.


 Can you describe the problem scenario? an ioapic-lapic message delivered 
 to a
 dead cpu?
  

 Dropped irqs..  Driver hangs because it is waiting for an irq.  Hardware
 hangs because it is waiting for the cpu to process the irq.

 Potentially we get a level triggered irq that is never acked by
 the cpu that won't arm until the cpu sends an ack, and we can't
 send an ack from another cpu.



 I think a spurious interrupt generated through the local apic solves that
 problem.  For level-triggered interrupts, mask them before offlining the cpu.

So now we have a masked unacked irq.  It doesn't help in the slightest
that the cpu migration code puts irq migration last and request that we
do it all with interrupts disabled.

You might be right that by application of extreme ingenuity and completely
in spec ioapics there is a path that makes this all work.  Currently I
don't have fully in spec ioapics, and I don't have anyone interested enough
in cpu hotplug to be willing to rip things apart until interrupt migration
is a reasonable deal on x86.   Instead all I see are patches that mitigate
the worst of the brokenness.

At the same time with the interrupt remapping hardware because it doesn't
need the irq disabled at the source when we reprogram it I can make
everything stable much more easily.

Eric
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Newbie, struggling with graphics and qemu monitor

2009-07-01 Thread Michael Jinks

(Thanks Avi, and all.  Sorry if my questions are basic.  More of them below.)

On Wed, Jul 1, 2009 at 3:43 AM, Avi Kivitya...@redhat.com wrote:
 On 06/30/2009 09:33 PM, Michael Jinks wrote:

 I've looked but haven't found any way to attach a monitor process to
 an already-running guest.  Is this possible?  Or do we only get a
 monitor as a child process of the initial kvm invocation?

 Look up the -monitor option.

Aha: I'd been looking in the kvm man page, which on my system looks
a lot like, but isn't exactly like, the qemu man page.  The qemu one
has the -monitor option.  (Is that a bug?  Should I pester the package
maintainers for my distro?)

But now that I've read it I don't understand it.  Quoting:

   -monitor dev
   Redirect the monitor to host device dev (same devices as the serial
   port).  The default device is vc in graphical mode and stdio in
   non graphical mode.

On my KVM host machine, there's no /dev/vc, even though I do have one
guest running.  I have /dev/vcs, /dev/vcs(1-12) and /dev/vcsa(1-7),
all major number 7 and owned by the tty group so they look
promising, but fuser doesn't show any of them being in use.

At any rate assuming that I started up a guest with, say, -monitor
/dev/vcs1, how would I attach to that device afterward?  Does the kvm
command provide a way to do it or would I use a serial port handler,
like minicom?  Is there a default baud rate?

Again sorry if these are dumb questions, maybe I'm just not searching
on the right strings.

 If you run qemu in screen, use -vnc.  X and screen don't mix.

Okay, good to know.  But I'm still having the problem where vnc
clients crash unless the guest is in text-only mode, and I assume
that's going to be a problem when I try to install Windows guests.

Here's my VNC client output from booting a Gentoo install CD,
attaching VNC, and letting the guest boot into framebuffer mode.
Attachment works fine but the client crashes when the framebuffer
initializes, with Rect too large:

[...]
  True colour: max red 255 green 255 blue 255, shift red 16 green 8 blue 0
Using shared memory PutImage
Tunneling active: preferring tight encoding
Rect too large: 1024x1 at (0, 0)

...and here's the (trimmed) output from waiting until the system is
done booting before trying to attach VNC:

[...]
  True colour: max red 255 green 255 blue 255, shift red 16 green 8 blue 0
Using shared memory PutImage
Tunneling active: preferring tight encoding
Zero size rect - ignoring
Zero size rect - ignoring
Zero size rect - ignoring
Rect too large: 1x0 at (3, 1024)
ShmCleanup called


Any recommendations for clients or options to try?  Does the tightvnc
client have known issues when attaching to a qemu server?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Exception handling between QEMU and KVM

2009-07-01 Thread Christoffer Dall

OK, my question is this:

If I want to tell QEMU to quit from within the KVM_RUN ioctl in a way
that causes QEMU to exit gracefully (e.g. free any allocated memory
etc.) what is the way to do that?

I have tried setting EXIT_REASON to both KVM_EXIT_EXCEPTION and
KVM_EXIT_SHUTDOWN, but QEMU stays in the execution loop and
re-executes the machine over and over.

Is this some logic that has to be done architecture specifically in
kvm_arch_post_run(...) or ?

And yes, any exceptions should be handled in KVM, but in the case of
an unrecoverable error I'm left with reporting this to the user and
end the QEMU process, right?

Thanks,
Christoffer

On Wed, Jul 1, 2009 at 5:00 AM, Avi Kivitya...@redhat.com wrote:
 On 07/01/2009 06:28 AM, Christoffer Dall wrote:

 Hi all.

 We are still working on our ARM port of KVM and we are managing to let
 the guest decompress the kernel image and run from relocated address
 and we can support enabling MMU before this.

 However, to debug the relocated micro-hypervisor for exception handling, we
 are trying to implement some NOT_IMPLEMENTED() macros and ASSERT()
 macros.

 What we have done so far is simply to exit QEMU roughly after
 returning -EINVAL from the KVM_RUN system call, but before we start
 supporting an interrupt cycle we have to improve on this. Setting
 kvm_run-exit_reason = KVM_EXIT_EXCEPTION or kvm_run-exit_reason =
 KVM_EXIT_SHUTDOWN just results in QEMU looping in the cpu execution
 loop.

 Can someone point us in the direction of what we're missing here in
 order to exit the whole QEMU process from an exception occurring
 inside the guest?


 Not sure I understand.  Can't you call exit() or abort() after kvm returns
 an error?

 --
 error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Autotest] Adding kvm_subprocess

2009-07-01 Thread Lucas Meneghel Rodrigues

Hi Michael,

On Thu, 2009-06-18 at 11:27 -0400, Michael Goldish wrote:
 kvm_subprocess is a little weird in that it does two different things --
 handling of both non-interactive subprocesses and SSH sessions.
 With this approach I don't think we need to write an SSH subclass of
 kvm_spawn because it already does a lot of SSH stuff.

Fair enough. I just happen to like the approach taken by the pexpect folks to
separate the SSH implementation into a subclass, because the generic problem
itself is 'interacting with interactive programs in a programmatic way',
and handling SSH connections is a subset of this problem.

 If we do any subclassing at all -- I suggest that we remove all the SSH
 stuff from kvm_spawn and put it in a subclass somehow, so that kvm_spawn
 natively only handles non-interactive subprocess (with _tail(),
 get_output(), get_status() etc), and the subclass does everything else
 (read_up_to_prompt(), get_command_status_output() etc).  ssh_login()
 can remain an external function that creates and returns a kvm_spawn
 object.
 This will have to be done carefully because each 'user' of the kvm_spawn
 server needs a named pipe of its own, which will have to be handled by
 the constructor.
 If you think this is a good idea I'd rather make the necessary changes
 to kvm_subprocess myself.

Sounds good to me, I am totally OK with it.

 Does this make sense to you, or did I misunderstand what you meant by
 writing an SSH subclass?

No, you've nailed it. Sorry for the delay in answering.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

VM config file

2009-07-01 Thread Zhang Qian

Hi,

I'd like to know if there is a configuration file for each KVM virtual
machine. I can define a domain with the virsh define command, but where
are the configuration parameters persisted for the VM?

Thanks in advance!



Regards,
Qian
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Newbie, struggling with graphics and qemu monitor

2009-07-01 Thread Avi Kivity


On 07/01/2009 04:56 PM, Michael Jinks wrote:

(Thanks Avi, and all.  Sorry if my questions are basic.  More of them below.)

On Wed, Jul 1, 2009 at 3:43 AM, Avi Kivitya...@redhat.com  wrote:
   

On 06/30/2009 09:33 PM, Michael Jinks wrote:
 

I've looked but haven't found any way to attach a monitor process to
an already-running guest.  Is this possible?  Or do we only get a
monitor as a child process of the initial kvm invocation?
   

Look up the -monitor option.
 


Aha: I'd been looking in the kvm man page, which on my system looks
a lot like, but isn't exactly like, the qemu man page.  The qemu one
has the -monitor option.  (Is that a bug?  Should I pester the package
maintainers for my distro?)

   


No idea, really.


But now that I've read it I don't understand it.  Quoting:

-monitor dev
Redirect the monitor to host device dev (same devices as the serial
port).  The default device is vc in graphical mode and stdio in
non graphical mode.

On my KVM host machine, there's no /dev/vc, even though I do have one
guest running.  I have /dev/vcs, /dev/vcs(1-12) and /dev/vcsa(1-7),
all major number 7 and owned by the tty group so they look
promising, but fuser doesn't show any of them being in use.

At any rate assuming that I started up a guest with, say, -monitor
/dev/vcs1, how would I attach to that device afterward?  Does the kvm
command provide a way to do it or would I use a serial port handler,
like minicom?  Is there a default baud rate?
   


vc is something internal to qemu (can reach it using alt-ctrl-2).  Try 
-monitor stdio or -monitor tcp::4321 (and telnet to it).



If you run qemu in screen, use -vnc.  X and screen don't mix.
 


Okay, good to know.  But I'm still having the problem where vnc
clients crash unless the guest is in text-only mode, and I assume
that's going to be a problem when I try to install Windows guests.

Here's my VNC client output from booting a Gentoo install CD,
attaching VNC, and letting the guest boot into framebuffer mode.
Attachment works fine but the client crashes when the framebuffer
initializes, with Rect too large:


   


vnc and broken vnc clients don't mix.


Any recommendations for clients or options to try?  Does the tightvnc
client have known issues when attaching to a qemu server?
   


I use vinagre.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Exception handling between QEMU and KVM

2009-07-01 Thread Avi Kivity


On 07/01/2009 04:59 PM, Christoffer Dall wrote:

OK, my question is this:

If I want to tell QEMU to quit from within the KVM_RUN ioctl in a way
that causes QEMU to exit gracefully (e.g. free any allocated memory
etc.) what is the way to do that?
   


Have KVM_RUN return -ESOMETHING, qemu should abort when it sees that.


I have tried setting EXIT_REASON to both KVM_EXIT_EXCEPTION and
KVM_EXIT_SHUTDOWN, but QEMU stays in the execution loop and
re-executes the machine over and over.

Is this some logic that has to be done architecture specifically in
kvm_arch_post_run(...) or ?
   


I don't remember exactly.  It also depends on what version of the source 
you're looking at.



And yes, any exceptions should be handled in KVM, but in the case of
an unrecoverable error I'm left with reporting this to the user and
end the QEMU process, right?
   


Yes.  Current sources pause the vm so you can inspect guest state via 
the monitor.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: KVM and 32-bit hosts -- still supposed to work?

2009-07-01 Thread Avi Kivity


On 07/01/2009 02:38 PM, Alexander Graf wrote:
kvm is supported on 32-bit hosts.  Unfortunately since moving to 
kvm-autotest I no longer test on 32-bit, I'll try to improve the 
situation there.


If someone has spare cycles and can run kvm-autotest on their 
hardware, that would improve kvm quality measurably.


Can't you just run the tests in a 32 bit VM? :)


I'm afraid.  I don't know what I'm more afraid of: that it won't work, 
or that it will work.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Device assignment hotplug broken

2009-07-01 Thread Markus Armbruster

Sheng Yang sh...@linux.intel.com writes:

 On Tuesday 30 June 2009 20:28:08 Markus Armbruster wrote:
 Sheng Yang sh...@linux.intel.com writes:
  On Friday 26 June 2009 01:24:03 Avi Kivity wrote:
  The impact of the hotplug changes on device assignment were too
  difficult for me to fix up during the merge, so I disabled it
  temporarily.  Please take a look at qemu-kvm.git commit a3b371477e3.
 
  When can device assignment come back? 4 days have passed and no message
  from Markus Armbruster till now.

 I'm having difficulties testing my patch.  If it works for you, please
 let me know.  Once I'm satisfied it works, I'll post it properly.

 Hi Markus

 In my (limited) test, the patch works well.

 Thanks!

Many thanks for testing this.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Update registers after INIT/SIPI

2009-07-01 Thread Gleb Natapov

Load updated register into kernel after INIT/SIPI. Otherwise
vcpu starts at the wrong address after SIPI.

Signed-off-by: Gleb Natapov g...@redhat.com
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 1eb147e..5d3025a 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -1578,8 +1578,10 @@ uint32_t kvm_arch_get_supported_cpuid(CPUState *env, 
uint32_t function,
 
 void kvm_arch_process_irqchip_events(CPUState *env)
 {
+kvm_arch_save_regs(env);
 if (env-interrupt_request  CPU_INTERRUPT_INIT)
 do_cpu_init(env);
 if (env-interrupt_request  CPU_INTERRUPT_SIPI)
 do_cpu_sipi(env);
+kvm_arch_load_regs(env);
 }
--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Update registers after INIT/SIPI

2009-07-01 Thread Avi Kivity


On 07/01/2009 06:48 PM, Gleb Natapov wrote:

Load updated register into kernel after INIT/SIPI. Otherwise
vcpu starts at the wrong address after SIPI.
   


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [KVM-AUTOTEST PATCH] Adding iperf test

2009-07-01 Thread Lucas Meneghel Rodrigues

On Wed, 2009-07-01 at 14:43 +0300, Alexey Eremenko wrote:
 LMR: I too hate putting binaries in the source tree, but the alternative
 option is to provide a separate *.tar.bz2 for all the binary utils, and
 I'm not sure which way is better.

Yes, I don't have a clear idea either. It's currently under
discussion...

Lucas

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[KVM PATCH v8 0/3] irqfd fixes and enhancements

2009-07-01 Thread Gregory Haskins

(Applies to kvm.git/master:beeaacd1)

The following is the latest attempt to fix the races in irqfd/eventfd, as
well as restore DEASSIGN support.  For more details, please read the patch
headers.

As always, this series has been tested against the kvm-eventfd unit test
and everything appears to be functioning properly. You can download this
test here:

ftp://ftp.novell.com/dev/ghaskins/kvm-eventfd.tar.bz2

Kind Regards,
-Greg


[Changelog:

v8:
   *) Rebased to kvm.git/master:beeaacd1
   *) Dropped Davide's patch (2/5 in v7) since it's now upstream
   *) Folded v7's 1/5 and 3/5 together, and added a single
  eventfd hunk to convert wake_up_locked_polled to wake_up_polled
   *) Dropped irqfd-active bit in favor of irqfd_is_active() function
   *) Cleaned up comments in 1/3
   *) Dropped v7's 5/5 (slow-work)
   *) Added new patch (3/3) which makes the cleanup-wq's creation
  dynamic so as to avoid the resource penalty for guests that do
  not use irqfd.

v7:
   *) Addressed minor-nit feedback from Michael
   *) Cleaned up patch headers
   *) Re-added separate slow-work feature patch to end for comparison

v6:
   *) Removed slow-work in favor of using a dedicated single-thread
  workqueue.
   *) Condensed cleanup path to always use deferred shutdown
   *) Saved about 56 lines over v5, with the following diffstat:

   include/linux/kvm_host.h |2 
   virt/kvm/eventfd.c   |  248 
++-
   2 files changed, 97 insertions(+), 153 deletions(-)
v5:
   Untracked..
]

---

Gregory Haskins (3):
  KVM: create irqfd-cleanup-wq on demand
  KVM: add irqfd DEASSIGN feature
  KVM: Fix races in irqfd using new eventfd_kref_get interface


 fs/eventfd.c |7 -
 include/linux/kvm.h  |2 
 include/linux/kvm_host.h |6 +
 virt/kvm/eventfd.c   |  281 --
 4 files changed, 229 insertions(+), 67 deletions(-)

-- 
Signature
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[KVM PATCH v8 1/3] KVM: Fix races in irqfd using new eventfd_kref_get interface

2009-07-01 Thread Gregory Haskins

eventfd currently emits a POLLHUP wakeup on f_ops->release() to generate a
release callback.  This lets eventfd clients know if the eventfd is about
to go away and is very useful particularly for in-kernel clients.  However,
until recently it was not possible to use this feature of eventfd in a
race-free way.

This patch utilizes a new eventfd interface to rectify the problem.  It also
converts the eventfd POLLHUP generation code to use the locked variant
of wakeup.

Signed-off-by: Gregory Haskins ghask...@novell.com
CC: Davide Libenzi davi...@xmailserver.org
---

 fs/eventfd.c |7 --
 include/linux/kvm_host.h |5 +
 virt/kvm/eventfd.c   |  187 --
 3 files changed, 134 insertions(+), 65 deletions(-)

diff --git a/fs/eventfd.c b/fs/eventfd.c
index d9849a1..31d12de 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -105,12 +105,7 @@ static int eventfd_release(struct inode *inode, struct 
file *file)
 {
struct eventfd_ctx *ctx = file-private_data;
 
-   /*
-* No need to hold the lock here, since we are on the file cleanup
-* path and the ones still attached to the wait queue will be
-* serialized by wake_up_locked_poll().
-*/
-   wake_up_locked_poll(ctx-wqh, POLLHUP);
+   wake_up_poll(ctx-wqh, POLLHUP);
eventfd_ctx_put(ctx);
return 0;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1a8952f..7605bc4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -141,7 +141,10 @@ struct kvm {
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
 #ifdef CONFIG_HAVE_KVM_EVENTFD
-   struct list_head irqfds;
+   struct {
+   spinlock_tlock;
+   struct list_head  items;
+   } irqfds;
 #endif
struct kvm_vm_stat stat;
struct kvm_arch arch;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a9e7de7..05ce447 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -28,7 +28,6 @@
 #include linux/file.h
 #include linux/list.h
 #include linux/eventfd.h
-#include linux/srcu.h
 
 /*
  * 
@@ -37,66 +36,86 @@
  * Credit goes to Avi Kivity for the original idea.
  * 
  */
+
 struct _irqfd {
-   struct mutex  lock;
-   struct srcu_structsrcu;
struct kvm   *kvm;
+   struct eventfd_ctx   *eventfd;
int   gsi;
struct list_head  list;
poll_tablept;
wait_queue_head_t*wqh;
wait_queue_t  wait;
struct work_structinject;
+   struct work_structshutdown;
 };
 
+static struct workqueue_struct *irqfd_cleanup_wq;
+
 static void
 irqfd_inject(struct work_struct *work)
 {
struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
-   struct kvm *kvm;
-   int idx;
-
-   idx = srcu_read_lock(irqfd-srcu);
-
-   kvm = rcu_dereference(irqfd-kvm);
-   if (kvm) {
-   mutex_lock(kvm-irq_lock);
-   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
-   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
-   mutex_unlock(kvm-irq_lock);
-   }
+   struct kvm *kvm = irqfd-kvm;
 
-   srcu_read_unlock(irqfd-srcu, idx);
+   mutex_lock(kvm-irq_lock);
+   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
+   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
+   mutex_unlock(kvm-irq_lock);
 }
 
+/*
+ * Race-free decouple logic (ordering is critical)
+ */
 static void
-irqfd_disconnect(struct _irqfd *irqfd)
+irqfd_shutdown(struct work_struct *work)
 {
-   struct kvm *kvm;
+   struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
 
-   mutex_lock(irqfd-lock);
+   /*
+* Synchronize with the wait-queue and unhook ourselves to prevent
+* further events.
+*/
+   remove_wait_queue(irqfd-wqh, irqfd-wait);
+
+   /*
+* We know no new events will be scheduled at this point, so block
+* until all previously outstanding events have completed
+*/
+   flush_work(irqfd-inject);
+
+   /*
+* It is now safe to release the object's resources
+*/
+   eventfd_ctx_put(irqfd-eventfd);
+   kfree(irqfd);
+}
 
-   kvm = rcu_dereference(irqfd-kvm);
-   rcu_assign_pointer(irqfd-kvm, NULL);
 
-   mutex_unlock(irqfd-lock);
+/* assumes kvm-irqfds.lock is held */
+static bool
+irqfd_is_active(struct _irqfd *irqfd)
+{
+   return list_empty(irqfd-list) ? false : true;
+}
 
-   if (!kvm)
-   return;
+/*
+ * Mark the irqfd as inactive and schedule it for removal
+ *
+ * assumes kvm-irqfds.lock is held
+ */
+static void

[KVM PATCH v8 2/3] KVM: add irqfd DEASSIGN feature

2009-07-01 Thread Gregory Haskins

DEASSIGN allows us to optionally disassociate an IRQFD from its underlying
eventfd without destroying the eventfd in the process.  This is useful
for conditions like live-migration which may have an eventfd associated
with a device and an IRQFD.  We need to be able to decouple the guest
from the event source while not perturbing the event source itself.

Signed-off-by: Gregory Haskins ghask...@novell.com
CC: Michael S. Tsirkin m...@redhat.com
---

 include/linux/kvm.h |2 ++
 virt/kvm/eventfd.c  |   46 --
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 69d3d73..76c6408 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -461,6 +461,8 @@ struct kvm_x86_mce {
 };
 #endif
 
+#define KVM_IRQFD_FLAG_DEASSIGN (1  0)
+
 struct kvm_irqfd {
__u32 fd;
__u32 gsi;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 05ce447..0fd200c 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -161,8 +161,8 @@ irqfd_ptable_queue_proc(struct file *file, 
wait_queue_head_t *wqh,
add_wait_queue(wqh, irqfd-wait);
 }
 
-int
-kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
+static int
+kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
struct _irqfd *irqfd;
struct file *file = NULL;
@@ -241,6 +241,48 @@ kvm_irqfd_init(struct kvm *kvm)
 }
 
 /*
+ * shutdown any irqfd's that match fd+gsi
+ */
+static int
+kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
+{
+   struct _irqfd *irqfd, *tmp;
+   struct eventfd_ctx *eventfd;
+
+   eventfd = eventfd_ctx_fdget(fd);
+   if (IS_ERR(eventfd))
+   return PTR_ERR(eventfd);
+
+   spin_lock_irq(kvm-irqfds.lock);
+
+   list_for_each_entry_safe(irqfd, tmp, kvm-irqfds.items, list) {
+   if (irqfd-eventfd == eventfd  irqfd-gsi == gsi)
+   irqfd_deactivate(irqfd);
+   }
+
+   spin_unlock_irq(kvm-irqfds.lock);
+   eventfd_ctx_put(eventfd);
+
+   /*
+* Block until we know all outstanding shutdown jobs have completed
+* so that we guarantee there will not be any more interrupts on this
+* gsi once this deassign function returns.
+*/
+   flush_workqueue(irqfd_cleanup_wq);
+
+   return 0;
+}
+
+int
+kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
+{
+   if (flags  KVM_IRQFD_FLAG_DEASSIGN)
+   return kvm_irqfd_deassign(kvm, fd, gsi);
+
+   return kvm_irqfd_assign(kvm, fd, gsi);
+}
+
+/*
  * This function is called as the kvm VM fd is being released. Shutdown all
  * irqfds that still remain open
  */

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[KVM PATCH v8 3/3] KVM: create irqfd-cleanup-wq on demand

2009-07-01 Thread Gregory Haskins

We currently create this wq on module_init, which may be wasteful if the
host never creates a guest that uses irqfd.  This patch changes the
algorithm so that the workqueue is only created when at least one guest
is using irqfd.  The queue is cleaned up when the last guest using irqfd
is shut down.

To keep things simple, we only check whether the guest has tried to create
an irqfd, not whether there are actually irqfds active.

Signed-off-by: Gregory Haskins ghask...@novell.com
---

 include/linux/kvm_host.h |1 
 virt/kvm/eventfd.c   |  100 ++
 2 files changed, 75 insertions(+), 26 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7605bc4..0b0b6ac 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -144,6 +144,7 @@ struct kvm {
struct {
spinlock_tlock;
struct list_head  items;
+   int   init:1;
} irqfds;
 #endif
struct kvm_vm_stat stat;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 0fd200c..87f615b 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -49,7 +49,16 @@ struct _irqfd {
struct work_structshutdown;
 };
 
-static struct workqueue_struct *irqfd_cleanup_wq;
+struct _irqfd_cleanup {
+   struct mutex lock;
+   int  refs;
+   struct workqueue_struct *wq;
+};
+
+static struct _irqfd_cleanup irqfd_cleanup = {
+   .lock = __MUTEX_INITIALIZER(irqfd_cleanup.lock),
+   .refs = 0,
+};
 
 static void
 irqfd_inject(struct work_struct *work)
@@ -110,7 +119,7 @@ irqfd_deactivate(struct _irqfd *irqfd)
 
list_del_init(irqfd-list);
 
-   queue_work(irqfd_cleanup_wq, irqfd-shutdown);
+   queue_work(irqfd_cleanup.wq, irqfd-shutdown);
 }
 
 /*
@@ -161,6 +170,62 @@ irqfd_ptable_queue_proc(struct file *file, 
wait_queue_head_t *wqh,
add_wait_queue(wqh, irqfd-wait);
 }
 
+/*
+ * create a host-wide workqueue for issuing deferred shutdown requests
+ * aggregated from all vm* instances. We need our own isolated single-thread
+ * queue to prevent deadlock against flushing the normal work-queue.
+ */
+static int
+irqfd_cleanup_init(struct kvm *kvm)
+{
+   int ret = 0;
+
+   mutex_lock(irqfd_cleanup.lock);
+
+   /*
+* Check the current init state from within the lock so that we
+* sync all users to the thread creation.
+*/
+   if (kvm-irqfds.init)
+   goto out;
+
+   if (!irqfd_cleanup.refs) {
+   struct workqueue_struct *wq;
+
+   wq = create_singlethread_workqueue(kvm-irqfd-cleanup);
+   if (!wq) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
+   irqfd_cleanup.wq = wq;
+   }
+
+   irqfd_cleanup.refs++;
+   kvm-irqfds.init = true;
+
+out:
+   mutex_unlock(irqfd_cleanup.lock);
+
+   return ret;
+}
+
+static void
+irqfd_cleanup_release(struct kvm *kvm)
+{
+   if (!kvm-irqfds.init)
+   return;
+
+   mutex_lock(irqfd_cleanup.lock);
+
+   if (!(--irqfd_cleanup.refs))
+   destroy_workqueue(irqfd_cleanup.wq);
+
+   mutex_unlock(irqfd_cleanup.lock);
+
+   kvm-irqfds.init = false;
+}
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
@@ -170,6 +235,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
int ret;
unsigned int events;
 
+   ret = irqfd_cleanup_init(kvm);
+   if (ret  0)
+   return ret;
+
irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
return -ENOMEM;
@@ -268,7 +337,7 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
 * so that we guarantee there will not be any more interrupts on this
 * gsi once this deassign function returns.
 */
-   flush_workqueue(irqfd_cleanup_wq);
+   flush_workqueue(irqfd_cleanup.wq);
 
return 0;
 }
@@ -302,28 +371,7 @@ kvm_irqfd_release(struct kvm *kvm)
 * Block until we know all outstanding shutdown jobs have completed
 * since we do not take a kvm* reference.
 */
-   flush_workqueue(irqfd_cleanup_wq);
-
-}
-
-/*
- * create a host-wide workqueue for issuing deferred shutdown requests
- * aggregated from all vm* instances. We need our own isolated single-thread
- * queue to prevent deadlock against flushing the normal work-queue.
- */
-static int __init irqfd_module_init(void)
-{
-   irqfd_cleanup_wq = create_singlethread_workqueue(kvm-irqfd-cleanup);
-   if (!irqfd_cleanup_wq)
-   return -ENOMEM;
-
-   return 0;
-}
+   flush_workqueue(irqfd_cleanup.wq);
+   irqfd_cleanup_release(kvm);
 
-static void __exit irqfd_module_exit(void)
-{
-   destroy_workqueue(irqfd_cleanup_wq);
 }
-
-module_init(irqfd_module_init);
-module_exit(irqfd_module_exit);

--
To unsubscribe from this list:

Re: [Autotest] [KVM-AUTOTEST PATCH] Adding iperf test

2009-07-01 Thread Martin Bligh

On Wed, Jul 1, 2009 at 8:57 AM, Lucas Meneghel Rodriguesl...@redhat.com wrote:
 On Wed, 2009-07-01 at 14:43 +0300, Alexey Eremenko wrote:
 LMR: me too, hate putting binaries in source tree, but the alternative
 option is to provide separate *.tar.bz2 for all the binary utils, and
 I don't sure which way is better.

 Yes, I don't have a clear idea as well. It's currently under
 discussion...

Is KVM x86_64 only?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Fix up device assignment hotplug and re-enable it

2009-07-01 Thread Markus Armbruster

Disabled in merge commit a3b371477e3.

Signed-off-by: Markus Armbruster arm...@redhat.com
---
 hw/device-assignment.c |   11 +--
 hw/pci.c   |2 +-
 hw/pci.h   |1 +
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index e282498..88c3baf 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1126,9 +1126,8 @@ static int assigned_dev_register_msix_mmio(AssignedDevice 
*dev)
 struct PCIDevice *init_assigned_device(AssignedDevInfo *adev,
const char *devaddr)
 {
-printf(init_assigned_device: fix me please\n);
-return NULL;
-#if 0
+PCIBus *bus;
+int devfn;
 int r;
 AssignedDevice *dev;
 PCIDevice *pci_dev;
@@ -1138,8 +1137,9 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo 
*adev,
 DEBUG(Registering real physical device %s (bus=%x dev=%x func=%x)\n,
   adev-name, adev-bus, adev-dev, adev-func);
 
+bus = pci_get_bus_devfn(devfn, devaddr);
 pci_dev = pci_register_device(bus, adev-name,
-  sizeof(AssignedDevice), -1, assigned_dev_pci_read_config,
+  sizeof(AssignedDevice), devfn, assigned_dev_pci_read_config,
   assigned_dev_pci_write_config);
 dev = container_of(pci_dev, AssignedDevice, dev);
 
@@ -1203,7 +1203,6 @@ assigned_out:
 out:
 free_assigned_device(adev);
 return NULL;
-#endif
 }
 
 /*
@@ -1268,7 +1267,7 @@ void add_assigned_devices(PCIBus *bus, const char 
**devices, int n_devices)
 exit(1);
 }
 
-if (!init_assigned_device(adev, bus)) {
+if (!init_assigned_device(adev, NULL)) {
 fprintf(stderr, Failed to initialize assigned device %s\n,
 devices[i]);
 exit(1);
diff --git a/hw/pci.c b/hw/pci.c
index 618582a..0107096 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -290,7 +290,7 @@ int pci_read_devaddr(const char *addr, int *domp, int 
*busp, unsigned *slotp)
 return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
-static PCIBus *pci_get_bus_devfn(int *devfnp, const char *devaddr)
+PCIBus *pci_get_bus_devfn(int *devfnp, const char *devaddr)
 {
 int dom, bus;
 unsigned slot;
diff --git a/hw/pci.h b/hw/pci.h
index 7d8df83..a2c3a23 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -239,6 +239,7 @@ int pci_read_devaddr(const char *addr, int *domp, int 
*busp, unsigned *slotp);
 
 int pci_parse_host_devaddr(const char *addr, int *busp,
int *slotp, int *funcp);
+PCIBus *pci_get_bus_devfn(int *devfnp, const char *devaddr);
 
 void pci_info(Monitor *mon);
 PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
-- 
1.6.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: VM config file

2009-07-01 Thread Charles Duffy


Zhang Qian wrote:

I'd like to know if there is a configuration file for each KVM virtual
machine. I can define a domain with the "virsh define" command, but where
are the configuration parameters persisted for the VM?


virsh is not a part of kvm, but rather libvirt; you should ask for 
support for it on the libvirt mailing list.


That said -- you can list defined VMs through virsh list --all, edit 
them through virsh edit, and dump their XML via virsh dumpxml. The 
config files are kept in /etc/libvirt/qemu, but you SHOULD NOT edit them 
directly -- just like /etc/sudoers should be edited only through visudo, 
/etc/libvirt/qemu/* should be edited only through libvirt methods (which 
can be accessed through the virsh command); this makes sure libvirtd is 
always aware of any changes, and prevents invalid configuration from 
ever being written to disk.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Fix up ipf.c for recent merges

2009-07-01 Thread Markus Armbruster

Commit 42f0a928 merged support for addr=... in option argument of -net
nic, but failed to update ipf_init1().

Commit 7a8f3ed9 merged support for addr=... in option argument of
-drive if=virtio, but failed to update ipf_init1().

Untested.

Signed-off-by: Markus Armbruster arm...@redhat.com
---
 hw/ipf.c |7 +--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/ipf.c b/hw/ipf.c
index 8aec258..04b7b2c 100644
--- a/hw/ipf.c
+++ b/hw/ipf.c
@@ -384,6 +384,7 @@ static void ipf_init1(ram_addr_t ram_size,
 ram_addr_t ram_addr;
 ram_addr_t above_4g_mem_size = 0;
 PCIBus *pci_bus;
+PCIDevice *pci_dev;
 int piix3_devfn = -1;
 CPUState *env;
 qemu_irq *cpu_irq;
@@ -543,7 +544,7 @@ static void ipf_init1(ram_addr_t ram_size,
 if (!pci_enabled || (nd-model  strcmp(nd-model, ne2k_isa) == 0))
 pc_init_ne2k_isa(nd, i8259);
 else
-pci_nic_init(pci_bus, nd, -1, e1000);
+pci_nic_init(nd, e1000, NULL);
 }
 
 #undef USE_HYPERCALL  //Disable it now, need to implement later!
@@ -628,7 +629,9 @@ static void ipf_init1(ram_addr_t ram_size,
int unit_id = 0;
 
while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
-pci_create_simple(pci_bus, -1, virtio-blk-pci);
+pci_dev = pci_create(virtio-blk-pci,
+ drives_table[index].devaddr);
+qdev_init(pci_dev-qdev);
unit_id++;
}
 }
-- 
1.6.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/3 v3] Add X2APIC support.

2009-07-01 Thread Gleb Natapov

Add the x2apic string to the extended feature name array so that it is
recognized by the -cpu cputype,+x2apic command line option. If the kvm kernel
module does not support x2apic, the option will be trimmed from cpuid.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 target-i386/helper.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-i386/helper.c b/target-i386/helper.c
index d76c224..87c04e5 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -45,7 +45,7 @@ static const char *feature_name[] = {
 static const char *ext_feature_name[] = {
 pni /* Intel,AMD sse3 */, NULL, NULL, monitor, ds_cpl, vmx, NULL 
/* Linux smx */, est,
 tm2, ssse3, cid, NULL, NULL, cx16, xtpr, NULL,
-NULL, NULL, dca, NULL, NULL, NULL, NULL, popcnt,
+NULL, NULL, dca, NULL, NULL, x2apic, NULL, popcnt,
 NULL, NULL, NULL, NULL, NULL, NULL, NULL, hypervisor,
 };
 static const char *ext2_feature_name[] = {
-- 
1.6.2.1

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/3 v3] x2APIC emulation for kvm

2009-07-01 Thread Gleb Natapov

This patch series implements x2APIC emulation for kvm. x2APIC is an MSR
interface to a local apic with performance/scalability enhancements. It
brings 32 bit apic ids (ids > 255 cannot be used without interrupt
remapping since MSR/IOAPIC still support 8 bit destination IDs), 64bit
ICR access, reading of ICR after IPI is no longer required.

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/3 v3] Add Directed EOI support to APIC emulation

2009-07-01 Thread Gleb Natapov

Directed EOI is specified by x2APIC, but is available even when lapic is
in xAPIC mode.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/x86/include/asm/apicdef.h |2 ++
 arch/x86/kvm/lapic.c   |   38 ++
 arch/x86/kvm/lapic.h   |1 +
 arch/x86/kvm/x86.c |4 ++--
 arch/x86/kvm/x86.h |4 
 5 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 7ddb36a..74ca38f 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -14,6 +14,7 @@
 
 #defineAPIC_LVR0x30
 #defineAPIC_LVR_MASK   0xFF00FF
+#defineAPIC_LVR_DIRECTED_EOI   (1  24)
 #defineGET_APIC_VERSION(x) ((x)  0xFFu)
 #defineGET_APIC_MAXLVT(x)  (((x)  16)  0xFFu)
 #ifdef CONFIG_X86_32
@@ -40,6 +41,7 @@
 #defineAPIC_DFR_CLUSTER0x0FFFul
 #defineAPIC_DFR_FLAT   0xul
 #defineAPIC_SPIV   0xF0
+#defineAPIC_SPIV_DIRECTED_EOI  (1  12)
 #defineAPIC_SPIV_FOCUS_DISABLED(1  9)
 #defineAPIC_SPIV_APIC_ENABLED  (1  8)
 #defineAPIC_ISR0x100
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2e02865..20c2366 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -35,6 +35,7 @@
 #include kvm_cache_regs.h
 #include irq.h
 #include trace.h
+#include x86.h
 
 #ifndef CONFIG_X86_64
 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -142,6 +143,21 @@ static inline int apic_lvt_nmi_mode(u32 lvt_val)
return (lvt_val  (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
 }
 
+void kvm_apic_set_version(struct kvm_vcpu *vcpu)
+{
+   struct kvm_lapic *apic = vcpu-arch.apic;
+   struct kvm_cpuid_entry2 *feat;
+   u32 v = APIC_VERSION;
+
+   if (!irqchip_in_kernel(vcpu-kvm))
+   return;
+
+   feat = kvm_find_cpuid_entry(apic-vcpu, 0x1, 0);
+   if (feat  (feat-ecx  (1  (X86_FEATURE_X2APIC  31
+   v |= APIC_LVR_DIRECTED_EOI;
+   apic_set_reg(apic, APIC_LVR, v);
+}
+
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
LVT_MASK | APIC_MODE_MASK,  /* LVTTHMR */
@@ -442,9 +458,11 @@ static void apic_set_eoi(struct kvm_lapic *apic)
trigger_mode = IOAPIC_LEVEL_TRIG;
else
trigger_mode = IOAPIC_EDGE_TRIG;
-   mutex_lock(apic-vcpu-kvm-irq_lock);
-   kvm_ioapic_update_eoi(apic-vcpu-kvm, vector, trigger_mode);
-   mutex_unlock(apic-vcpu-kvm-irq_lock);
+   if (!(apic_get_reg(apic, APIC_SPIV)  APIC_SPIV_DIRECTED_EOI)) {
+   mutex_lock(apic-vcpu-kvm-irq_lock);
+   kvm_ioapic_update_eoi(apic-vcpu-kvm, vector, trigger_mode);
+   mutex_unlock(apic-vcpu-kvm-irq_lock);
+   }
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -682,8 +700,11 @@ static void apic_mmio_write(struct kvm_io_device *this,
apic_set_reg(apic, APIC_DFR, val | 0x0FFF);
break;
 
-   case APIC_SPIV:
-   apic_set_reg(apic, APIC_SPIV, val  0x3ff);
+   case APIC_SPIV: {
+   u32 mask = 0x3ff;
+   if (apic_get_reg(apic, APIC_LVR)  APIC_LVR_DIRECTED_EOI)
+   mask |= APIC_SPIV_DIRECTED_EOI;
+   apic_set_reg(apic, APIC_SPIV, val  mask);
if (!(val  APIC_SPIV_APIC_ENABLED)) {
int i;
u32 lvt_val;
@@ -698,7 +719,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 
}
break;
-
+   }
case APIC_ICR:
/* No delay here, so we always clear the pending bit */
apic_set_reg(apic, APIC_ICR, val  ~(1  12));
@@ -840,7 +861,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
hrtimer_cancel(apic-lapic_timer.timer);
 
apic_set_reg(apic, APIC_ID, vcpu-vcpu_id  24);
-   apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+   kvm_apic_set_version(apic-vcpu);
 
for (i = 0; i  APIC_LVT_NUM; i++)
apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
@@ -1045,7 +1066,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
apic-base_address = vcpu-arch.apic_base 
 MSR_IA32_APICBASE_BASE;
-   apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+   kvm_apic_set_version(vcpu);
+
apic_update_ppr(apic);
hrtimer_cancel(apic-lapic_timer.timer);
update_divide_count(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 3f3ecc6..bc1c524 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -29,6 +29,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void

[PATCH 2/3 v3] x2apic interface to lapic

2009-07-01 Thread Gleb Natapov

This patch implements the MSR interface to the local apic as defined by the
Intel x2apic specification.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 arch/x86/kvm/lapic.c |  193 ++
 arch/x86/kvm/lapic.h |2 +
 arch/x86/kvm/x86.c   |7 ++-
 3 files changed, 154 insertions(+), 48 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 20c2366..52ab8c7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -32,6 +32,7 @@
 #include asm/current.h
 #include asm/apicdef.h
 #include asm/atomic.h
+#include asm/apicdef.h
 #include kvm_cache_regs.h
 #include irq.h
 #include trace.h
@@ -158,6 +159,11 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_LVR, v);
 }
 
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+   return apic-vcpu-arch.apic_base  X2APIC_ENABLE;
+}
+
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
LVT_MASK | APIC_MODE_MASK,  /* LVTTHMR */
@@ -284,7 +290,12 @@ int kvm_apic_match_physical_addr(struct kvm_lapic *apic, 
u16 dest)
 int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 {
int result = 0;
-   u8 logical_id;
+   u32 logical_id;
+
+   if (apic_x2apic_mode(apic)) {
+   logical_id = apic_get_reg(apic, APIC_LDR);
+   return logical_id  mda;
+   }
 
logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
 
@@ -477,7 +488,10 @@ static void apic_send_ipi(struct kvm_lapic *apic)
irq.level = icr_low  APIC_INT_ASSERT;
irq.trig_mode = icr_low  APIC_INT_LEVELTRIG;
irq.shorthand = icr_low  APIC_SHORT_MASK;
-   irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
+   if (apic_x2apic_mode(apic))
+   irq.dest_id = icr_high;
+   else
+   irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
 
apic_debug(icr_high 0x%x, icr_low 0x%x, 
   short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, 
@@ -538,6 +552,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int 
offset)
return 0;
 
switch (offset) {
+   case APIC_ID:
+   apic_get_reg(apic, offset);
+   break;
case APIC_ARBPRI:
printk(KERN_WARNING Access APIC ARBPRI register 
   which is for P6\n);
@@ -564,19 +581,26 @@ static inline struct kvm_lapic *to_lapic(struct 
kvm_io_device *dev)
return container_of(dev, struct kvm_lapic, dev);
 }
 
-static void apic_mmio_read(struct kvm_io_device *this,
-  gpa_t address, int len, void *data)
+static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
+   void *data)
 {
-   struct kvm_lapic *apic = to_lapic(this);
-   unsigned int offset = address - apic-base_address;
unsigned char alignment = offset  0xf;
u32 result;
+   /* this bitmask has a bit cleared for each reserver register */
+   static const u64 rmask = 0x43ff01ffe70cULL;
 
if ((alignment + len)  4) {
-   printk(KERN_ERR KVM_APIC_READ: alignment error %lx %d,
-  (unsigned long)address, len);
-   return;
+   printk(KERN_ERR KVM_APIC_READ: alignment error %x %d\n,
+   offset, len);
+   return 1;
}
+
+   if (offset  0x3f0 || !(rmask  (1ULL  (offset  4 {
+   printk(KERN_ERR KVM_APIC_READ: read reserved register %x\n,
+   offset);
+   return 1;
+   }
+
result = __apic_read(apic, offset  ~0xf);
 
trace_kvm_apic_read(offset, result);
@@ -592,6 +616,16 @@ static void apic_mmio_read(struct kvm_io_device *this,
   should be 1,2, or 4 instead\n, len);
break;
}
+   return 0;
+}
+
+static void apic_mmio_read(struct kvm_io_device *this,
+  gpa_t address, int len, void *data)
+{
+   struct kvm_lapic *apic = to_lapic(this);
+   u32 offset = address - apic-base_address;
+
+   apic_reg_read(apic, offset, len, data);
 }
 
 static void update_divide_count(struct kvm_lapic *apic)
@@ -647,40 +681,18 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic 
*apic, u32 lvt0_val)
apic-vcpu-kvm-arch.vapics_in_nmi_mode--;
 }
 
-static void apic_mmio_write(struct kvm_io_device *this,
-   gpa_t address, int len, const void *data)
+static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
-   struct kvm_lapic *apic = to_lapic(this);
-   unsigned int offset = address - apic-base_address;
-   unsigned char alignment = offset  0xf;
-   u32 val;
-
-   /*
-* APIC register must be aligned on 128-bits boundary.
-* 32/64/128 bits registers must be accessed thru 32 bits.
-* Refer SDM 8.4.1
-

Re: [Autotest] [KVM-AUTOTEST PATCH] Adding iperf test

2009-07-01 Thread Martin Bligh

 LMR: me too, hate putting binaries in source tree, but the alternative
 option is to provide separate *.tar.bz2 for all the binary utils, and
 I don't sure which way is better.


 Yes, I don't have a clear idea as well. It's currently under
 discussion...


 Is KVM x86_64 only?


 It's x86-64, i386, ia64, s390, and powerpc 44x/e500 only.

OK, then it's difficult to see using binaries? Can we not
compile these on the system at use time (see the client/deps
directory for other stuff we do this for)

M.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Qemu-devel] [PATCH] Warn if a qcow (not qcow2) file is opened

2009-07-01 Thread Andreas Färber



Am 30.06.2009 um 15:32 schrieb Anthony Liguori:


Kevin Wolf wrote:

Avi Kivity schrieb:

The qcow block driver format is no longer maintained and likely  
contains
serious data corruptors.  Urge users to stay away from it, and  
advertise

the new and improved replacement.

Signed-off-by: Avi Kivity a...@redhat.com


vvfat is using qcow internally, so the warning will appear there,  
too.

Not that warning against vvfat would be a bad thing, but this error
message could be confusing.

Maybe we're lucky enough and vvfat survives a s/qcow/qcow2/, but I
really never wanted to touch that code...


I'm not sure how I feel about this.  Can we prove qcow is broken?   
Is it only broken for writes and not reads?


If we're printing a warning, does that mean we want to deprecate  
qcow and eventually remove it (or remove write support, at least)?


I'm confused now. Only recently someone stepped up, saying that qcow2  
was broken and that qcow should be used instead for safety reasons.  
Now all of a sudden, it's the exact opposite, you're even considering  
replacing qcow with qcow2 for vvfat and dropping qcow support.


Andreas

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: ESX on KVM requirements

2009-07-01 Thread Ben Sanders

Finally got it to work on a 32 bit OS (Ubuntu 9.04), both on the
phenom 9950 and another machine.  I haven't tried running any guests
yet.

I suppose the TSC patch doesn't work on 64 bit hosts.

Thanks for all your help,

Ben

On Fri, Jun 26, 2009 at 1:38 AM, Alexander Grafag...@suse.de wrote:

 Well, your machine can do npt, right?

 Any ideas?

 Hm. ESX breaks because of

 ASSERT
 /build/mts/release/bora-123630/bora/vmkernel/sched/cpusched_alloc.c:3399

 which is just after the TSC check. So I'm pretty sure it's the "Make vmport
 report the processor speed" patch that should make your scenario work.

 I can't really tell why it doesn't work for you.

 Alex

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] Revert KVM: x86: check for cr3 validity in ioctl_set_sregs

2009-07-01 Thread Jan Kiszka

This reverts commit 6c20e1442bb1c62914bb85b7f4a38973d2a423ba.

To my understanding, it became obsolete with the advent of the more
robust check in mmu_alloc_roots (89da4ff17f). Moreover, it prevents
the conceptually safe pattern

 1. set sregs
 2. register mem-slots
 3. run vcpu

by setting a sticky triple fault during step 1.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

 arch/x86/kvm/x86.c |8 +---
 1 files changed, 1 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index af53f64..8c5f7ef 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4355,13 +4355,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
-
-   down_read(&vcpu->kvm->slots_lock);
-   if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
-   vcpu->arch.cr3 = sregs->cr3;
-   else
-   set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
-   up_read(&vcpu->kvm->slots_lock);
+   vcpu->arch.cr3 = sregs->cr3;
 
kvm_set_cr8(vcpu, sregs->cr8);
 



signature.asc
Description: OpenPGP digital signature

Re: [KVM PATCH v8 1/3] KVM: Fix races in irqfd using new eventfd_kref_get interface

2009-07-01 Thread Gregory Haskins

Gregory Haskins wrote:
  
 + eventfd = eventfd_ctx_fileget(file);
 + if (IS_ERR(file)) {
 + ret = PTR_ERR(file);
 + goto fail;
 + }
 +
 + irqfd-eventfd = eventfd;
 +
   

sigh

Just noticed the typo (return eventfd but error-check file).  Looks
like I will need at least a v9.  Is there any other feedback before I
push out the fix for v8?

-Greg



signature.asc
Description: OpenPGP digital signature

Re: [Autotest] [KVM-AUTOTEST PATCH] Adding iperf test

2009-07-01 Thread Lucas Meneghel Rodrigues

On Wed, 2009-07-01 at 10:16 -0700, Martin Bligh wrote:
  LMR: me too, hate putting binaries in source tree, but the alternative
  option is to provide separate *.tar.bz2 for all the binary utils, and
  I'm not sure which way is better.
 
 
  Yes, I don't have a clear idea as well. It's currently under
  discussion...
 
 
  Is KVM x86_64 only?
 
 
  It's x86-64, i386, ia64, s390, and powerpc 44x/e500 only.
 
 OK, then it's difficult to see using binaries? Can we not
 compile these on the system at use time (see the client/deps
 directory for other stuff we do this for)

Biggest trouble is compiling the test under windows hosts. We are
figuring out a way to work around this problem.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v5] enable x2APIC without interrupt remapping under KVM

2009-07-01 Thread Suresh Siddha

On Wed, 2009-07-01 at 06:30 -0700, Gleb Natapov wrote:
 KVM would like to provide x2APIC interface to a guest without emulating
 interrupt remapping device. The reason KVM prefers guest to use x2APIC
 is that x2APIC interface is better virtualizable and provides better
 performance than mmio xAPIC interface:
 
 - msr exits are faster than mmio (no page table walk, emulation)
 - no need to read back ICR to look at the busy bit
 - one 64 bit ICR write instead of two 32 bit writes
 - shared code with the Hyper-V paravirt interface
 
 Included patch changes x2APIC enabling logic to enable it even if IR
 initialization failed, but kernel runs under KVM and no apic id is
 greater than 255 (if there is one spec requires BIOS to move to x2apic
 mode before starting an OS).
 
 Signed-off-by: Gleb Natapov g...@redhat.com

Acked-by: Suresh Siddha suresh.b.sid...@intel.com

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: slow guest performance with build load, looking for ideas

2009-07-01 Thread Erik Jacobson

I wanted to post the latest test run to the thread.

Avi Kivity provided some ideas to try.  I had mixed luck.  I'd like to try
this again if we have any thoughts on the vpid/ept issue, or any other
ideas for drilling down on this.  Avi Kivity mentioned LVM in the thread.
I continued to just export the whole /dev/sdb to the guest. I'm happy to
try LVM in some form if we think it would help?

As indicated, I still had trouble locating information about ept and vpid
(see below).  Several Fedora11 packages were updated in both host and guest 
since the last run, so we're at current F11+updates.  I don't know enough
about some of these kvm settings to do much beyond what I'm told to try.

System hardware:
 * Same machines as used before, extensive system detail posted earlier in
   the thread.
 * Same Nehalem based XE270 system as before
 * Hyperthreading disabled
 * System was the same as before.  Host has 8 cores, 2 sockets, and is
   Nehalem.  (Intel(R) Xeon(R) CPU X5570  @ 2.93GHz)
 * root and workarea disks are nothing special no LVM used.
 * 8gb host memory

System Settings:
 * chkconfig ntpd off
 * service ntpd stop
 * $ cat /sys/devices/system/clocksource/clocksource0/current_clocksource
kvm-clock
 * ensured kvm_stat was available on the host
 * I could NOT find vpid and ept parameters on the host.  They weren't here:
   /sys/module/kvm_intel/parameters
   nor here
   /sys/module/kvm/parameters
   So the check for those parameters resulted in no information.
   Didn't see them elsewhere either:
   # pwd
   /sys
   # find . -name vpid -print
   # find . -name ept -print

 * Version information:
   kernel host and guest: 2.6.29.5-191.fc11.x86_64
   kvm: qemu-kvm-0.10.5-3.fc11.x86_64, qemu-system-x86-0.10.5-3.fc11.x86_64

 * Build area disk is the whole /dev/sdb drive exported to the guest.  I did 
   not use LVM.

 * Root is a raw disk image, pre-allocated

 * Host and guest are fedora11 with all current updates applied.

 * 8 cpu, 4gb memory exported to guest.

 * All disks exported virtio


I had done some stuff to set up the test including a build I didn't count.

GUEST time (make -j12 && make -j12 modules), work area disk no cache param
--
kvm_stat output BEFORE running this test:

kvm statistics

 efer_reload 13   0
 exits 271450761142
 fpu_reload 1298729   0
 halt_exits 2152011 189
 halt_wakeup 494689 123
 host_state_reload  4998646 837
 hypercalls   0   0
 insn_emulation10165593 302
 insn_emulation_fail  0   0
 invlpg   0   0
 io_exits   2096834 643
 irq_exits  6469071   8
 irq_injections 4765189 190
 irq_window  279385   0
 largepages   0   0
 mmio_exits   0   0
 mmu_cache_miss   18670   0
 mmu_flooded  0   0
 mmu_pde_zapped   0   0
 mmu_pte_updated  0   0
 mmu_pte_write10440   0
 mmu_recycled 0   0


qemu-kvm command:
/usr/bin/qemu-kvm -M pc -m 4096 -smp 8 -name f11-test -uuid 
b7b4b7e4-9c07-22aa-0c95-d5c8a24176c5 -monitor pty -pidfile 
/var/run/libvirt/qemu//f11-test.pid -drive 
file=/var/lib/libvirt/images/f11-test.img,if=virtio,index=0,boot=on -drive 
file=/dev/sdb,if=virtio,index=1 -net nic,macaddr=54:52:00:46:48:0e,model=virtio 
-net user -serial pty -parallel none -usb -usbdevice tablet -vnc cct201:1 
-soundhw es1370 -redir tcp:::22

test run timing:
real12m36.165s
user27m28.976s
sys 8m32.245s


kvm_stat output after this test run
kvm statistics

 efer_reload 13   0
 exits 470979812003
 fpu_reload 2168308   0
 halt_exits 3378761 301
 halt_wakeup 707171 241
 host_state_reload  75459901538
 hypercalls   0   0
 insn_emulation17809066 462
 insn_emulation_fail  0   0
 invlpg   0   0
 io_exits   28012211232
 irq_exits 11959063   7
 irq_injections 8395980 304
 irq_window  531641   3
 largepages   0   0
 mmio_exits   0   0
 mmu_cache_miss   28419   0
 mmu_flooded  0   0
 mmu_pde_zapped   0   0
 mmu_pte_updated  0   0
 mmu_pte_write10440   0
 mmu_recycled  7193   0





GUEST time (make -j12 && make -j12 modules), work area disk, cache=none
---
qemu-kvm command:
/usr/bin/qemu-kvm -M pc -m 4096 -smp 8 -name f11-test -uuid 
b7b4b7e4-9c07-22aa-0c95-d5c8a24176c5 -monitor pty -pidfile 
/var/run/libvirt/qemu//f11-test.pid -drive

Re: [PATCH v3] enable x2APIC without interrupt remapping under KVM

2009-07-01 Thread Suresh Siddha

On Tue, 2009-06-30 at 12:36 -0700, Eric W. Biederman wrote:
 Dropped irqs..  Driver hangs because it is waiting for an irq.  Hardware
 hangs because it is waiting for the cpu to process the irq.
 
 Potentially we get a level triggered irq that is never acked by
 the cpu that won't arm until the cpu send an ack, and we can't
 send an ack from another cpu.

Eric,

Among the number of experiments you have tried in the past to fix this, have
you tried explicitly clearing the remoteIRR by
changing the trigger mode to edge and then back to level?

Is there a problem with this?

We can send a spurious IPI (after the RTE migration) with the new vector
to another cpu and handler which services the level interrupt will check
if we saw the edge mode for a level interrupt and then the handler can
explicitly restore the level trigger and reset the remote IRR by mask
+edge and unmask+level.

We might have to work with some rough edges, but do you recollect any
major issue with this approach?

thanks,
suresh

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] qemu-kvm: fix typo in configure

2009-07-01 Thread Dustin Kirkland

On Wed, Jul 1, 2009 at 4:27 AM, Michael S. Tsirkin m...@redhat.com wrote:
 Kill extra \). Also escape ! for clarity and bourne shell
 compatibility.

Agreed.  Looks like this one was introduced during the last merge from
qemu.  It bit me too.

:-Dustin

 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 ---

 This patch fixes configure on next for me

  configure |    6 +++---
  1 files changed, 3 insertions(+), 3 deletions(-)

 diff --git a/configure b/configure
 index f76f511..b62e3d7 100755
 --- a/configure
 +++ b/configure
 @@ -2141,10 +2141,10 @@ configure_kvm() {
  }

  # Make sure the target and host cpus are compatible
 -if test ! \( $target_cpu = $cpu -o \
 +if test \! \( $target_cpu = $cpu -o \
   \( $target_cpu = ppcemb -a $cpu = ppc \) -o \
 -  \( $target_cpu = x86_64 -a $cpu = i386   \) -o \
 -  \( $target_cpu = i386   -a $cpu = x86_64 \) \) -o \
 +  \( $target_cpu = x86_64 -a $cpu = i386 \) -o \
 +  \( $target_cpu = i386   -a $cpu = x86_64 \) -o \
   \( $target_cpu = ia64   -a $cpu = ia64 \) \) ; then
   target_kvm=no
  fi
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: pci_stub and kvm

2009-07-01 Thread Yinghai Lu

On Wed, Jul 1, 2009 at 12:49 AM, Avi Kivity a...@redhat.com wrote:
 On 07/01/2009 07:18 AM, Yinghai Lu wrote:

 [ 1966.343286]
 [ 1966.343288] ===
 [ 1966.356756] [ INFO: possible circular locking dependency detected ]
 [ 1966.356759] 2.6.31-rc1-tip-00978-g99123e5-dirty #438
 [ 1966.356761] ---
 [ 1966.356764] events/0/387 is trying to acquire lock:
 [ 1966.356766]  (kvm-lock){+.+.+.}, at: [8100af27]
 kvm_assigned_dev_interrupt_work_handler+0x42/0x13a
 [ 1966.356786]
 [ 1966.356787] but task is already holding lock:
 [ 1966.356789]  (match-interrupt_work){+.+...}, at:
 [810986e9] worker_thread+0x175/0x2f6
 [ 1966.356797]
 [ 1966.356798] which lock already depends on the new lock.
 [ 1966.356799]
 [ 1966.356800]
 [ 1966.356801] the existing dependency chain (in reverse order) is:
 [ 1966.356803]
 [ 1966.356803] -  #1 (match-interrupt_work){+.+...}:
 [ 1966.356809]        [810b3bf6] __lock_acquire+0x1396/0x1710
 [ 1966.356817]        [810b403c] lock_acquire+0xcc/0x104
 [ 1966.356821]        [810994a8] __cancel_work_timer+0x121/0x247
 [ 1966.356825]        [8109962c] cancel_work_sync+0x23/0x39
 [ 1966.356828]        [8100b280] kvm_deassign_irq+0xf1/0x183
 [ 1966.356832]        [8100db6c] kvm_vm_ioctl+0x8c8/0xc1a
 [ 1966.356837]        [81156e56] vfs_ioctl+0x3e/0xa3
 [ 1966.356846]        [8115741c] do_vfs_ioctl+0x4be/0x511
 [ 1966.356850]        [811574c5] sys_ioctl+0x56/0x8d
 [ 1966.356854]        [81034fdb] system_call_fastpath+0x16/0x1b
 [ 1966.356860]        [] 0x
 [ 1966.356869]
 [ 1966.356870] -  #0 (kvm-lock){+.+.+.}:
 [ 1966.356872]        [810b392b] __lock_acquire+0x10cb/0x1710
 [ 1966.356875]        [810b403c] lock_acquire+0xcc/0x104
 [ 1966.356878]        [81cde487] mutex_lock_nested+0x75/0x2fa
 [ 1966.356886]        [8100af27]
 kvm_assigned_dev_interrupt_work_handler+0x42/0x13a
 [ 1966.356890]        [81098743] worker_thread+0x1cf/0x2f6
 [ 1966.356892]        [8109e335] kthread+0xa8/0xb0
 [ 1966.356899]        [8103609a] child_rip+0xa/0x20
 [ 1966.356906]        [] 0x


 This is already fixed in kvm.git.  I'm not sure about merging it to 2.6.30
 since the race is very rare and involves device assignment (which is not
 very mainstream), while the fix touches the core kvm parts.

tip/master merged with kvm/master doesn't have that warning.

YH
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v3] enable x2APIC without interrupt remapping under KVM

2009-07-01 Thread Eric W. Biederman

Suresh Siddha suresh.b.sid...@intel.com writes:

 On Tue, 2009-06-30 at 12:36 -0700, Eric W. Biederman wrote:
 Dropped irqs..  Driver hangs because it is waiting for an irq.  Hardware
 hangs because it is waiting for the cpu to process the irq.
 
 Potentially we get a level triggered irq that is never acked by
 the cpu that won't arm until the cpu send an ack, and we can't
 send an ack from another cpu.

 Eric,

 Among number of experiments you have tried in the past to fix this, have
 you tried the experiment of explicitly clearing the remoteIRR by
 changing the trigger mode to edge and then back to level.

 Is there a problem with this?

The problem I had wasn't remoteIRR getting stuck, but the symptoms
were largely the same.  I did try changing the trigger mode to edge
and back and that did not unstick the ioapic in all cases.

 We can send a spurious IPI (after the RTE migration) with the new vector
 to another cpu and handler which services the level interrupt will check
 if we saw the edge mode for a level interrupt and then the handler can
 explicitly restore the level trigger and reset the remote IRR by mask
 +edge and unmask+level.

 We might have to work with some rough edges but do you recollect any
 major issue with this approach..

This is coming up enough recently I expect it is time to cook up
a patch that does the ioapic migration in process context plus
some user space code that stress tests things.  Just so people
can repeat my experiments and see what I am trying to avoid.

Eric
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 1/2] KVM/PPC: Fix PPC KVM e500_tlb.c build error

2009-07-01 Thread Liu Yu-B13201


This fix is already accepted in kvm.git 

 -Original Message-
 From: Yang Shi [mailto:yang@windriver.com] 
 Sent: Thursday, July 02, 2009 10:55 AM
 To: Liu Yu-B13201; holl...@us.ibm.com; a...@redhat.com
 Cc: kvm-...@vger.kernel.org; kvm@vger.kernel.org; 
 linuxppc-...@ozlabs.org
 Subject: [PATCH 1/2] KVM/PPC: Fix PPC KVM e500_tlb.c build error
 
 Since include/asm/mmu-fsl-booke.h was replaced by 
 include/asm/mmu-book3e.h,
 fix e500_tlb.h to reflect the change and fix e500_tlb.c to 
 align with the
 new page size macro definition in include/asm/mmu-book3e.h.
 
 Signed-off-by: Yang Shi yang@windriver.com
 ---
  arch/powerpc/kvm/e500_tlb.c |8 
  arch/powerpc/kvm/e500_tlb.h |2 +-
  2 files changed, 5 insertions(+), 5 deletions(-)
 
 diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
 index 0e773fc..616762b 100644
 --- a/arch/powerpc/kvm/e500_tlb.c
 +++ b/arch/powerpc/kvm/e500_tlb.c
 @@ -309,7 +309,7 @@ static inline void 
 kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
   vcpu_e500-shadow_pages[tlbsel][esel] = new_page;
  
   /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
 - stlbe-mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
 + stlbe-mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
   | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
   stlbe-mas2 = (gvaddr  MAS2_EPN)
   | e500_shadow_mas2_attrib(gtlbe-mas2,
 @@ -545,7 +545,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
   case 0:
   /* TLB0 */
   gtlbe-mas1 = ~MAS1_TSIZE(~0);
 - gtlbe-mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
 + gtlbe-mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
  
   stlbsel = 0;
   sesel = 
 kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
 @@ -679,14 +679,14 @@ void kvmppc_e500_tlb_setup(struct 
 kvmppc_vcpu_e500 *vcpu_e500)
  
   /* Insert large initial mapping for guest. */
   tlbe = vcpu_e500-guest_tlb[1][0];
 - tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
 + tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
   tlbe-mas2 = 0;
   tlbe-mas3 = E500_TLB_SUPER_PERM_MASK;
   tlbe-mas7 = 0;
  
   /* 4K map for serial output. Used by kernel wrapper. */
   tlbe = vcpu_e500-guest_tlb[1][1];
 - tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
 + tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
   tlbe-mas2 = (0xe0004500  0xF000) | MAS2_I | MAS2_G;
   tlbe-mas3 = (0xe0004500  0xF000) | 
 E500_TLB_SUPER_PERM_MASK;
   tlbe-mas7 = 0;
 diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
 index 45b064b..abb1bf8 100644
 --- a/arch/powerpc/kvm/e500_tlb.h
 +++ b/arch/powerpc/kvm/e500_tlb.h
 @@ -16,7 +16,7 @@
  #define __KVM_E500_TLB_H__
  
  #include linux/kvm_host.h
 -#include asm/mmu-fsl-booke.h
 +#include asm/mmu-book3e.h
  #include asm/tlb.h
  #include asm/kvm_e500.h
  
 -- 
 1.6.0.4
 
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/2] KVM/PPC: Fix kvm_main.c build error for PPC KVM

2009-07-01 Thread Yang Shi

When building KVM for PPC with the latest kernel, an integer overflow
error occurs in the kvm_main.c file. The root cause is that the compiler
considers KVM_PAGES_PER_HPAGE to be of signed long type, whereas it should
be of unsigned long type.

So, change it to unsigned long type in include/asm/kvm_host.h

Signed-off-by: Yang Shi yang@windriver.com
---
 arch/powerpc/include/asm/kvm_host.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index dfdf13c..fddc3ed 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
 /* We don't currently support large pages. */
-#define KVM_PAGES_PER_HPAGE (1<<31)
+#define KVM_PAGES_PER_HPAGE (1UL << 31)
 
 struct kvm;
 struct kvm_run;
-- 
1.6.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/2] KVM/PPC: Fix PPC KVM e500_tlb.c build error

2009-07-01 Thread Yang Shi

Since include/asm/mmu-fsl-booke.h was replaced by include/asm/mmu-book3e.h,
fix e500_tlb.h to reflect the change and fix e500_tlb.c to align with the
new page size macro definition in include/asm/mmu-book3e.h.

Signed-off-by: Yang Shi yang@windriver.com
---
 arch/powerpc/kvm/e500_tlb.c |8 
 arch/powerpc/kvm/e500_tlb.h |2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 0e773fc..616762b 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -309,7 +309,7 @@ static inline void kvmppc_e500_shadow_map(struct 
kvmppc_vcpu_e500 *vcpu_e500,
vcpu_e500-shadow_pages[tlbsel][esel] = new_page;
 
/* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
-   stlbe-mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+   stlbe-mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
| MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
stlbe-mas2 = (gvaddr  MAS2_EPN)
| e500_shadow_mas2_attrib(gtlbe-mas2,
@@ -545,7 +545,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
case 0:
/* TLB0 */
gtlbe-mas1 = ~MAS1_TSIZE(~0);
-   gtlbe-mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+   gtlbe-mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
 
stlbsel = 0;
sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
@@ -679,14 +679,14 @@ void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 
*vcpu_e500)
 
/* Insert large initial mapping for guest. */
tlbe = vcpu_e500-guest_tlb[1][0];
-   tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+   tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
tlbe-mas2 = 0;
tlbe-mas3 = E500_TLB_SUPER_PERM_MASK;
tlbe-mas7 = 0;
 
/* 4K map for serial output. Used by kernel wrapper. */
tlbe = vcpu_e500-guest_tlb[1][1];
-   tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+   tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
tlbe-mas2 = (0xe0004500  0xF000) | MAS2_I | MAS2_G;
tlbe-mas3 = (0xe0004500  0xF000) | E500_TLB_SUPER_PERM_MASK;
tlbe-mas7 = 0;
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
index 45b064b..abb1bf8 100644
--- a/arch/powerpc/kvm/e500_tlb.h
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -16,7 +16,7 @@
 #define __KVM_E500_TLB_H__
 
 #include linux/kvm_host.h
-#include asm/mmu-fsl-booke.h
+#include asm/mmu-book3e.h
 #include asm/tlb.h
 #include asm/kvm_e500.h
 
-- 
1.6.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Autotest] [PATCH] Add a client-side test qemu_iotests

2009-07-01 Thread Martin Bligh

From: root r...@dhcp-66-70-57.nay.redhat.com


Signed-off-by: root r...@dhcp-66-70-57.nay.redhat.com
---


;-)
Can we get these signed off by a person please? Preferably with a real email
address (see the DCO, in top level directory)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Autotest] [PATCH] Add a client-side test qemu_iotests

2009-07-01 Thread Yolkfull Chow


On 07/02/2009 11:49 AM, Martin Bligh wrote:

From: rootr...@dhcp-66-70-57.nay.redhat.com


Signed-off-by: rootr...@dhcp-66-70-57.nay.redhat.com
---


;-)
Can we get these signed off by a person please? Preferably with a real email
address (see the DCO, in top level directory)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
   

Sure. Will re-send it now. :)

--
Yolkfull
Regards,

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [Autotest] [PATCH] Add a client-side test qemu_iotests

2009-07-01 Thread Yolkfull Chow


On 07/02/2009 12:24 PM, sudhir kumar wrote:

Please send the tarball as a separate attachment. Your email is scary.

OK, I will resend the patch. Sorry for this scary email. :)


--
Yolkfull
Regards,

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] rev3: support colon in filenames

2009-07-01 Thread Ram Pai

Problem: It is impossible to pass filenames containing a colon character,
because qemu interprets such names as a protocol. For example, the filename
scsi:0 is interpreted as a protocol named scsi.

This patch allows user to escape colon characters. For example the above
filename can now be expressed either as 'scsi\:0' or as file:scsi:0

anything following the file: tag is interpreted verbatim. However if file:
tag is omitted then any colon characters in the string must be escaped using
backslash.

Here are couple of examples:

scsi\:0\:abc is a local file scsi:0:abc
http\://myweb is a local file by name http://myweb
file:scsi:0:abc is a local file scsi:0:abc
file:http://myweb is a local file by name http://myweb

fat:c:\path\to\dir\:floppy\:  is a fat file by name \path\to\dir:floppy:
NOTE:The above example cannot be expressed using the file: protocol.


Changelog w.r.t to iteration 0:
   1) removes flexibility added to nbd semantics  eg -- nbd:\::
   2) introduce the file: protocol to indicate local file

Changelog w.r.t to iteration 1:
   1) generically handles 'file:' protocol in find_protocol
   2) centralizes 'filename' pruning before the call to open().
   3) fixes buffer overflow seen in fill_token()
   4) adheres to coding style
   5) patch against upstream qemu tree

Changelog w.r.t to iteration 2:
   1) really really fixes buffer overflow seen in fill_token()
   2) the centralized 'filename' pruning had a side effect with
qcow2 files and other files. Fixed it. _open() is back.

Signed-off-by: Ram Pai linux...@us.ibm.com

 block.c   |   10 +
 block/raw-posix.c |   15 
 block/vvfat.c |  100 ++--
 cutils.c  |   40 +
 qemu-common.h |2 +
 5 files changed, 148 insertions(+), 19 deletions(-)

diff --git a/block.c b/block.c
index aca5a6d..7ad4dd9 100644
--- a/block.c
+++ b/block.c
@@ -225,7 +225,6 @@ static BlockDriver *find_protocol(const char *filename)
 {
 BlockDriver *drv1;
 char protocol[128];
-int len;
 const char *p;
 
 #ifdef _WIN32
@@ -233,14 +232,9 @@ static BlockDriver *find_protocol(const char *filename)
 is_windows_drive_prefix(filename))
 return bdrv_find_format(raw);
 #endif
-p = strchr(filename, ':');
-if (!p)
+p = prune_strcpy(protocol, 128, filename, ':');
+if (*p != ':')
 return bdrv_find_format(raw);
-len = p - filename;
-if (len  sizeof(protocol) - 1)
-len = sizeof(protocol) - 1;
-memcpy(protocol, filename, len);
-protocol[len] = '\0';
 for(drv1 = first_drv; drv1 != NULL; drv1 = drv1-next) {
 if (drv1-protocol_name 
 !strcmp(drv1-protocol_name, protocol))
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 41bfa37..8a0c0df 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -151,7 +151,7 @@ static int raw_open_common(BlockDriverState *bs, const char 
*filename,
 s-open_flags |= O_DSYNC;
 
 s-fd = -1;
-fd = open(filename, s-open_flags, 0644);
+fd = _open(filename, s-open_flags, 0644);
 if (fd  0) {
 ret = -errno;
 if (ret == -EROFS)
@@ -844,7 +844,7 @@ static int raw_create(const char *filename, 
QEMUOptionParameter *options)
 options++;
 }
 
-fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+fd = _open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
   0644);
 if (fd  0)
 return -EIO;
@@ -889,6 +889,7 @@ static BlockDriver bdrv_raw = {
 .bdrv_getlength = raw_getlength,
 
 .create_options = raw_create_options,
+.protocol_name = file,
 };
 
 /***/
@@ -985,7 +986,7 @@ static int hdev_open(BlockDriverState *bs, const char 
*filename, int flags)
 if ( bsdPath[ 0 ] != '\0' ) {
 strcat(bsdPath,s0);
 /* some CDs don't have a partition 0 */
-fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
+fd = _open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
 if (fd  0) {
 bsdPath[strlen(bsdPath)-1] = '1';
 } else {
@@ -1037,7 +1038,7 @@ static int fd_open(BlockDriverState *bs)
 #endif
 return -EIO;
 }
-s-fd = open(bs-filename, s-open_flags  ~O_NONBLOCK);
+s-fd = _open(bs-filename, s-open_flags  ~O_NONBLOCK);
 if (s-fd  0) {
 s-fd_error_time = qemu_get_clock(rt_clock);
 s-fd_got_error = 1;
@@ -1133,7 +1134,7 @@ static int hdev_create(const char *filename, 
QEMUOptionParameter *options)
 options++;
 }
 
-fd = open(filename, O_WRONLY | O_BINARY);
+fd = _open(filename, O_WRONLY | O_BINARY);
 if (fd  0)
 return -EIO;
 
@@ -1239,7 +1240,7 @@ static int floppy_eject(BlockDriverState *bs, int 
eject_flag)
 close(s-fd);
 s-fd = -1;
 }
-fd = open(bs-filename, s-open_flags | O_NONBLOCK);
+fd =

Re: slow guest performance with build load, looking for ideas

2009-07-01 Thread Avi Kivity


On 07/02/2009 12:41 AM, Erik Jacobson wrote:

I wanted to post the latest test run to the thread.

Avi Kivity provided some ideas to try.  I had mixed luck.  I'd like to try
this again if we have any thoughts on the vpid/ept issue, or any other
ideas for drilling down on this.  Avi Kivity mentioned LVM in the thread.
I continued to just export the whole /dev/sdb to the guest. I'm happy to
try LVM in some form if we think it would help?
   


Exporting an entire drive is even better than LVM (in terms of 
performance; flexibility obviously suffers).  Just make sure to use 
cache=none (which I see in your command line below).



  * I could NOT find vpid and ept parameters on the host.  They weren't here:
/sys/module/kvm_intel/parameters
nor here
/sys/module/kvm/parameters
So the check for those parameters resulted in no information.
Didn't see them elsewhere either:
# pwd
/sys
# find . -name vpid -print
# find . -name ept -print

   


Apparently the parameters were only exposed in 2.6.30.  Previously they 
were only available during modprobe.  Since you're using nehalem, let's 
assume they're set correctly (since that's the default).




I had done some stuff to set up the test including a build I didn't count.

GUEST time (make -j12 && make -j12 modules), work area disk no cache param
--
kvm_stat output BEFORE running this test:

kvm statistics

  efer_reload 13   0
  exits 271450761142
  fpu_reload 1298729   0
  halt_exits 2152011 189
  halt_wakeup 494689 123
  host_state_reload 4998646 837
  hypercalls   0   0
  insn_emulation10165593 302
  insn_emulation_fail  0   0
  invlpg   0   0
  io_exits   2096834 643
  irq_exits  6469071   8
  irq_injections 4765189 190
  irq_window  279385   0
  largepages   0   0
  mmio_exits   0   0
  mmu_cache_miss   18670   0
  mmu_flooded  0   0
  mmu_pde_zapped   0   0
  mmu_pte_updated  0   0
  mmu_pte_write10440   0
  mmu_recycled 0   0
   


Nice and quiet.


qemu-kvm command:
/usr/bin/qemu-kvm -M pc -m 4096 -smp 8 -name f11-test -uuid 
b7b4b7e4-9c07-22aa-0c95-d5c8a24176c5 -monitor pty -pidfile 
/var/run/libvirt/qemu//f11-test.pid -drive 
file=/var/lib/libvirt/images/f11-test.img,if=virtio,index=0,boot=on -drive 
file=/dev/sdb,if=virtio,index=1 -net nic,macaddr=54:52:00:46:48:0e,model=virtio 
-net user -serial pty -parallel none -usb -usbdevice tablet -vnc cct201:1 
-soundhw es1370 -redir tcp:::22
   


-usbdevice tablet is known to cause large interrupt loads.  I suggest 
dropping it.  If it helps your vnc session, drop your vnc client and use 
vinagre instead.



test run timing:
real12m36.165s
user27m28.976s
sys 8m32.245s
   


12 minutes real vs 35 cpu minutes - scaling only 3:1 on smp 8.



kvm_stat output after this test run
kvm statistics

  efer_reload 13   0
  exits 470979812003
  fpu_reload 2168308   0
  halt_exits 3378761 301
  halt_wakeup 707171 241
  host_state_reload 75459901538
  hypercalls   0   0
  insn_emulation17809066 462
  insn_emulation_fail  0   0
  invlpg   0   0
  io_exits   28012211232
  irq_exits 11959063   7
  irq_injections 8395980 304
  irq_window  531641   3
  largepages   0   0
  mmio_exits   0   0
  mmu_cache_miss   28419   0
  mmu_flooded  0   0
  mmu_pde_zapped   0   0
  mmu_pte_updated  0   0
  mmu_pte_write10440   0
  mmu_recycled  7193   0

   


Nice and quiet too, but what's needed is kvm_stat (or kvm_stat -1) 
during the run.  Many of the 47M exits are unaccounted for; there's a 
gap in the stats gathering code.


vmstat 1 on host and guest during the run would also help.


HOST time (make -j12 && make -j12 modules) with no guest running

real6m50.936s
user29m12.051s
sys 5m50.867s

   


35 minutes cpu run on 7 minutes real time, so scaling 1:7.  User time 
almost the same, system time different but not enough to account for the 
large difference in run time.


I'm due to get my own Nehalem next week, I'll try to reproduce your 
results here.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the

Re: Exception handling between QEMU and KVM

2009-07-01 Thread Alexander Graf



On 01.07.2009, at 05:28, Christoffer Dall wrote:


Hi all.

We are still working on our ARM port of KVM and we are managing to let
the guest decompress the kernel image and run from relocated address
and we can support enabling MMU before this.

However, to debug relocated micro-hypervisor for exception handling, we
are trying to implement some NOT_IMPLEMENTED() macros and ASSERT()
macros.

What we have done so far is simply to exit QEMU roughly after
returning -EINVAL from the KVM_RUN system call, but before we start
supporting an interrupt cycle we have to improve on this. Setting
kvm_run->exit_reason = KVM_EXIT_EXCEPTION or kvm_run->exit_reason =
KVM_EXIT_SHUTDOWN just results in QEMU looping in the cpu execution
loop.

Can someone point us in the direction of what we're missing here in
order to exit the whole QEMU process from an exception occurring
inside the guest?


I'm still having a hard time understanding what exactly you're trying  
to do.


If you need to have your machine quit, just set exit_reason to some  
value you want to use for that purpose and make it behave accordingly  
in target-arm/kvm.c.


But I really can't think of any scenarios where you'd need it.  
Shouldn't all exceptions be handled in the kernel module? What exactly  
are you trying to implement here?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Exception handling between QEMU and KVM

2009-07-01 Thread Christoffer Dall

OK, my question is this:

If I want to tell QEMU to quit from within the KVM_RUN ioctl in a way
that causes QEMU to exit gracefully (e.g. free any allocated memory
etc.) what is the way to do that?

I have tried setting EXIT_REASON to both KVM_EXIT_EXCEPTION and
KVM_EXIT_SHUTDOWN, but QEMU stays in the execution loop and
re-executes the machine over and over.

Is this some logic that has to be done architecture specifically in
kvm_arch_post_run(...) or ?

And yes, any exceptions should be handled in KVM, but in the case of
an unrecoverable error I'm left with reporting this to the user and
ending the QEMU process, right?

Thanks,
Christoffer

On Wed, Jul 1, 2009 at 5:00 AM, Avi Kivitya...@redhat.com wrote:
 On 07/01/2009 06:28 AM, Christoffer Dall wrote:

 Hi all.

 We are still working on our ARM port of KVM and we are managing to let
 the guest decompress the kernel image and run from relocated address
 and we can support enabling MMU before this.

 However, to debug relocated micro-hypervisor for exception handling, we
 are trying to implement some NOT_IMPLEMENTED() macros and ASSERT()
 macros.

 What we have done so far is simply to exit QEMU roughly after
 returning -EINVAL from the KVM_RUN system call, but before we start
 supporting an interrupt cycle we have to improve on this. Setting
 kvm_run->exit_reason = KVM_EXIT_EXCEPTION or kvm_run->exit_reason =
 KVM_EXIT_SHUTDOWN just results in QEMU looping in the cpu execution
 loop.

 Can someone point us in the direction of what we're missing here in
 order to exit the whole QEMU process from an exception occurring
 inside the guest?


 Not sure I understand.  Can't you call exit() or abort() after kvm returns
 an error?

 --
 error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 1/2] KVM/PPC: Fix PPC KVM e500_tlb.c build error

2009-07-01 Thread Liu Yu-B13201


This fix is already accepted in kvm.git 

 -Original Message-
 From: Yang Shi [mailto:yang@windriver.com] 
 Sent: Thursday, July 02, 2009 10:55 AM
 To: Liu Yu-B13201; holl...@us.ibm.com; a...@redhat.com
 Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org; 
 linuxppc-...@ozlabs.org
 Subject: [PATCH 1/2] KVM/PPC: Fix PPC KVM e500_tlb.c build error
 
 Since include/asm/mmu-fsl-booke.h was replaced by 
 include/asm/mmu-book3e.h,
 fix e500_tlb.h to reflect the change and fix e500_tlb.c to 
 align with the
 new page size macro definition in include/asm/mmu-book3e.h.
 
 Signed-off-by: Yang Shi yang@windriver.com
 ---
  arch/powerpc/kvm/e500_tlb.c |8 
  arch/powerpc/kvm/e500_tlb.h |2 +-
  2 files changed, 5 insertions(+), 5 deletions(-)
 
 diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
 index 0e773fc..616762b 100644
 --- a/arch/powerpc/kvm/e500_tlb.c
 +++ b/arch/powerpc/kvm/e500_tlb.c
 @@ -309,7 +309,7 @@ static inline void 
 kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
   vcpu_e500-shadow_pages[tlbsel][esel] = new_page;
  
   /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
 - stlbe-mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
 + stlbe-mas1 = MAS1_TSIZE(BOOK3E_PAGESZ_4K)
   | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
   stlbe-mas2 = (gvaddr  MAS2_EPN)
   | e500_shadow_mas2_attrib(gtlbe-mas2,
 @@ -545,7 +545,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
   case 0:
   /* TLB0 */
   gtlbe-mas1 = ~MAS1_TSIZE(~0);
 - gtlbe-mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
 + gtlbe-mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K);
  
   stlbsel = 0;
   sesel = 
 kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
 @@ -679,14 +679,14 @@ void kvmppc_e500_tlb_setup(struct 
 kvmppc_vcpu_e500 *vcpu_e500)
  
   /* Insert large initial mapping for guest. */
   tlbe = vcpu_e500-guest_tlb[1][0];
 - tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
 + tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M);
   tlbe-mas2 = 0;
   tlbe-mas3 = E500_TLB_SUPER_PERM_MASK;
   tlbe-mas7 = 0;
  
   /* 4K map for serial output. Used by kernel wrapper. */
   tlbe = vcpu_e500-guest_tlb[1][1];
 - tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
 + tlbe-mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K);
   tlbe-mas2 = (0xe0004500  0xF000) | MAS2_I | MAS2_G;
   tlbe-mas3 = (0xe0004500  0xF000) | 
 E500_TLB_SUPER_PERM_MASK;
   tlbe-mas7 = 0;
 diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
 index 45b064b..abb1bf8 100644
 --- a/arch/powerpc/kvm/e500_tlb.h
 +++ b/arch/powerpc/kvm/e500_tlb.h
 @@ -16,7 +16,7 @@
  #define __KVM_E500_TLB_H__
  
  #include linux/kvm_host.h
 -#include asm/mmu-fsl-booke.h
 +#include asm/mmu-book3e.h
  #include asm/tlb.h
  #include asm/kvm_e500.h
  
 -- 
 1.6.0.4
 
 
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

95 matches

Mail list logo