[COMMIT master] qemu-kvm: fix build on 32 bit

2009-10-10 Thread Avi Kivity
From: Michael S. Tsirkin m...@redhat.com

Fix build on 32 bit system: cast 64 bit integer
to pointer through pointer-sized integer.  Without this, I get:
qemu-kvm.c:1557: error: cast to pointer from integer of different size

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index a4a90ed..62ca050 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -588,7 +588,7 @@ int kvm_register_phys_mem(kvm_context_t kvm,
 struct kvm_userspace_memory_region memory = {
 .memory_size = len,
 .guest_phys_addr = phys_start,
-.userspace_addr = (unsigned long) (intptr_t) userspace_addr,
+.userspace_addr = (unsigned long) (uintptr_t) userspace_addr,
 .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
 };
 int r;
@@ -1554,7 +1554,8 @@ static void sigbus_handler(int n, struct 
qemu_signalfd_siginfo *siginfo,
 CPUState *cenv;
 
 /* Hope we are lucky for AO MCE */
-if (do_qemu_ram_addr_from_host((void *)siginfo-ssi_addr, paddr)) {
+if (do_qemu_ram_addr_from_host((void *)(intptr_t)siginfo-ssi_addr,
+  paddr)) {
 fprintf(stderr, Hardware memory error for memory used by 
 QEMU itself instead of guest system!: %llx\n,
 (unsigned long long)siginfo-ssi_addr);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] qemu-kvm: fix build with KVM_CAP_SET_GUEST_DEBUG

2009-10-10 Thread Avi Kivity
From: Michael S. Tsirkin m...@redhat.com

Fix build with KVM_CAP_SET_GUEST_DEBUG: use the QTAILQ_HEAD macro
to declare the list head.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/qemu-kvm.h b/qemu-kvm.h
index 4523e25..d6748c7 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -1229,7 +1229,7 @@ typedef struct KVMState {
 int broken_set_mem_region;
 int migration_log;
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
+QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints;
 #endif
 struct kvm_context kvm_context;
 } KVMState;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] Update kvm headers

2009-10-10 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/kvm/include/ia64/asm/kvm.h b/kvm/include/ia64/asm/kvm.h
index 9920dd6..bc90c75 100644
--- a/kvm/include/ia64/asm/kvm.h
+++ b/kvm/include/ia64/asm/kvm.h
@@ -21,7 +21,7 @@
  *
  */
 
-#include asm/types.h
+#include linux/types.h
 #include linux/ioctl.h
 
 /* Select x86 specific features in linux/kvm.h */
@@ -60,6 +60,7 @@ struct kvm_ioapic_state {
 #define KVM_IRQCHIP_PIC_MASTER   0
 #define KVM_IRQCHIP_PIC_SLAVE1
 #define KVM_IRQCHIP_IOAPIC   2
+#define KVM_NR_IRQCHIPS  3
 
 #define KVM_CONTEXT_SIZE   8*1024
 
diff --git a/kvm/include/ia64/asm/kvm_para.h b/kvm/include/ia64/asm/kvm_para.h
index 0d6d8ca..1588aee 100644
--- a/kvm/include/ia64/asm/kvm_para.h
+++ b/kvm/include/ia64/asm/kvm_para.h
@@ -19,9 +19,13 @@
  *
  */
 
+#ifdef __KERNEL__
+
 static inline unsigned int kvm_arch_para_features(void)
 {
return 0;
 }
 
 #endif
+
+#endif
diff --git a/kvm/include/linux/compiler.h b/kvm/include/linux/compiler.h
new file mode 100644
index 000..f70c49f
--- /dev/null
+++ b/kvm/include/linux/compiler.h
@@ -0,0 +1,2 @@
+/* dummy file */
+
diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index 6d99737..f8f8900 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -7,14 +7,14 @@
  * Note: you must update KVM_API_VERSION if you change this interface.
  */
 
-#include asm/types.h
-
+#include linux/types.h
+#include linux/compiler.h
 #include linux/ioctl.h
 #include asm/kvm.h
 
 #define KVM_API_VERSION 12
 
-/* for KVM_TRACE_ENABLE */
+/* for KVM_TRACE_ENABLE, deprecated */
 struct kvm_user_trace_setup {
__u32 buf_size; /* sub_buffer size of each per-cpu */
__u32 buf_nr; /* the number of sub_buffers of each per-cpu */
@@ -241,7 +241,7 @@ struct kvm_dirty_log {
__u32 slot;
__u32 padding1;
union {
-   void   *dirty_bitmap; /* one bit per page */
+   void __user *dirty_bitmap; /* one bit per page */
__u64 padding2;
};
 };
@@ -307,6 +307,28 @@ struct kvm_guest_debug {
struct kvm_guest_debug_arch arch;
 };
 
+enum {
+   kvm_ioeventfd_flag_nr_datamatch,
+   kvm_ioeventfd_flag_nr_pio,
+   kvm_ioeventfd_flag_nr_deassign,
+   kvm_ioeventfd_flag_nr_max,
+};
+
+#define KVM_IOEVENTFD_FLAG_DATAMATCH (1  kvm_ioeventfd_flag_nr_datamatch)
+#define KVM_IOEVENTFD_FLAG_PIO   (1  kvm_ioeventfd_flag_nr_pio)
+#define KVM_IOEVENTFD_FLAG_DEASSIGN  (1  kvm_ioeventfd_flag_nr_deassign)
+
+#define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1  kvm_ioeventfd_flag_nr_max) - 1)
+
+struct kvm_ioeventfd {
+   __u64 datamatch;
+   __u64 addr;/* legal pio/mmio address */
+   __u32 len; /* 1, 2, 4, or 8 bytes*/
+   __s32 fd;
+   __u32 flags;
+   __u8  pad[36];
+};
+
 #define KVM_TRC_SHIFT   16
 /*
  * kvm trace categories
@@ -325,35 +347,6 @@ struct kvm_guest_debug {
 #define KVM_TRC_CYCLE_SIZE  8
 #define KVM_TRC_EXTRA_MAX   7
 
-/* This structure represents a single trace buffer record. */
-struct kvm_trace_rec {
-   /* variable rec_val
-* is split into:
-* bits 0 - 27  - event id
-* bits 28 -30  - number of extra data args of size u32
-* bits 31  - binary indicator for if tsc is in record
-*/
-   __u32 rec_val;
-   __u32 pid;
-   __u32 vcpu_id;
-   union {
-   struct {
-   __u64 timestamp;
-   __u32 extra_u32[KVM_TRC_EXTRA_MAX];
-   } __attribute__((packed)) timestamp;
-   struct {
-   __u32 extra_u32[KVM_TRC_EXTRA_MAX];
-   } notimestamp;
-   } u;
-};
-
-#define TRACE_REC_EVENT_ID(val) \
-   (0x0fff  (val))
-#define TRACE_REC_NUM_DATA_ARGS(val) \
-   (0x7000  ((val)  28))
-#define TRACE_REC_TCS(val) \
-   (0x8000  ((val)  31))
-
 #define KVMIO 0xAE
 
 /*
@@ -433,8 +426,15 @@ struct kvm_trace_rec {
 #ifdef __KVM_HAVE_MCE
 #define KVM_CAP_MCE 31
 #endif
+#define KVM_CAP_IRQFD 32
+#ifdef __KVM_HAVE_PIT
 #define KVM_CAP_PIT2 33
+#endif
+#define KVM_CAP_SET_BOOT_CPU_ID 34
+#ifdef __KVM_HAVE_PIT_STATE2
 #define KVM_CAP_PIT_STATE2 35
+#endif
+#define KVM_CAP_IOEVENTFD 36
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -488,16 +488,19 @@ struct kvm_x86_mce {
 };
 #endif
 
+#define KVM_IRQFD_FLAG_DEASSIGN (1  0)
+
+struct kvm_irqfd {
+   __u32 fd;
+   __u32 gsi;
+   __u32 flags;
+   __u8  pad[20];
+};
+
 /*
  * ioctls for VM fds
  */
 #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
-#define KVM_SET_NR_MMU_PAGES  _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES  _IO(KVMIO, 0x45)
-#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
-   struct kvm_userspace_memory_region)
-#define KVM_SET_TSS_ADDR  _IO(KVMIO, 

[COMMIT master] KVM: use proper hrtimer function to retrieve expiration time

2009-10-10 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

hrtimer->base can be temporarily NULL due to racing hrtimer_start.
See switch_hrtimer_base/lock_hrtimer_base.

Use hrtimer_get_remaining which is robust against it.

CC: sta...@kernel.org
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index b857ca3..fab7440 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 * itself with the initial count and continues counting
 * from there.
 */
-   remaining = hrtimer_expires_remaining(ps-pit_timer.timer);
+   remaining = hrtimer_get_remaining(ps-pit_timer.timer);
elapsed = ps-pit_timer.period - ktime_to_ns(remaining);
elapsed = mod_64(elapsed, ps-pit_timer.period);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3ca7767..cd60c0b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -515,7 +515,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
if (apic_get_reg(apic, APIC_TMICT) == 0)
return 0;
 
-   remaining = hrtimer_expires_remaining(apic-lapic_timer.timer);
+   remaining = hrtimer_get_remaining(apic-lapic_timer.timer);
if (ktime_to_ns(remaining)  0)
remaining = ktime_set(0, 0);
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Autotest] [PATCH] Fix a bug in function create in kvm_vm

2009-10-10 Thread Yolkfull Chow
On Mon, Oct 05, 2009 at 04:03:22PM -0300, Lucas Meneghel Rodrigues wrote:
 Hi Yolkfull! I've checked your patch, but it turns out that the comma
 is valid syntax for the logging module. By any chance you actually had
 an error with it?

Hi Lucas,
I just checked, and yes, it's valid syntax for this module. Earlier I hit
a traceback while running autotest that pointed at the code around this
line, so I mistakenly suspected the comma. Sorry for the confusion. ;-)

However, I noticed that logging.debug() calls sometimes pass variables
with a comma and sometimes format them with '%', which hurts readability.

Anyway, thanks for checking.

 
 On Mon, Sep 28, 2009 at 4:45 AM, Yolkfull Chow yz...@redhat.com wrote:
 
  Signed-off-by: Yolkfull Chow yz...@redhat.com
  ---
   client/tests/kvm/kvm_vm.py |    2 +-
   1 files changed, 1 insertions(+), 1 deletions(-)
 
  diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py
  index 55220f9..8ae 100755
  --- a/client/tests/kvm/kvm_vm.py
  +++ b/client/tests/kvm/kvm_vm.py
  @@ -406,7 +406,7 @@ class VM:
                                self.process.get_output()))
                  return False
 
  -            logging.debug(VM appears to be alive with PID %d,
  +            logging.debug(VM appears to be alive with PID %d %
                            self.process.get_pid())
              return True
 
  --
  1.6.2.5
 
  ___
  Autotest mailing list
  autot...@test.kernel.org
  http://test.kernel.org/cgi-bin/mailman/listinfo/autotest
 
 
 
 
 -- 
 Lucas
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Autotest] [PATCH] Fix a bug in function create in kvm_vm

2009-10-10 Thread Yolkfull Chow
On Sat, Oct 10, 2009 at 04:24:45PM +0800, Yolkfull Chow wrote:
 On Mon, Oct 05, 2009 at 04:03:22PM -0300, Lucas Meneghel Rodrigues wrote:
  Hi Yolkfull! I've checked your patch, but it turns out that the comma
  is valid syntax for the logging module. By any chance you actually had
  an error with it?
 
 Hi Lucas,
 I just checked, yes it's valid syntax for this module. Before this I met
 a traceback during running autotest and it indicated this line
 around,thus I doubt about this by mistake. Sorry for confusing. ;-)
 
 But I found for the variables in logging.debug(),sometimes it use comma
 to format while sometimes '%' which will drop code readability.

Another reason: if someone still using kvm_log wants to backport code
from this tree, they not only need to replace every 'logging' with
'kvm_log' but also need to change this comma syntax. ;-)

 
 Anyway, thanks for checking.
 
  
  On Mon, Sep 28, 2009 at 4:45 AM, Yolkfull Chow yz...@redhat.com wrote:
  
   Signed-off-by: Yolkfull Chow yz...@redhat.com
   ---
    client/tests/kvm/kvm_vm.py |    2 +-
    1 files changed, 1 insertions(+), 1 deletions(-)
  
   diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py
   index 55220f9..8ae 100755
   --- a/client/tests/kvm/kvm_vm.py
   +++ b/client/tests/kvm/kvm_vm.py
   @@ -406,7 +406,7 @@ class VM:
                                 self.process.get_output()))
                   return False
  
   -            logging.debug(VM appears to be alive with PID %d,
   +            logging.debug(VM appears to be alive with PID %d %
                             self.process.get_pid())
               return True
  
   --
   1.6.2.5
  
   ___
   Autotest mailing list
   autot...@test.kernel.org
   http://test.kernel.org/cgi-bin/mailman/listinfo/autotest
  
  
  
  
  -- 
  Lucas
  --
  To unsubscribe from this list: send the line unsubscribe kvm in
  the body of a message to majord...@vger.kernel.org
  More majordomo info at  http://vger.kernel.org/majordomo-info.html
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm guest: hrtimer: interrupt too slow

2009-10-10 Thread Michael Tokarev

Michael Tokarev wrote:

Frederic Weisbecker wrote:

[]

Was there swapping going on?

Not as far as I can see, and sar output agrees.


But I can read this from your guest traces:


I missed this one yesterday.  Note these are indeed GUEST traces.
The higher (read: non-zero) pgpg{in,out} and fault values occur in
the *guest*, not on the host (the original question was whether
there was swapping on the HOST, which would explain the timer
issues).

[cutting extra all-zero columns]

pgpgin/s pgpgout/s   fault/s  pgfree/s
11:44:47 0.00 32.32907.07   277.78
11:44:4827.59 22.99 44.83   150.57
11:44:49 0.00 33.68 22.11   218.95
[...]
21:46:54 0.00 31.68 16.8390.10
21:46:55 0.00108.00 17.0089.00
21:46:56 9.76482.93   3890.24   439.02
21:46:57 0.00760.00   8627.00  1133.00
21:46:58 0.00 84.85   2612.12   138.38
21:46:59 0.00 16.00 17.0090.00

So it looks like there was some swapping in going on when the hrtimer
(spuriously) hung.


One possible guess: since the guest hung for some time, the
higher values there might be the result of counts accumulated over
several seconds.


It's not swapping.  Swapping is in a separate table, with columns titled
pswpin/s and pswpout/s -- first table.

On my home machine with no swap at all, 4gig memory and 2gig free,
pgpgin and pgpgout are increasing too.

Also, while in the second case (21:46:56) there's actually some
noticeable activity (page faults at least), in first case that
activity is modest.

Note there's no documentation for /proc/vmstat file :)

/mjt

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm guest: hrtimer: interrupt too slow

2009-10-10 Thread Frederic Weisbecker
On Sat, Oct 10, 2009 at 01:18:16PM +0400, Michael Tokarev wrote:
 Michael Tokarev wrote:
 Frederic Weisbecker wrote:
 []
 Was there swapping going on?
 Not as far as I can see, and sar output agrees.

 But I can read this from your guest traces:

 I missed this one yesterday.  Note it's GUEST traces
 indeed.  Higher (read: non-zero) pgp{in,out} and faults
 values happens in *guest*, not on host (original question
 was if we've swapping in HOST, which'd explain the timer
 issues)



Yeah indeed. But still, that's a strange happenstance.



 [cutting extra all-zero columns]
 pgpgin/s pgpgout/s   fault/s  pgfree/s
 11:44:47 0.00 32.32907.07   277.78
 11:44:4827.59 22.99 44.83   150.57
 11:44:49 0.00 33.68 22.11   218.95
 [...]
 21:46:54 0.00 31.68 16.8390.10
 21:46:55 0.00108.00 17.0089.00
 21:46:56 9.76482.93   3890.24   439.02
 21:46:57 0.00760.00   8627.00  1133.00
 21:46:58 0.00 84.85   2612.12   138.38
 21:46:59 0.00 16.00 17.0090.00

 So it looks like there was some swapping in when the hrtimer (spuriously)
 hanged.

 One possible guess.  Since the guest hanged for some time, the
 higher values there might be a result of accumulated values for
 several seconds.



Maybe, yeah. I don't know enough about virtualization internals, so...

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] qemu-kvm: Fix configure to respect --kerneldir

2009-10-10 Thread Jan Kiszka
This simplifies working with new features without having to update the
locally mirrored headers. It also reduces the diff to upstream.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

 configure |   44 +++-
 1 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/configure b/configure
index 8866258..3a4318c 100755
--- a/configure
+++ b/configure
@@ -1346,23 +1346,7 @@ fi
 ##
 # kvm probe
 if test $kvm != no ; then
-  case $cpu in
-  i386 | x86_64)
-kvm_arch=x86
-;;
-  ppc)
-kvm_arch=powerpc
-;;
-  *)
-kvm_arch=$cpu
-;;
-  esac
-
-  kvm_cflags=-I$source_path/kvm/include
-  kvm_cflags=$kvm_cflags -I$source_path/kvm/include/$kvm_arch
-  kvm_cflags=$kvm_cflags -idirafter $source_path/compat
-
-  cat  $TMPC EOF
+cat  $TMPC EOF
 #include linux/kvm.h
 #if !defined(KVM_API_VERSION) || KVM_API_VERSION  12 || KVM_API_VERSION  12
 #error Invalid KVM version
@@ -1378,6 +1362,32 @@ if test $kvm != no ; then
 #endif
 int main(void) { return 0; }
 EOF
+  if test $kerneldir !=  ; then
+  kvm_cflags=-I$kerneldir/include
+  if test \( $cpu = i386 -o $cpu = x86_64 \) \
+ -a -d $kerneldir/arch/x86/include ; then
+kvm_cflags=$kvm_cflags -I$kerneldir/arch/x86/include
+   elif test $cpu = ppc -a -d $kerneldir/arch/powerpc/include ; then
+   kvm_cflags=$kvm_cflags -I$kerneldir/arch/powerpc/include
+elif test -d $kerneldir/arch/$cpu/include ; then
+kvm_cflags=$kvm_cflags -I$kerneldir/arch/$cpu/include
+  fi
+  else
+  case $cpu in
+  i386 | x86_64)
+kvm_arch=x86
+;;
+  ppc)
+kvm_arch=powerpc
+;;
+  *)
+kvm_arch=$cpu
+;;
+  esac
+  kvm_cflags=-I$source_path/kvm/include
+  kvm_cflags=$kvm_cflags -I$source_path/kvm/include/$kvm_arch
+  fi
+  kvm_cflags=$kvm_cflags -idirafter $source_path/compat
   if compile_prog $kvm_cflags  ; then
 kvm=yes
   else



signature.asc
Description: OpenPGP digital signature


Re: Problem booting guest with Linux 2.6.3x

2009-10-10 Thread Daniel Bareiro
Hi, Michael.

On Wednesday, 07 October 2009 15:12:26 +0400,
Michael Tokarev wrote:

 root  (hd0,1)
  Filesystem type is ext2fs, partition type 0x83
 kernel  /boot/vmlinuz-2.6.31.2-dgb root=/dev/hda2 ro quiet console=tty0 
 console
 =ttyS0,38400n8
   [Linux-bzImage, setup=0x3600, size=0x203480]
 initrd  /boot/initrd.img-2.6.31.2-dgb
   [Linux-initrd @ 0x1f983000, 0x65c455 bytes]
 
 Loading, please wait...
 WARNING bootdevice may be renamed. Try root=/dev/sda2

 I think if you boot without the quiet option you'll see that your
 guest IDE disk did in fact get installed as /dev/sda and following
 the advice of the error message above will allow you to boot the
 guest.

 I'm using the option quiet with both stock kernel and the kernel
 compiled by myself.

 It's irrelevant. By using quiet you're hiding the details, that's what
 it is about -- what's what Alex is saying.

Yes. Now that I read this paragraph again, I understand what Alex's
comment was getting at. I must have been somewhat sleepy when I
replied to him :-)

Booting without the quiet option, it can indeed be observed that the
disks are detected as sdX:

[2.874722] scsi0 : ata_piix
[2.879423] scsi1 : ata_piix
[2.882361] ata1: PATA max MWDMA2 cmd 0x1f0 ctl 0x3f6 bmdma 0xc000 irq 14
[2.887637] ata2: PATA max MWDMA2 cmd 0x170 ctl 0x376 bmdma 0xc008 irq 15
[3.049008] ata1.00: ATA-7: QEMU HARDDISK, 0.10.50, max UDMA/100
[3.053725] ata1.00: 10485760 sectors, multi 16: LBA48
[3.057987] ata1.01: ATA-7: QEMU HARDDISK, 0.10.50, max UDMA/100
[3.062926] ata1.01: 20971520 sectors, multi 16: LBA48
[3.068436] ata2.00: ATAPI: QEMU DVD-ROM, 0.10.50, max UDMA/100
[3.073701] ata2.00: configured for MWDMA2
[3.077614] ata1.00: configured for MWDMA2
[3.081563] ata1.01: configured for MWDMA2
[3.085219] scsi 0:0:0:0: Direct-Access ATA  QEMU HARDDISK0.10 
PQ: 0 ANSI: 5
[3.092264] scsi 0:0:1:0: Direct-Access ATA  QEMU HARDDISK0.10 
PQ: 0 ANSI: 5
[3.099339] scsi 1:0:0:0: CD-ROMQEMU QEMU DVD-ROM 0.10 
PQ: 0 ANSI: 5
[3.119143] Uniform Multi-Platform E-IDE driver
[3.163028] sd 0:0:0:0: [sda] 10485760 512-byte logical blocks: (5.36 
GB/5.00 GiB)
[3.169382] sd 0:0:1:0: [sdb] 20971520 512-byte logical blocks: (10.7 
GB/10.0 GiB)
[3.175584] sd 0:0:1:0: [sdb] Write Protect is off
[3.179590] sd 0:0:1:0: [sdb] Write cache: disabled, read cache: enabled, 
doesn't support DPO or FUA
[3.187178] sd 0:0:0:0: [sda] Write Protect is off
[3.191050] sd 0:0:0:0: [sda] Write cache: disabled, read cache: enabled, 
doesn't support DPO or FUA
[3.198647]  sda:
[3.200566]  sdb: sda1 sda2
[3.204719]  sdb1
[3.207129] sd 0:0:1:0: [sdb] Attached SCSI disk
[3.211340] sd 0:0:0:0: [sda] Attached SCSI disk
[3.304284] sr0: scsi3-mmc drive: 4x/4x xa/form2 tray
[3.308449] Uniform CD-ROM driver Revision: 3.20
[3.363890] sd 0:0:0:0: Attached scsi generic sg0 type 0
[3.368353] sd 0:0:1:0: Attached scsi generic sg1 type 0
[3.372536] sr 1:0:0:0: Attached scsi generic sg2 type 5

  You could boot using the uuid of the partition or label the
  filesystem to avoid device naming issues between your original
  lenny kernel and the newer kernel.

 I was trying changing the not swap devices to the uuid form. Although
 in this case the swap device was not detected, the guest boots
 without majors problems. I think that being used the QEMU_HARDDISK
 names provided by udevinfo would have been solved this problem.

 But according to it seems, I could verify that the disks that are
 passed with -hdX in KVM-88 are mapped in 2.6.31.2 guests like
 SATA/SCSI devices. With Linux stock 2.6.26 these are mapped like IDE
 disks. Can it be due to some change in the kernel code related with
 KVM?

 It has nothing to do with kvm.  It's different kernel options, all
 kernels since very early 2.6.x are able to see ide disks as hdX or
 sdX, depending on the kernel options and modules loaded.  There are 2
 drivers for each IDE controller - IDE/ATA one, which creates hdX, and
 PATA one which creates sdX.

From what I have been investigating, I have the impression that the newest
kernels delegate this disk naming to libata. Could it be that in the
2.6.26 Debian stock kernel libata was not yet used in production because
it was still in an experimental phase?

Thanks for your reply.

Regards,
Daniel
-- 
Fingerprint: BFB3 08D6 B4D1 31B2 72B9  29CE 6696 BF1B 14E6 1D37
Powered by Debian GNU/Linux Squeeze - Linux user #188.598


signature.asc
Description: Digital signature


Re: Problem booting guest with Linux 2.6.3x

2009-10-10 Thread Michael Tokarev

Daniel Bareiro wrote:
[]

But according to it seems, I could verify that the disks that are
passed with -hdX in KVM-88 are mapped in 2.6.31.2 guests like
SATA/SCSI devices. With Linux stock 2.6.26 these are mapped like IDE
disks. Can it be due to some change in the kernel code related with
KVM?



It has nothing to do with kvm.  It's different kernel options, all
kernels since very early 2.6.x are able to see ide disks as hdX or
sdX, depending on the kernel options and modules loaded.  There are 2
drivers for each IDE controller - IDE/ATA one, which creates hdX, and
PATA one which creates sdX.


According to I was investigating, I have the impression that the newest
kernels delegate this disks denomination to the use of libata. It would
be that in 2.6.26 Debian stock kernel not yet was productive to be in
experimental phase?


Debian stock kernel config does not enable ata devices, only ide ones.

/mjt
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm or qemu-kvm?

2009-10-10 Thread René Pfeiffer
On Oct 01, 2009 at 1902 +0200, Avi Kivity appeared and said:
 On 10/01/2009 06:51 PM, Ross Boylan wrote:
 
 My distro (Debian) is only at 85, even in unstable.  Since it wasn't
 current, and also the dependencies will have wide effects on my system
 (which I'm trying to keep at the stable release Lenny), I figured
 getting the current source and building it myself would be the best
 move.  For other reasons I'm already running a 2.6.30 kernel from
 Debian, which includes kernel side kvm.  So I figure I only need to mess
 with user space.
 
 Right, stick with your kernel's kvm.ko, qemu-kvm-0.11.0 should make a
 good fit.

Just to be sure: If I use Debian Lenny with a kernel from kernel.org,
then I can use the qemu-kvm packages and be fine. Right?

Best,
René.

-- 
  )\._.,--,'``.  fL  Let GNU/Linux work for you while you take a nap.
 /,   _.. \   _\  (`._ ,. R. Pfeiffer lynx at luchs.at + http://web.luchs.at/
`._.-(,_..'--(,_..'`-.;.'  - System administration + Consulting + Teaching -
Got mail delivery problems?  http://web.luchs.at/information/blockedmail.php


pgpwtdyedBtjK.pgp
Description: PGP signature


Re: kvm or qemu-kvm?

2009-10-10 Thread Michael Tokarev

René Pfeiffer wrote:

On Oct 01, 2009 at 1902 +0200, Avi Kivity appeared and said:

[]

Right, stick with your kernel's kvm.ko, qemu-kvm-0.11.0 should make a
good fit.


Just to be sure: If I use Debian Lenny with a kernel from kernel.org,
then I can use the qemu-kvm packages and be fine. Right?


If you use kernel from kernel.org and kvm from linux-kvm.org,
there's no difference (as far as kvm is concerned) what
distribution it is.

qemu-kvm-0.11.0 works fine with stock kernels >= 2.6.28.

Speaking of debian, take a look at http://www.corpit.ru/debian/tls/kvm/ --
ready-to-install debian packages including 0.11.0.
(don't add this to sources.list)

/mjt
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] tune hrtimer_interrupt hang logic

2009-10-10 Thread Marcelo Tosatti

The hrtimer_interrupt hang logic adjusts min_delta_ns based on the
execution time of the hrtimer callbacks.

This is error-prone for virtual machines, where a guest vcpu can be
scheduled out during the execution of the callbacks (and the callbacks
themselves can do operations that translate to blocking operations in
the hypervisor), which in turn can lead to a large min_delta_ns, rendering
the system unusable.

Change the logic to simply schedule the next interrupt using the 1/4 
ratio, while keeping min_delta_ns intact.

Reported-by: Michael Tokarev m...@tls.msk.ru
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index ff037f0..88a8ca5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -245,6 +245,8 @@ static inline ktime_t hrtimer_expires_remaining(const 
struct hrtimer *timer)
 #ifdef CONFIG_HIGH_RES_TIMERS
 struct clock_event_device;
 
+extern int hrtimer_interrupt_hang;
+
 extern void clock_was_set(void);
 extern void hres_timers_resume(void);
 extern void hrtimer_interrupt(struct clock_event_device *dev);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6d70204..6b81888 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1219,29 +1219,17 @@ static void __run_hrtimer(struct hrtimer *timer, 
ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
 /*
  * After 5 iteration's attempts, we consider that hrtimer_interrupt()
  * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
+ * such as the tracing. 
  * The next tick event will be scheduled to 3 times we currently spend on
  * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
  * 1/4 of their time to process the hrtimer interrupts. This is enough to
  * let it running without serious starvation.
  */
+int __read_mostly hrtimer_interrupt_hang;
 
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-   ktime_t try_time)
-{
-   force_clock_reprogram = 1;
-   dev-min_delta_ns = (unsigned long)try_time.tv64 * 3;
-   printk(KERN_WARNING hrtimer: interrupt too slow, 
-   forcing clock min delta to %lu ns\n, dev-min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1260,8 +1248,27 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
  retry:
/* 5 retries is enough to notice a hang */
-   if (!(++nr_retries % 5))
-   hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
+   if (!(++nr_retries % 5)) {
+   int tries = 0;
+   ktime_t try_time = ktime_sub(ktime_get(), now);
+
+   if (ktime_to_ns(try_time)  dev-min_delta_ns)
+   try_time = ns_to_ktime(dev-min_delta_ns);
+
+   do {
+   tries++;
+   for (i = 0; i  3*tries; i++)
+   expires_next = ktime_add(expires_next,try_time);
+
+   printk_once(KERN_WARNING hrtimer: interrupt too slow, 
+   scheduling tick %lld ns ahead\n,
+   ktime_to_ns(ktime_sub(expires_next,
+   ktime_get(;
+   } while (tick_program_event(expires_next, 1));
+
+   hrtimer_interrupt_hang++;
+   return;
+   }
 
now = ktime_get();
 
@@ -1327,7 +1334,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 
/* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) {
-   if (tick_program_event(expires_next, force_clock_reprogram))
+   if (tick_program_event(expires_next, 0))
goto retry;
}
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d949c5..7223853 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1032,6 +1032,17 @@ static struct ctl_table kern_table[] = {
.proc_handler   = proc_dointvec,
},
 #endif
+#ifdef CONFIG_HIGH_RES_TIMERS
+   {
+   .ctl_name   = CTL_UNNUMBERED,
+   .procname   = hrtimer_interrupt_hang,
+   .data   = hrtimer_interrupt_hang,
+   .maxlen = sizeof (int),
+   .mode   = 0444,
+   .proc_handler   = proc_dointvec,
+   
+   },
+#endif
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kvm guest: hrtimer: interrupt too slow

2009-10-10 Thread Marcelo Tosatti
On Sat, Oct 10, 2009 at 02:34:22AM +0400, Michael Tokarev wrote:
 Frederic Weisbecker wrote:
 On Sat, Oct 10, 2009 at 01:22:16AM +0400, Michael Tokarev wrote:
 Marcelo Tosatti wrote:
 [snip]

 Would be useful to collect sar (sar -B -b -u) output every one second
 in both host/guest. You already mentioned load was low, but this should
 give more details.
 Here we go:  http://www.corpit.ru/mjt/hrtimer-interrupt-too-slow/

 Two incidents - cases when hrtimer: interrupt is too slow were
 reported in the guest (with Marcelo's patch so that min_delta is
 increased to 50% only), happened at 11:44:48 and 21:46:56 (as
 shown in guest-dmesg file).  For both, there's `sar -BWbd' output
 for a 2-minute interval (starting one minute before the delay
 and ending one minute after) from both host and guest.

 Was there swapping going on?
 Not as far as I can see, and sar output agrees.

 But I can read this from you guest traces:

 pgpgin/s pgpgout/s   fault/s  majflt/s  pgfree/s pgscank/s 
 pgscand/s pgsteal/s%vmeff

 11:44:45 0.00 32.32174.75  0.00176.77  0.00  
 0.00  0.00  0.00
 11:44:46 0.00 16.00789.00  0.00323.00  0.00  
 0.00  0.00  0.00
 11:44:47 0.00 32.32907.07  0.00277.78  0.00  
 0.00  0.00  0.00
 11:44:4827.59 22.99 44.83  0.00150.57  0.00  
 0.00  0.00  0.00
 11:44:49 0.00 33.68 22.11  0.00218.95  0.00  
 0.00  0.00  0.00
 11:44:50 0.00101.01 17.17  0.00151.52  0.00  
 0.00  0.00  0.00
 11:44:51 0.00 15.69 16.67  0.00126.47  0.00  
 0.00  0.00  0.00
 [...]
 21:46:52 0.00 40.00 17.00  0.00 82.00  0.00  
 0.00  0.00  0.00
 21:46:53 0.00 31.68 18.81  0.00 94.06  0.00  
 0.00  0.00  0.00
 21:46:54 0.00 31.68 16.83  0.00 90.10  0.00  
 0.00  0.00  0.00
 21:46:55 0.00108.00 17.00  0.00 89.00  0.00  
 0.00  0.00  0.00
 21:46:56 9.76482.93   3890.24  0.00439.02  0.00  
 0.00  0.00  0.00
 21:46:57 0.00760.00   8627.00  0.00   1133.00  0.00  
 0.00  0.00  0.00
 21:46:58 0.00 84.85   2612.12  0.00138.38  0.00  
 0.00  0.00  0.00
 21:46:59 0.00 16.00 17.00  0.00 90.00  0.00  
 0.00  0.00  0.00
 21:47:00 0.00 36.36 17.17  0.00 90.91  0.00  
 0.00  0.00  0.00


 So it looks like there was some swapping in when the hrtimer (spuriously)
 hung.

 It's not swapping.  Swapping is in a separate table, with columns titled
 pswpin/s and pswpout/s -- first table.

 On my home machine with no swap at all, 4gig memory and 2gig free,
 pgpgin and pgpgout are increasing too.

 Also, while in the second case (21:46:56) there's actually some
 noticeable activity (page faults at least), in first case that
 activity is modest.

Yes. It still could be scheduling though (but you mention the host is
idle, you forgot cpu stats in sar).

It might be a QEMU bug, can you share your qemu-kvm command line?
There's always (guest) disk i/o when the hang happens.

Also please give the latest hrtimer_interrupt patch a try.

Thanks!

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kernel bug in kvm_intel

2009-10-10 Thread Avi Kivity

On 10/09/2009 10:04 PM, Andrew Theurer wrote:
This is on latest master branch on kvm.git and qemu-kvm.git, running 
12 Windows Server2008 VMs, and using oprofile.  I ran again without 
oprofile and did not get the BUG.  I am wondering if anyone else is 
seeing this.


Thanks,

-Andrew

Oct  9 11:55:13 virtvictory-eth0 kernel: BUG: unable to handle kernel 
paging request at 9fe9a2b4
Oct  9 11:55:13 virtvictory-eth0 kernel: IP: [a02e1af1] 
vmx_vcpu_run+0x26d/0x64f [kvm_intel]


Can you run this through objdump or gdb to see what source this 
corresponds to?


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 02/27] Pass PVR in sregs

2009-10-10 Thread Alexander Graf


Am 09.10.2009 um 23:00 schrieb Hollis Blanchard holl...@us.ibm.com:


On Tue, 2009-09-29 at 10:17 +0200, Alexander Graf wrote:

Right now sregs is unused on PPC, so we can use it for initialization
of the CPU.

KVM on BookE always virtualizes the host CPU. On Book3s we go a  
step further
and take the PVR from userspace that tells us what kind of CPU we  
are supposed

to virtualize, because we support Book3s_32 and Book3s_64 guests.

In order to get that information, we use the sregs ioctl, because  
we don't

want to reset the guest CPU on every normal register set.

Signed-off-by: Alexander Graf ag...@suse.de
---
arch/powerpc/include/asm/kvm.h |2 ++
1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/ 
asm/kvm.h

index bb2de6a..b82bd68 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -46,6 +46,8 @@ struct kvm_regs {
};

struct kvm_sregs {
+__u64 pvr;
+char pad[1016];
};

struct kvm_fpu {


Architecturally, PVR is 32 bits, even for PPC64. Is there a reason you
want it to be 64 bits here? (I can understand just picking 64 for
registers that could be either size, but that's not this case.)


No obvious reason. It fills a register which can be up to u64, but if  
it's limited to u32 we can keep it 32 bits.


Alex



--
Hollis Blanchard
IBM Linux Technology Center


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 25/27] Fix trace.h

2009-10-10 Thread Benjamin Herrenschmidt
On Fri, 2009-10-09 at 14:42 -0700, Hollis Blanchard wrote:
 After much digging, I managed to actually enable CONFIG_TRACEPOINTS.
 However, I still don't get any build errors from this code. Maybe you
 could paste the full gcc output? 

Another option is to be a bit more ppc-ish and call it IAR instead of
PC :-)

Cheers,
Ben.

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 16/27] Add desktop PowerPC specific emulation

2009-10-10 Thread Benjamin Herrenschmidt
On Fri, 2009-10-09 at 13:57 -0700, Hollis Blanchard wrote:

  +#ifdef CONFIG_PPC64
  +#ifdef DEBUG_EMUL
  +   printk(KERN_INFO mtDEC: %x\n, vcpu-arch.dec);
  +#endif
  +   /* POWER4+ triggers a dec interrupt if the value is  0 */
  +   if (vcpu-arch.dec  0x8000) {
  +   del_timer(vcpu-arch.dec_timer);
  +   kvmppc_core_queue_dec(vcpu);
  +   }
  +   else if (true) {
  +#else
  if (vcpu-arch.tcr  TCR_DIE) {
  +#endif
  /* The decrementer ticks at the same rate as the timebase, so
   * that's how we convert the guest DEC value to the number of
   * host ticks. */
 
 Ifdefs through the middle of control syntax makes my head hurt. :)

Note that your original BookE DEC emulation looks fishy :-)

I may have missed something in your code... but I don't think it
emulates the expected HW behaviour:

Basically, when the BookE DEC hits the 1 -> 0 transition it latches an
event in TSR:DIS always, whether TCR:DIE is set or not. If DIE is
set, the interrupt is sent. It will then stop counting if auto-reload
isn't enabled.

That means that if TSR:DIS is set from a previous event while TCR:DIE is
clear, then setting TCR:DIE (with MSR:EE enabled of course) will trigger
a DEC interrupt on BookE.

The BookE DEC interrupt is basically a level sensitive thing sourced
from (TSR:DIS & TCR:DIE), and TSR:DIS remains set until explicitly
cleared.

Cheers,
Ben.

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html