[COMMIT master] qemu-kvm: fix build on 32 bit
From: Michael S. Tsirkin m...@redhat.com Fix build on 32 bit system: cast 64 bit integer to pointer through pointer-sized integer. Without this, I get: qemu-kvm.c:1557: error: cast to pointer from integer of different size Signed-off-by: Michael S. Tsirkin m...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index a4a90ed..62ca050 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -588,7 +588,7 @@ int kvm_register_phys_mem(kvm_context_t kvm, struct kvm_userspace_memory_region memory = { .memory_size = len, .guest_phys_addr = phys_start, -.userspace_addr = (unsigned long) (intptr_t) userspace_addr, +.userspace_addr = (unsigned long) (uintptr_t) userspace_addr, .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0, }; int r; @@ -1554,7 +1554,8 @@ static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, CPUState *cenv; /* Hope we are lucky for AO MCE */ -if (do_qemu_ram_addr_from_host((void *)siginfo-ssi_addr, paddr)) { +if (do_qemu_ram_addr_from_host((void *)(intptr_t)siginfo-ssi_addr, + paddr)) { fprintf(stderr, Hardware memory error for memory used by QEMU itself instead of guest system!: %llx\n, (unsigned long long)siginfo-ssi_addr); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] qemu-kvm: fix build with KVM_CAP_SET_GUEST_DEBUG
From: Michael S. Tsirkin m...@redhat.com Fix build with KVM_CAP_SET_GUEST_DEBUG: use QLIST macro to declare list head. Signed-off-by: Michael S. Tsirkin m...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/qemu-kvm.h b/qemu-kvm.h index 4523e25..d6748c7 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -1229,7 +1229,7 @@ typedef struct KVMState { int broken_set_mem_region; int migration_log; #ifdef KVM_CAP_SET_GUEST_DEBUG -struct kvm_sw_breakpoint_head kvm_sw_breakpoints; +QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; #endif struct kvm_context kvm_context; } KVMState; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Update kvm headers
From: Avi Kivity a...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/kvm/include/ia64/asm/kvm.h b/kvm/include/ia64/asm/kvm.h index 9920dd6..bc90c75 100644 --- a/kvm/include/ia64/asm/kvm.h +++ b/kvm/include/ia64/asm/kvm.h @@ -21,7 +21,7 @@ * */ -#include asm/types.h +#include linux/types.h #include linux/ioctl.h /* Select x86 specific features in linux/kvm.h */ @@ -60,6 +60,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 #define KVM_CONTEXT_SIZE 8*1024 diff --git a/kvm/include/ia64/asm/kvm_para.h b/kvm/include/ia64/asm/kvm_para.h index 0d6d8ca..1588aee 100644 --- a/kvm/include/ia64/asm/kvm_para.h +++ b/kvm/include/ia64/asm/kvm_para.h @@ -19,9 +19,13 @@ * */ +#ifdef __KERNEL__ + static inline unsigned int kvm_arch_para_features(void) { return 0; } #endif + +#endif diff --git a/kvm/include/linux/compiler.h b/kvm/include/linux/compiler.h new file mode 100644 index 000..f70c49f --- /dev/null +++ b/kvm/include/linux/compiler.h @@ -0,0 +1,2 @@ +/* dummy file */ + diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h index 6d99737..f8f8900 100644 --- a/kvm/include/linux/kvm.h +++ b/kvm/include/linux/kvm.h @@ -7,14 +7,14 @@ * Note: you must update KVM_API_VERSION if you change this interface. 
*/ -#include asm/types.h - +#include linux/types.h +#include linux/compiler.h #include linux/ioctl.h #include asm/kvm.h #define KVM_API_VERSION 12 -/* for KVM_TRACE_ENABLE */ +/* for KVM_TRACE_ENABLE, deprecated */ struct kvm_user_trace_setup { __u32 buf_size; /* sub_buffer size of each per-cpu */ __u32 buf_nr; /* the number of sub_buffers of each per-cpu */ @@ -241,7 +241,7 @@ struct kvm_dirty_log { __u32 slot; __u32 padding1; union { - void *dirty_bitmap; /* one bit per page */ + void __user *dirty_bitmap; /* one bit per page */ __u64 padding2; }; }; @@ -307,6 +307,28 @@ struct kvm_guest_debug { struct kvm_guest_debug_arch arch; }; +enum { + kvm_ioeventfd_flag_nr_datamatch, + kvm_ioeventfd_flag_nr_pio, + kvm_ioeventfd_flag_nr_deassign, + kvm_ioeventfd_flag_nr_max, +}; + +#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 kvm_ioeventfd_flag_nr_datamatch) +#define KVM_IOEVENTFD_FLAG_PIO (1 kvm_ioeventfd_flag_nr_pio) +#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 kvm_ioeventfd_flag_nr_deassign) + +#define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 kvm_ioeventfd_flag_nr_max) - 1) + +struct kvm_ioeventfd { + __u64 datamatch; + __u64 addr;/* legal pio/mmio address */ + __u32 len; /* 1, 2, 4, or 8 bytes*/ + __s32 fd; + __u32 flags; + __u8 pad[36]; +}; + #define KVM_TRC_SHIFT 16 /* * kvm trace categories @@ -325,35 +347,6 @@ struct kvm_guest_debug { #define KVM_TRC_CYCLE_SIZE 8 #define KVM_TRC_EXTRA_MAX 7 -/* This structure represents a single trace buffer record. 
*/ -struct kvm_trace_rec { - /* variable rec_val -* is split into: -* bits 0 - 27 - event id -* bits 28 -30 - number of extra data args of size u32 -* bits 31 - binary indicator for if tsc is in record -*/ - __u32 rec_val; - __u32 pid; - __u32 vcpu_id; - union { - struct { - __u64 timestamp; - __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } __attribute__((packed)) timestamp; - struct { - __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } notimestamp; - } u; -}; - -#define TRACE_REC_EVENT_ID(val) \ - (0x0fff (val)) -#define TRACE_REC_NUM_DATA_ARGS(val) \ - (0x7000 ((val) 28)) -#define TRACE_REC_TCS(val) \ - (0x8000 ((val) 31)) - #define KVMIO 0xAE /* @@ -433,8 +426,15 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_MCE #define KVM_CAP_MCE 31 #endif +#define KVM_CAP_IRQFD 32 +#ifdef __KVM_HAVE_PIT #define KVM_CAP_PIT2 33 +#endif +#define KVM_CAP_SET_BOOT_CPU_ID 34 +#ifdef __KVM_HAVE_PIT_STATE2 #define KVM_CAP_PIT_STATE2 35 +#endif +#define KVM_CAP_IOEVENTFD 36 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 #ifdef KVM_CAP_IRQ_ROUTING @@ -488,16 +488,19 @@ struct kvm_x86_mce { }; #endif +#define KVM_IRQFD_FLAG_DEASSIGN (1 0) + +struct kvm_irqfd { + __u32 fd; + __u32 gsi; + __u32 flags; + __u8 pad[20]; +}; + /* * ioctls for VM fds */ #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) -#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) -#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) -#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ - struct kvm_userspace_memory_region) -#define KVM_SET_TSS_ADDR _IO(KVMIO,
[COMMIT master] KVM: use proper hrtimer function to retrieve expiration time
From: Marcelo Tosatti mtosa...@redhat.com hrtimer-base can be temporarily NULL due to racing hrtimer_start. See switch_hrtimer_base/lock_hrtimer_base. Use hrtimer_get_remaining which is robust against it. CC: sta...@kernel.org Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index b857ca3..fab7440 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm) * itself with the initial count and continues counting * from there. */ - remaining = hrtimer_expires_remaining(ps-pit_timer.timer); + remaining = hrtimer_get_remaining(ps-pit_timer.timer); elapsed = ps-pit_timer.period - ktime_to_ns(remaining); elapsed = mod_64(elapsed, ps-pit_timer.period); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3ca7767..cd60c0b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -515,7 +515,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - remaining = hrtimer_expires_remaining(apic-lapic_timer.timer); + remaining = hrtimer_get_remaining(apic-lapic_timer.timer); if (ktime_to_ns(remaining) 0) remaining = ktime_set(0, 0); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Autotest] [PATCH] Fix a bug in function create in kvm_vm
On Mon, Oct 05, 2009 at 04:03:22PM -0300, Lucas Meneghel Rodrigues wrote: Hi Yolkfull! I've checked your patch, but it turns out that the comma is valid syntax for the logging module. By any chance you actually had an error with it? Hi Lucas, I just checked, yes it's valid syntax for this module. Before this I met a traceback during running autotest and it indicated this line around,thus I doubt about this by mistake. Sorry for confusing. ;-) But I found for the variables in logging.debug(),sometimes it use comma to format while sometimes '%' which will drop code readability. Anyway, thanks for checking. On Mon, Sep 28, 2009 at 4:45 AM, Yolkfull Chow yz...@redhat.com wrote: Signed-off-by: Yolkfull Chow yz...@redhat.com --- client/tests/kvm/kvm_vm.py | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index 55220f9..8ae 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -406,7 +406,7 @@ class VM: self.process.get_output())) return False - logging.debug(VM appears to be alive with PID %d, + logging.debug(VM appears to be alive with PID %d % self.process.get_pid()) return True -- 1.6.2.5 ___ Autotest mailing list autot...@test.kernel.org http://test.kernel.org/cgi-bin/mailman/listinfo/autotest -- Lucas -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Autotest] [PATCH] Fix a bug in function create in kvm_vm
On Sat, Oct 10, 2009 at 04:24:45PM +0800, Yolkfull Chow wrote: On Mon, Oct 05, 2009 at 04:03:22PM -0300, Lucas Meneghel Rodrigues wrote: Hi Yolkfull! I've checked your patch, but it turns out that the comma is valid syntax for the logging module. By any chance you actually had an error with it? Hi Lucas, I just checked, yes it's valid syntax for this module. Before this I met a traceback during running autotest and it indicated this line around,thus I doubt about this by mistake. Sorry for confusing. ;-) But I found for the variables in logging.debug(),sometimes it use comma to format while sometimes '%' which will drop code readability. Another reason is if someone who still using kvm_log want to backport codes from this tree, not only need he replace all 'logging' with 'kvm_log' but also need change these comma syntax. ;-) Anyway, thanks for checking. On Mon, Sep 28, 2009 at 4:45 AM, Yolkfull Chow yz...@redhat.com wrote: Signed-off-by: Yolkfull Chow yz...@redhat.com --- client/tests/kvm/kvm_vm.py | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index 55220f9..8ae 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -406,7 +406,7 @@ class VM: self.process.get_output())) return False - logging.debug(VM appears to be alive with PID %d, + logging.debug(VM appears to be alive with PID %d % self.process.get_pid()) return True -- 1.6.2.5 ___ Autotest mailing list autot...@test.kernel.org http://test.kernel.org/cgi-bin/mailman/listinfo/autotest -- Lucas -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe 
kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm guest: hrtimer: interrupt too slow
Michael Tokarev wrote: Frederic Weisbecker wrote: [] Was there swapping going on? Not as far as I can see, and sar output agrees. But I can read this from you guest traces: I missed this one yesterday. Note it's GUEST traces indeed. Higher (read: non-zero) pgp{in,out} and faults values happens in *guest*, not on host (original question was if we've swapping in HOST, which'd explain the timer issues) [cutting extra all-zero columns] pgpgin/s pgpgout/s fault/s pgfree/s 11:44:47 0.00 32.32907.07 277.78 11:44:4827.59 22.99 44.83 150.57 11:44:49 0.00 33.68 22.11 218.95 [...] 21:46:54 0.00 31.68 16.8390.10 21:46:55 0.00108.00 17.0089.00 21:46:56 9.76482.93 3890.24 439.02 21:46:57 0.00760.00 8627.00 1133.00 21:46:58 0.00 84.85 2612.12 138.38 21:46:59 0.00 16.00 17.0090.00 So it looks like there was some swapping in when the hrtimer (spuriously) hanged. One possible guess. Since the guest hanged for some time, the higher values there might be a result of accumulated values for several seconds. It's not swapping. Swapping is in a separate table, with columns titled pswpin/s and pswpout/s -- first table. On my home machine with no swap at all, 4gig memory and 2gig free, pgpgin and pgpgout are increasing too. Also, while in the second case (21:46:56) there's actually some noticeable activity (page faults at least), in first case that activity is modest. Note there's no documentation for /proc/vmstat file :) /mjt -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm guest: hrtimer: interrupt too slow
On Sat, Oct 10, 2009 at 01:18:16PM +0400, Michael Tokarev wrote: Michael Tokarev wrote: Frederic Weisbecker wrote: [] Was there swapping going on? Not as far as I can see, and sar output agrees. But I can read this from you guest traces: I missed this one yesterday. Note it's GUEST traces indeed. Higher (read: non-zero) pgp{in,out} and faults values happens in *guest*, not on host (original question was if we've swapping in HOST, which'd explain the timer issues) Yeah indeed. But still, that's a strange happenstance. [cutting extra all-zero columns] pgpgin/s pgpgout/s fault/s pgfree/s 11:44:47 0.00 32.32907.07 277.78 11:44:4827.59 22.99 44.83 150.57 11:44:49 0.00 33.68 22.11 218.95 [...] 21:46:54 0.00 31.68 16.8390.10 21:46:55 0.00108.00 17.0089.00 21:46:56 9.76482.93 3890.24 439.02 21:46:57 0.00760.00 8627.00 1133.00 21:46:58 0.00 84.85 2612.12 138.38 21:46:59 0.00 16.00 17.0090.00 So it looks like there was some swapping in when the hrtimer (spuriously) hanged. One possible guess. Since the guest hanged for some time, the higher values there might be a result of accumulated values for several seconds. May be yeah. I don't know enough about virtual internals so... -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] qemu-kvm: Fix configure to respect --kerneldir
This simplifies working with new features without having to update the locally mirrored headers. It also reduces the diff to upstream. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- configure | 44 +++- 1 files changed, 27 insertions(+), 17 deletions(-) diff --git a/configure b/configure index 8866258..3a4318c 100755 --- a/configure +++ b/configure @@ -1346,23 +1346,7 @@ fi ## # kvm probe if test $kvm != no ; then - case $cpu in - i386 | x86_64) -kvm_arch=x86 -;; - ppc) -kvm_arch=powerpc -;; - *) -kvm_arch=$cpu -;; - esac - - kvm_cflags=-I$source_path/kvm/include - kvm_cflags=$kvm_cflags -I$source_path/kvm/include/$kvm_arch - kvm_cflags=$kvm_cflags -idirafter $source_path/compat - - cat $TMPC EOF +cat $TMPC EOF #include linux/kvm.h #if !defined(KVM_API_VERSION) || KVM_API_VERSION 12 || KVM_API_VERSION 12 #error Invalid KVM version @@ -1378,6 +1362,32 @@ if test $kvm != no ; then #endif int main(void) { return 0; } EOF + if test $kerneldir != ; then + kvm_cflags=-I$kerneldir/include + if test \( $cpu = i386 -o $cpu = x86_64 \) \ + -a -d $kerneldir/arch/x86/include ; then +kvm_cflags=$kvm_cflags -I$kerneldir/arch/x86/include + elif test $cpu = ppc -a -d $kerneldir/arch/powerpc/include ; then + kvm_cflags=$kvm_cflags -I$kerneldir/arch/powerpc/include +elif test -d $kerneldir/arch/$cpu/include ; then +kvm_cflags=$kvm_cflags -I$kerneldir/arch/$cpu/include + fi + else + case $cpu in + i386 | x86_64) +kvm_arch=x86 +;; + ppc) +kvm_arch=powerpc +;; + *) +kvm_arch=$cpu +;; + esac + kvm_cflags=-I$source_path/kvm/include + kvm_cflags=$kvm_cflags -I$source_path/kvm/include/$kvm_arch + fi + kvm_cflags=$kvm_cflags -idirafter $source_path/compat if compile_prog $kvm_cflags ; then kvm=yes else signature.asc Description: OpenPGP digital signature
Re: Problem booting guest with Linux 2.6.3x
Hi, Michael. On Wednesday, 07 October 2009 15:12:26 +0400, Michael Tokarev wrote: root (hd0,1) Filesystem type is ext2fs, partition type 0x83 kernel /boot/vmlinuz-2.6.31.2-dgb root=/dev/hda2 ro quiet console=tty0 console =ttyS0,38400n8 [Linux-bzImage, setup=0x3600, size=0x203480] initrd /boot/initrd.img-2.6.31.2-dgb [Linux-initrd @ 0x1f983000, 0x65c455 bytes] Loading, please wait... WARNING bootdevice may be renamed. Try root=/dev/sda2 I think if you boot without the quiet option you'll see that your guest IDE disk did in fact get installed as /dev/sda and following the advice of the error message above will allow you to boot the guest. I'm using the option quiet with both stock kernel and the kernel compiled by myself. It's irrelevant. By using quiet you're hiding the details, that's what it is about -- what's what Alex is saying. Yes. Now that I read this paragraph again, I understood what was the idea of the comment of Alex. I must have been something sleepy when I replied to him :-) Booting without the quiet option indeed it can be observed that the disks are detected like sdX: [2.874722] scsi0 : ata_piix [2.879423] scsi1 : ata_piix [2.882361] ata1: PATA max MWDMA2 cmd 0x1f0 ctl 0x3f6 bmdma 0xc000 irq 14 [2.887637] ata2: PATA max MWDMA2 cmd 0x170 ctl 0x376 bmdma 0xc008 irq 15 [3.049008] ata1.00: ATA-7: QEMU HARDDISK, 0.10.50, max UDMA/100 [3.053725] ata1.00: 10485760 sectors, multi 16: LBA48 [3.057987] ata1.01: ATA-7: QEMU HARDDISK, 0.10.50, max UDMA/100 [3.062926] ata1.01: 20971520 sectors, multi 16: LBA48 [3.068436] ata2.00: ATAPI: QEMU DVD-ROM, 0.10.50, max UDMA/100 [3.073701] ata2.00: configured for MWDMA2 [3.077614] ata1.00: configured for MWDMA2 [3.081563] ata1.01: configured for MWDMA2 [3.085219] scsi 0:0:0:0: Direct-Access ATA QEMU HARDDISK0.10 PQ: 0 ANSI: 5 [3.092264] scsi 0:0:1:0: Direct-Access ATA QEMU HARDDISK0.10 PQ: 0 ANSI: 5 [3.099339] scsi 1:0:0:0: CD-ROMQEMU QEMU DVD-ROM 0.10 PQ: 0 ANSI: 5 [3.119143] Uniform Multi-Platform E-IDE driver 
[3.163028] sd 0:0:0:0: [sda] 10485760 512-byte logical blocks: (5.36 GB/5.00 GiB) [3.169382] sd 0:0:1:0: [sdb] 20971520 512-byte logical blocks: (10.7 GB/10.0 GiB) [3.175584] sd 0:0:1:0: [sdb] Write Protect is off [3.179590] sd 0:0:1:0: [sdb] Write cache: disabled, read cache: enabled, doesn't support DPO or FUA [3.187178] sd 0:0:0:0: [sda] Write Protect is off [3.191050] sd 0:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't support DPO or FUA [3.198647] sda: [3.200566] sdb: sda1 sda2 [3.204719] sdb1 [3.207129] sd 0:0:1:0: [sdb] Attached SCSI disk [3.211340] sd 0:0:0:0: [sda] Attached SCSI disk [3.304284] sr0: scsi3-mmc drive: 4x/4x xa/form2 tray [3.308449] Uniform CD-ROM driver Revision: 3.20 [3.363890] sd 0:0:0:0: Attached scsi generic sg0 type 0 [3.368353] sd 0:0:1:0: Attached scsi generic sg1 type 0 [3.372536] sr 1:0:0:0: Attached scsi generic sg2 type 5 You could boot using the uuid of the partition or label the filesystem to avoid device naming issues between your original lenny kernel and the newer kernel. I was trying changing the not swap devices to the uuid form. Although in this case the swap device was not detected, the guest boots without majors problems. I think that being used the QEMU_HARDDISK names provided by udevinfo would have been solved this problem. But according to it seems, I could verify that the disks that are passed with -hdX in KVM-88 are mapped in 2.6.31.2 guests like SATA/SCSI devices. With Linux stock 2.6.26 these are mapped like IDE disks. Can it be due to some change in the kernel code related with KVM? It has nothing to do with kvm. It's different kernel options, all kernels since very early 2.6.x are able to see ide disks as hdX or sdX, depending on the kernel options and modules loaded. There are 2 drivers for each IDE controller - IDE/ATA one, which creates hdX, and PATA one which creates sdX. 
According to I was investigating, I have the impression that the newest kernels delegate this disks denomination to the use of libata. It would be that in 2.6.26 Debian stock kernel not yet was productive to be in experimental phase? Thanks for your reply. Regards, Daniel -- Fingerprint: BFB3 08D6 B4D1 31B2 72B9 29CE 6696 BF1B 14E6 1D37 Powered by Debian GNU/Linux Squeeze - Linux user #188.598 signature.asc Description: Digital signature
Re: Problem booting guest with Linux 2.6.3x
Daniel Bareiro wrote: [] But according to it seems, I could verify that the disks that are passed with -hdX in KVM-88 are mapped in 2.6.31.2 guests like SATA/SCSI devices. With Linux stock 2.6.26 these are mapped like IDE disks. Can it be due to some change in the kernel code related with KVM? It has nothing to do with kvm. It's different kernel options, all kernels since very early 2.6.x are able to see ide disks as hdX or sdX, depending on the kernel options and modules loaded. There are 2 drivers for each IDE controller - IDE/ATA one, which creates hdX, and PATA one which creates sdX. According to I was investigating, I have the impression that the newest kernels delegate this disks denomination to the use of libata. It would be that in 2.6.26 Debian stock kernel not yet was productive to be in experimental phase? Debian stock kernel config does not enable ata devices, only ide ones. /mjt -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm or qemu-kvm?
On Oct 01, 2009 at 1902 +0200, Avi Kivity appeared and said: On 10/01/2009 06:51 PM, Ross Boylan wrote: My distro (Debian) is only at 85, even in unstable. Since it wasn't current, and also the dependencies will have wide effects on my system (which I'm trying to keep at the stable release Lenny), I figured getting the current source and building it myself would be the best move. For other reasons I'm already running a 2.6.30 kernel from Debian, which includes kernel side kvm. So I figure I only need to mess with user space. Right, stick with your kernel's kvm.ko, qemu-kvm-0.11.0 should make a good fit. Just to be sure: If I use Debian Lenny with a kernel from kernel.org, then I can use the qemu-kvm packages and be fine. Right? Best, René. -- )\._.,--,'``. fL Let GNU/Linux work for you while you take a nap. /, _.. \ _\ (`._ ,. R. Pfeiffer lynx at luchs.at + http://web.luchs.at/ `._.-(,_..'--(,_..'`-.;.' - System administration + Consulting + Teaching - Got mail delivery problems? http://web.luchs.at/information/blockedmail.php pgpwtdyedBtjK.pgp Description: PGP signature
Re: kvm or qemu-kvm?
René Pfeiffer wrote: On Oct 01, 2009 at 1902 +0200, Avi Kivity appeared and said: [] Right, stick with your kernel's kvm.ko, qemu-kvm-0.11.0 should make a good fit. Just to be sure: If I use Debian Lenny with a kernel from kernel.org, then I can use the qemu-kvm packages and be fine. Right? If you use kernel from kernel.org and kvm from linux-kvm.org, there's no difference (as far as kvm is concerned) what distribution it is. qemu-kvm-0.11.0 works fine with stock kernels >= 2.6.28. Speaking of debian, take a look at http://www.corpit.ru/debian/tls/kvm/ -- ready-to-install debian packages including 0.11.0. (don't add this to sources.list) /mjt -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] tune hrtimer_interrupt hang logic
The hrtimer_interrupt hang logic adjusts min_delta_ns based on the execution time of the hrtimer callbacks. This is error-prone for virtual machines, where a guest vcpu can be scheduled out during the execution of the callbacks (and the callbacks themselves can do operations that translate to blocking operations in the hypervisor), which in turn can lead to large min_delta_ns rendering the system unusable. Change the logic to simply schedule the next interrupt using the 1/4 ratio, while keeping min_delta_ns intact. Reported-by: Michael Tokarev m...@tls.msk.ru Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index ff037f0..88a8ca5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -245,6 +245,8 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; +extern int hrtimer_interrupt_hang; + extern void clock_was_set(void); extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6d70204..6b81888 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1219,29 +1219,17 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) #ifdef CONFIG_HIGH_RES_TIMERS -static int force_clock_reprogram; - /* * After 5 iteration's attempts, we consider that hrtimer_interrupt() * is hanging, which could happen with something that slows the interrupt - * such as the tracing. Then we force the clock reprogramming for each future - * hrtimer interrupts to avoid infinite loops and use the min_delta_ns - * threshold that we will overwrite. + * such as the tracing. * The next tick event will be scheduled to 3 times we currently spend on * hrtimer_interrupt(). This gives a good compromise, the cpus will spend * 1/4 of their time to process the hrtimer interrupts. This is enough to * let it running without serious starvation. 
*/ +int __read_mostly hrtimer_interrupt_hang; -static inline void -hrtimer_interrupt_hanging(struct clock_event_device *dev, - ktime_t try_time) -{ - force_clock_reprogram = 1; - dev-min_delta_ns = (unsigned long)try_time.tv64 * 3; - printk(KERN_WARNING hrtimer: interrupt too slow, - forcing clock min delta to %lu ns\n, dev-min_delta_ns); -} /* * High resolution timer interrupt * Called with interrupts disabled @@ -1260,8 +1248,27 @@ void hrtimer_interrupt(struct clock_event_device *dev) retry: /* 5 retries is enough to notice a hang */ - if (!(++nr_retries % 5)) - hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now)); + if (!(++nr_retries % 5)) { + int tries = 0; + ktime_t try_time = ktime_sub(ktime_get(), now); + + if (ktime_to_ns(try_time) dev-min_delta_ns) + try_time = ns_to_ktime(dev-min_delta_ns); + + do { + tries++; + for (i = 0; i 3*tries; i++) + expires_next = ktime_add(expires_next,try_time); + + printk_once(KERN_WARNING hrtimer: interrupt too slow, + scheduling tick %lld ns ahead\n, + ktime_to_ns(ktime_sub(expires_next, + ktime_get(; + } while (tick_program_event(expires_next, 1)); + + hrtimer_interrupt_hang++; + return; + } now = ktime_get(); @@ -1327,7 +1334,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) /* Reprogramming necessary ? 
*/ if (expires_next.tv64 != KTIME_MAX) { - if (tick_program_event(expires_next, force_clock_reprogram)) + if (tick_program_event(expires_next, 0)) goto retry; } } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0d949c5..7223853 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1032,6 +1032,17 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_HIGH_RES_TIMERS + { + .ctl_name = CTL_UNNUMBERED, + .procname = hrtimer_interrupt_hang, + .data = hrtimer_interrupt_hang, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = proc_dointvec, + + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kvm guest: hrtimer: interrupt too slow
On Sat, Oct 10, 2009 at 02:34:22AM +0400, Michael Tokarev wrote: Frederic Weisbecker wrote: On Sat, Oct 10, 2009 at 01:22:16AM +0400, Michael Tokarev wrote: Marcelo Tosatti wrote: [snip] Would be useful to collect sar (sar -B -b -u) output every one second in both host/guest. You already mentioned load was low, but this should give more details. Here we go: http://www.corpit.ru/mjt/hrtimer-interrupt-too-slow/ Two incindents - cases when hrtimer: interrupt is too slow were reported in the guest (with Marcelo's patch so that min_delta is increased to 50% only), happened at 11:44:48 and 21:46:56 (as shown in guest-dmesg file). For both, there's `sar -BWbd' output for a 2-minute interval (starting one minute before the delay and ending one minute after) from both host and guest. Was there swapping going on? Not as far as I can see, and sar output agrees. But I can read this from you guest traces: pgpgin/s pgpgout/s fault/s majflt/s pgfree/s pgscank/s pgscand/s pgsteal/s%vmeff 11:44:45 0.00 32.32174.75 0.00176.77 0.00 0.00 0.00 0.00 11:44:46 0.00 16.00789.00 0.00323.00 0.00 0.00 0.00 0.00 11:44:47 0.00 32.32907.07 0.00277.78 0.00 0.00 0.00 0.00 11:44:4827.59 22.99 44.83 0.00150.57 0.00 0.00 0.00 0.00 11:44:49 0.00 33.68 22.11 0.00218.95 0.00 0.00 0.00 0.00 11:44:50 0.00101.01 17.17 0.00151.52 0.00 0.00 0.00 0.00 11:44:51 0.00 15.69 16.67 0.00126.47 0.00 0.00 0.00 0.00 [...] 21:46:52 0.00 40.00 17.00 0.00 82.00 0.00 0.00 0.00 0.00 21:46:53 0.00 31.68 18.81 0.00 94.06 0.00 0.00 0.00 0.00 21:46:54 0.00 31.68 16.83 0.00 90.10 0.00 0.00 0.00 0.00 21:46:55 0.00108.00 17.00 0.00 89.00 0.00 0.00 0.00 0.00 21:46:56 9.76482.93 3890.24 0.00439.02 0.00 0.00 0.00 0.00 21:46:57 0.00760.00 8627.00 0.00 1133.00 0.00 0.00 0.00 0.00 21:46:58 0.00 84.85 2612.12 0.00138.38 0.00 0.00 0.00 0.00 21:46:59 0.00 16.00 17.00 0.00 90.00 0.00 0.00 0.00 0.00 21:47:00 0.00 36.36 17.17 0.00 90.91 0.00 0.00 0.00 0.00 So it looks like there was some swapping in when the hrtimer (spuriously) hanged. 
It's not swapping. Swapping is in a separate table, with columns titled pswpin/s and pswpout/s -- first table. On my home machine with no swap at all, 4gig memory and 2gig free, pgpgin and pgpgout are increasing too. Also, while in the second case (21:46:56) there's actually some noticeable activity (page faults at least), in first case that activity is modest. Yes. It still could be scheduling though (but you mention the host is idle, you forgot cpu stats in sar). It might be a QEMU bug, can you share your qemu-kvm command line? There's always (guest) disk i/o when the hang happens. Also please give the latest hrtimer_interrupt patch a try. Thanks! -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel bug in kvm_intel
On 10/09/2009 10:04 PM, Andrew Theurer wrote: This is on latest master branch on kvm.git and qemu-kvm.git, running 12 Windows Server 2008 VMs, and using oprofile. I ran again without oprofile and did not get the BUG. I am wondering if anyone else is seeing this. Thanks, -Andrew Oct 9 11:55:13 virtvictory-eth0 kernel: BUG: unable to handle kernel paging request at 9fe9a2b4 Oct 9 11:55:13 virtvictory-eth0 kernel: IP: [a02e1af1] vmx_vcpu_run+0x26d/0x64f [kvm_intel] Can you run this through objdump or gdb to see what source this corresponds to? -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 02/27] Pass PVR in sregs
Am 09.10.2009 um 23:00 schrieb Hollis Blanchard holl...@us.ibm.com: On Tue, 2009-09-29 at 10:17 +0200, Alexander Graf wrote: Right now sregs is unused on PPC, so we can use it for initialization of the CPU. KVM on BookE always virtualizes the host CPU. On Book3s we go a step further and take the PVR from userspace that tells us what kind of CPU we are supposed to virtualize, because we support Book3s_32 and Book3s_64 guests. In order to get that information, we use the sregs ioctl, because we don't want to reset the guest CPU on every normal register set. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/include/asm/kvm.h |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index bb2de6a..b82bd68 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -46,6 +46,8 @@ struct kvm_regs { }; struct kvm_sregs { +__u64 pvr; +char pad[1016]; }; struct kvm_fpu { Architecturally, PVR is 32 bits, even for PPC64. Is there a reason you want it to be 64 bits here? (I can understand just picking 64 for registers that could be either size, but that's not this case.) No obvious reason. It fills a register which can be up to u64, but if it's limited to u32 we can keep it 32 bits. Alex -- Hollis Blanchard IBM Linux Technology Center -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 25/27] Fix trace.h
On Fri, 2009-10-09 at 14:42 -0700, Hollis Blanchard wrote: After much digging, I managed to actually enable CONFIG_TRACEPOINTS. However, I still don't get any build errors from this code. Maybe you could paste the full gcc output? Another option is to be a bit more ppc-ish and call it IAR instead of PC :-) Cheers, Ben. -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 16/27] Add desktop PowerPC specific emulation
On Fri, 2009-10-09 at 13:57 -0700, Hollis Blanchard wrote: +#ifdef CONFIG_PPC64 +#ifdef DEBUG_EMUL + printk(KERN_INFO mtDEC: %x\n, vcpu-arch.dec); +#endif + /* POWER4+ triggers a dec interrupt if the value is 0 */ + if (vcpu-arch.dec 0x8000) { + del_timer(vcpu-arch.dec_timer); + kvmppc_core_queue_dec(vcpu); + } + else if (true) { +#else if (vcpu-arch.tcr TCR_DIE) { +#endif /* The decrementer ticks at the same rate as the timebase, so * that's how we convert the guest DEC value to the number of * host ticks. */ Ifdefs through the middle of control syntax makes my head hurt. :) Note that your original BookE DEC emulation looks fishy :-) I may have missed something in your code... but I don't think it emulates the expected HW behaviour: Basically, when the BookE DEC hits the 1 -> 0 transition it latches an event in TSR:DIS always, whether TCR:DIE is set or not. If DIE is set, the interrupt is sent. It will then stop counting if auto-reload isn't enabled. That means that if TSR:DIS is set from a previous event while TCR:DIE is clear, then setting TCR:DIE (with MSR:EE enabled of course) will trigger a DEC interrupt on BookE. The BookE DEC interrupt is basically a level sensitive thing sourced from (TSR:DIS & TCR:DIE), and TSR:DIS remains set until explicitly cleared. Cheers, Ben. -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html