[PATCH] KVM: PCIPT: VT-d support
From: Kay, Allen M [EMAIL PROTECTED] This patch includes the functions to support VT-d for passthrough devices. [Ben: fixed memory pinning] Signed-off-by: Kay, Allen M [EMAIL PROTECTED] Signed-off-by: Weidong Han [EMAIL PROTECTED] Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED] --- arch/x86/kvm/Makefile |2 +- arch/x86/kvm/vtd.c | 176 arch/x86/kvm/x86.c | 10 +++ include/asm-x86/kvm_host.h |1 + include/linux/kvm_host.h |6 ++ virt/kvm/kvm_main.c|6 ++ 6 files changed, 200 insertions(+), 1 deletions(-) create mode 100644 arch/x86/kvm/vtd.c diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d0e940b..5d9d079 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -11,7 +11,7 @@ endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ - i8254.o + i8254.o vtd.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c new file mode 100644 index 000..83efb8a --- /dev/null +++ b/arch/x86/kvm/vtd.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Allen M. 
Kay [EMAIL PROTECTED] + * Author: Weidong Han [EMAIL PROTECTED] + */ + +#include linux/list.h +#include linux/kvm_host.h +#include linux/pci.h +#include linux/dmar.h +#include linux/intel-iommu.h + +static int kvm_iommu_unmap_memslots(struct kvm *kvm); + +int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + int i, rc; + struct dmar_domain *domain = kvm-arch.intel_iommu_domain; + + if (!domain) + return -EFAULT; + + for (i = 0; i npages; i++) { + pfn = gfn_to_pfn(kvm, gfn); + rc = intel_iommu_page_mapping(domain, + gfn PAGE_SHIFT, + pfn PAGE_SHIFT, + PAGE_SIZE, + DMA_PTE_READ | + DMA_PTE_WRITE); + if (rc) + kvm_release_pfn_clean(pfn); + + gfn++; + } + return 0; +} + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int i, rc; + for (i = 0; i kvm-nmemslots; i++) { + rc = kvm_iommu_map_pages(kvm, kvm-memslots[i].base_gfn, +kvm-memslots[i].npages); + if (rc) + return rc; + } + return 0; +} + +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_pci_passthrough_dev *pci_pt_dev) +{ + struct pci_dev *pdev = NULL; + + printk(KERN_DEBUG VT-d direct map: host bdf = %x:%x:%x\n, + pci_pt_dev-host.busnr, + PCI_SLOT(pci_pt_dev-host.devfn), + PCI_FUNC(pci_pt_dev-host.devfn)); + + for_each_pci_dev(pdev) { + if ((pdev-bus-number == pci_pt_dev-host.busnr) + (pdev-devfn == pci_pt_dev-host.devfn)) { + break; + } + } + + if (pdev == NULL) { + if (kvm-arch.intel_iommu_domain) { + intel_iommu_domain_exit(kvm-arch.intel_iommu_domain); + kvm-arch.intel_iommu_domain = NULL; + } + return -ENODEV; + } + + kvm-arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); + + if (kvm_iommu_map_memslots(kvm)) { + kvm_iommu_unmap_memslots(kvm); + return -EFAULT; + } + + intel_iommu_detach_dev(kvm-arch.intel_iommu_domain, + pdev-bus-number, pdev-devfn); + + if (intel_iommu_context_mapping(kvm-arch.intel_iommu_domain, + pdev)) { + printk(KERN_ERR Domain context map for %s failed, + pci_name(pdev)); + return -EFAULT; + } + 
return 0; +} + +static int
[PATCH] VT-d : changes to support KVM
From: Kay, Allen M [EMAIL PROTECTED] This patch extends the VT-d driver to support KVM [Ben: fixed memory pinning] Signed-off-by: Kay, Allen M [EMAIL PROTECTED] Signed-off-by: Weidong Han [EMAIL PROTECTED] Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED] --- drivers/pci/dmar.c |4 +- drivers/pci/intel-iommu.c| 117 +- drivers/pci/iova.c |2 +- {drivers/pci = include/linux}/intel-iommu.h | 11 +++ {drivers/pci = include/linux}/iova.h|0 5 files changed, 127 insertions(+), 7 deletions(-) rename {drivers/pci = include/linux}/intel-iommu.h (94%) rename {drivers/pci = include/linux}/iova.h (100%) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f941f60..a58a5b0 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -26,8 +26,8 @@ #include linux/pci.h #include linux/dmar.h -#include iova.h -#include intel-iommu.h +#include linux/iova.h +#include linux/intel-iommu.h #undef PREFIX #define PREFIX DMAR: diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 66c0fd2..6ad2c75 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -20,6 +20,7 @@ * Author: Anil S Keshavamurthy [EMAIL PROTECTED] */ +#undef DEBUG #include linux/init.h #include linux/bitmap.h #include linux/debugfs.h @@ -33,8 +34,8 @@ #include linux/dma-mapping.h #include linux/mempool.h #include linux/timer.h -#include iova.h -#include intel-iommu.h +#include linux/iova.h +#include linux/intel-iommu.h #include asm/proto.h /* force_iommu in this header in x86-64*/ #include asm/cacheflush.h #include asm/gart.h @@ -160,7 +161,7 @@ static inline void *alloc_domain_mem(void) return iommu_kmem_cache_alloc(iommu_domain_cache); } -static inline void free_domain_mem(void *vaddr) +static void free_domain_mem(void *vaddr) { kmem_cache_free(iommu_domain_cache, vaddr); } @@ -1414,7 +1415,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) * find_domain * Note: we use struct pci_dev-dev.archdata.iommu stores the info */ -struct dmar_domain * +static struct 
dmar_domain * find_domain(struct pci_dev *pdev) { struct device_domain_info *info; @@ -2408,3 +2409,111 @@ int __init intel_iommu_init(void) return 0; } +void intel_iommu_domain_exit(struct dmar_domain *domain) +{ + u64 end; + + /* Domain 0 is reserved, so dont process it */ + if (!domain) + return; + + end = DOMAIN_MAX_ADDR(domain-gaw); + end = end (~PAGE_MASK_4K); + + /* clear ptes */ + dma_pte_clear_range(domain, 0, end); + + /* free page tables */ + dma_pte_free_pagetable(domain, 0, end); + + iommu_free_domain(domain); + free_domain_mem(domain); +} +EXPORT_SYMBOL_GPL(intel_iommu_domain_exit); + +struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev) +{ + struct dmar_drhd_unit *drhd; + struct dmar_domain *domain; + struct intel_iommu *iommu; + + drhd = dmar_find_matched_drhd_unit(pdev); + if (!drhd) { + printk(KERN_ERR intel_iommu_domain_alloc: drhd == NULL\n); + return NULL; + } + + iommu = drhd-iommu; + if (!iommu) { + printk(KERN_ERR + intel_iommu_domain_alloc: iommu == NULL\n); + return NULL; + } + domain = iommu_alloc_domain(iommu); + if (!domain) { + printk(KERN_ERR + intel_iommu_domain_alloc: domain == NULL\n); + return NULL; + } + if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + printk(KERN_ERR + intel_iommu_domain_alloc: domain_init() failed\n); + intel_iommu_domain_exit(domain); + return NULL; + } + return domain; +} +EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc); + +int intel_iommu_context_mapping( + struct dmar_domain *domain, struct pci_dev *pdev) +{ + int rc; + rc = domain_context_mapping(domain, pdev); + return rc; +} +EXPORT_SYMBOL_GPL(intel_iommu_context_mapping); + +int intel_iommu_page_mapping( + struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot) +{ + int rc; + rc = domain_page_mapping(domain, iova, hpa, size, prot); + return rc; +} +EXPORT_SYMBOL_GPL(intel_iommu_page_mapping); + +void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn) +{ + detach_domain_for_dev(domain, bus, 
devfn); +} +EXPORT_SYMBOL_GPL(intel_iommu_detach_dev); + +struct dmar_domain * +intel_iommu_find_domain(struct pci_dev *pdev) +{ + return find_domain(pdev); +} +EXPORT_SYMBOL_GPL(intel_iommu_find_domain); + +int intel_iommu_found(void) +{ + return g_num_of_iommus; +} +EXPORT_SYMBOL_GPL(intel_iommu_found); + +u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64
Re: [Qemu-devel] [RFC][PATCH] Add HPET emulation to qemu
Cool! Does it now happen that qemu no longer wakes up every 10ms? If not, please try to make sure it happens, that would eventually fix that power leak :) Samuel -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: PCIPT: VT-d support
On Wed, 2008-07-09 at 10:49 -0500, Anthony Liguori wrote: Ben-Ami Yassour wrote: From: Kay, Allen M [EMAIL PROTECTED] This patch includes the functions to support VT-d for passthrough devices. [Ben: fixed memory pinning] Signed-off-by: Kay, Allen M [EMAIL PROTECTED] Signed-off-by: Weidong Han [EMAIL PROTECTED] Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED] --- arch/x86/kvm/Makefile |2 +- arch/x86/kvm/vtd.c | 189 include/asm-x86/kvm_host.h | 17 include/asm-x86/kvm_para.h | 14 +++ include/linux/kvm_host.h |6 ++ 5 files changed, 227 insertions(+), 1 deletions(-) create mode 100644 arch/x86/kvm/vtd.c diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d0e940b..5d9d079 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -11,7 +11,7 @@ endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ - i8254.o + i8254.o vtd.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c new file mode 100644 index 000..5abeef1 --- /dev/null +++ b/arch/x86/kvm/vtd.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Allen M. 
Kay [EMAIL PROTECTED] + * Author: Weidong Han [EMAIL PROTECTED] + */ + +#include linux/list.h +#include linux/kvm_host.h +#include linux/pci.h +#include linux/dmar.h +#include linux/intel-iommu.h + +static int kvm_iommu_unmap_memslots(struct kvm *kvm); + +int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + struct page *page; + int i, rc; + struct dmar_domain *domain = kvm-arch.intel_iommu_domain; + + if (!domain) + return -EFAULT; + + for (i = 0; i npages; i++) { + pfn = gfn_to_pfn(kvm, gfn); + if (pfn_valid(pfn)) { As I've mentioned before, this is wrong. We should add MMIO pages to the VT-d tables but at any rate, pfn_valid() doesn't work for checking if something is MMIO. removing the check. + rc = intel_iommu_page_mapping(domain, + gfn PAGE_SHIFT, + pfn PAGE_SHIFT, + PAGE_SIZE, + DMA_PTE_READ | + DMA_PTE_WRITE); + if (rc) { + page = pfn_to_page(pfn); + put_page(page); This should be kvm_release_pfn_clean(). fixed in the new version. + } + } else { + printk(KERN_DEBUG kvm_iommu_map_page: + invalid pfn=%lx\n, pfn); + return 0; + } + gfn++; + } + return 0; +} + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int i, rc; + for (i = 0; i kvm-nmemslots; i++) { + rc = kvm_iommu_map_pages(kvm, kvm-memslots[i].base_gfn, +kvm-memslots[i].npages); + if (rc) + return rc; + } + return 0; +} + +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_pci_passthrough_dev *pci_pt_dev) +{ + struct pci_dev *pdev = NULL; + + printk(KERN_DEBUG VT-d direct map: host bdf = %x:%x:%x\n, + pci_pt_dev-host.busnr, + PCI_SLOT(pci_pt_dev-host.devfn), + PCI_FUNC(pci_pt_dev-host.devfn)); + + for_each_pci_dev(pdev) { + if ((pdev-bus-number == pci_pt_dev-host.busnr) + (pdev-devfn == pci_pt_dev-host.devfn)) { + break; + } + } + + if (pdev == NULL) { + if (kvm-arch.intel_iommu_domain) { +
Re: KSM Algorithm
ציטוט Sukanto Ghosh: Can anyone answer these queries regarding KSM ? How does KSM offer its services through the /dev/ksm device ? ioctls Is every guest page scanned in KVM while using KSM, or is page-scanning and sharing triggered on some event (low memory, etc) ? no, the scanning runs all the time (with number-of-pages-to-scan and sleep parameters...) Is sharing done only between the pages which have been registered via KSM_REGISTER_MEMORY_REGION ? yes What are these for ? KSM_CREATE_SHARED_MEMORY_AREA and KSM_CREATE_SCAN ? KSM_CREATE_SHARED_MEMORY_AREA - to register a memory area to be scanned for identical pages KSM_CREATE_SCAN - to create the scanner that scans for these pages Where are the shared pages kept ? In kernel memory ? Aren't the shared pages always pinned in RAM (due to same reasons for pinned shadowed pages) ? in the version that was sent to the list it was kernel memory (meaning shared pages are not swappable; just the pages that are shared, not the pages that we scan; when they are split because of copy on write they become swappable again). In the new version that I will send soon, the pages are normal anonymous/userspace memory that is swappable How much is the overhead involved due to this ? depends on the speed you tell it to scan, but the overhead is about 5% for common cases -- Regards, Sukanto Ghosh -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: PCIPT: VT-d support
On Thursday 10 July 2008 17:14:42 Ben-Ami Yassour wrote: From: Kay, Allen M [EMAIL PROTECTED] This patch includes the functions to support VT-d for passthrough devices. [Ben: fixed memory pinning] Signed-off-by: Kay, Allen M [EMAIL PROTECTED] Signed-off-by: Weidong Han [EMAIL PROTECTED] Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED] --- arch/x86/kvm/Makefile |2 +- arch/x86/kvm/vtd.c | 176 arch/x86/kvm/x86.c | 10 +++ include/asm-x86/kvm_host.h |1 + include/linux/kvm_host.h |6 ++ virt/kvm/kvm_main.c|6 ++ 6 files changed, 200 insertions(+), 1 deletions(-) create mode 100644 arch/x86/kvm/vtd.c diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 531d635..dc67d90 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -41,6 +41,7 @@ #include linux/pagemap.h #include linux/mman.h #include linux/swap.h +#include linux/intel-iommu.h #include asm/processor.h #include asm/io.h @@ -422,6 +423,11 @@ int __kvm_set_memory_region(struct kvm *kvm, } kvm_free_physmem_slot(old, new); + + /* map the pages in iommu page table */ + if (intel_iommu_found()) + kvm_iommu_map_pages(kvm, base_gfn, npages); + I don't understand why we need this along with kvm_iommu_map_memslots(). This works during the memory setup, and in kvm_iommu_map_guest() we do it again with the overlapped memory region? I think even if we need pin all pages, we still just need do it once... -- Thanks Yang, Sheng -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: PCIPT: VT-d support
On Thursday 10 July 2008 17:51:53 Ben-Ami Yassour wrote: On Thu, 2008-07-10 at 17:30 +0800, Yang, Sheng wrote: On Thursday 10 July 2008 17:14:42 Ben-Ami Yassour wrote: From: Kay, Allen M [EMAIL PROTECTED] This patch includes the functions to support VT-d for passthrough devices. [Ben: fixed memory pinning] Signed-off-by: Kay, Allen M [EMAIL PROTECTED] Signed-off-by: Weidong Han [EMAIL PROTECTED] Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED] --- arch/x86/kvm/Makefile |2 +- arch/x86/kvm/vtd.c | 176 arch/x86/kvm/x86.c | 10 +++ include/asm-x86/kvm_host.h |1 + include/linux/kvm_host.h |6 ++ virt/kvm/kvm_main.c|6 ++ 6 files changed, 200 insertions(+), 1 deletions(-) create mode 100644 arch/x86/kvm/vtd.c diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 531d635..dc67d90 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -41,6 +41,7 @@ #include linux/pagemap.h #include linux/mman.h #include linux/swap.h +#include linux/intel-iommu.h #include asm/processor.h #include asm/io.h @@ -422,6 +423,11 @@ int __kvm_set_memory_region(struct kvm *kvm, } kvm_free_physmem_slot(old, new); + + /* map the pages in iommu page table */ + if (intel_iommu_found()) + kvm_iommu_map_pages(kvm, base_gfn, npages); + I don't understand why we need this along with kvm_iommu_map_memslots(). This works during the memory setup, and in kvm_iommu_map_guest() we do it again with the overlapped memory region? I think even if we need pin all pages, we still just need do it once... We map the entire guest memory on initialization by going over all the existing memory slots. If a new slot is created later then we need to map it as well, this is the call that you see here. I think it's may be unnecessary to map pages when device assigned. The table can be set up along with set_memory_region(), it covered all memory slots already, or I miss something here? 
-- Thanks Yang, Sheng -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/5] kvmtrace: make cycle calculation architecture aware
On Wednesday 09 July 2008 23:03:19 Hollis Blanchard wrote: On Wed, 2008-07-09 at 11:17 +0200, Christian Ehrhardt wrote: So the question that is left before changing that is, if the original author had something special in mind choosing cycles here. I added Eric on CC for that. I wait with my resubmission of the patch series until all architectures agree *hope* on using getnstimeofday() - after an ack from all sides I would revise my patch series and submit those changes altogether. I got an email bounce from Eric the last time I tried to email him, so I'm not sure he's still with Intel. However, I don't think he had any special intention; I think he was just porting xentrace to KVM. Eric had completed his internship at Intel, so... I like the term timestamp too. I think he used cycles only because there is a function called get_cycles(). But instead of getnstimeofday(), I suggest using ktime_get() here. It's a little more precise than getnstimeofday(), and ktime_t is easier to handle. And I think the overhead it brings can be ignored too. -- Thanks Yang, Sheng -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/9] kvm-userspace: kvmtrace_format: add ppc instruction emulation
From: Christian Ehrhardt [EMAIL PROTECTED] This patch adds the handling of the ppc instruction emulation trace records. Due to the fact that those are more complex than the classic formats file way this patch adds a check of the event id and maps to the internal handler function if needed (other complex trace records that might appear in future can hook up there too). Additionally this fixes the ppc tlb trace record definitions in the formats file now that the revised kernel patch series is submitted. Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED] --- [diffstat] formats | 16 +-- kvmtrace_format | 276 +--- 2 files changed, 268 insertions(+), 24 deletions(-) [diff] diff --git a/user/formats b/user/formats --- a/user/formats +++ b/user/formats @@ -23,13 +23,9 @@ 0x00020013 %(tsc)d (+%(reltsc)8d) LMSW vcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ value = 0x%(1)08x ] 0x00020014 %(tsc)d (+%(reltsc)8d) APIC_ACCESS vcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ offset = 0x%(1)08x ] 0x00020015 %(tsc)d (+%(reltsc)8d) TDP_FAULT vcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ errorcode = 0x%(1)08x, virt = 0x%(3)08x %(2)08x ] -# ppc: context switch -0x00020016 %(tsc)d (+%(reltsc)8d) CONT_SWITCH vcpu = 0x%(vcpu)08x pid = 0x%(pid)08x -# ppc: tlb write -0x00020017 %(tsc)d (+%(reltsc)8d) TLB_WRITEvcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ] -# ppc: tlb invalidate -0x00020018 %(tsc)d (+%(reltsc)8d) TLB_INVAL vcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ] -# ppc: guest TLB write -0x00020019 %(tsc)d (+%(reltsc)8d) GTLB_WRITEvcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ] -# ppc: shadow TLB write -0x00020020 %(tsc)d (+%(reltsc)8d) STLB_WRITE vcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, 
word3=0x%(5)08x ] +# ppc: tlb traces +0x00020016 GTLB_WRITEvcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ] +0x00020017 STLB_WRITEvcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ] +0x00020018 STLB_INVALvcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ] +# ppc: instruction emulation - this type is handled more complex in kvmtrace_format, but listed to show the eventid and transported data +#0x00020019 %(tsc)d (+%(reltsc)8d) PPC_INSTR vcpu = 0x%(vcpu)08x pid = 0x%(pid)08x [ instr = 0x%(1)08x, pc = 0x%(2)08x, emul = 0x%(3)08x, nsec = %(4)08d ] diff --git a/user/kvmtrace_format b/user/kvmtrace_format --- a/user/kvmtrace_format +++ b/user/kvmtrace_format @@ -58,6 +58,250 @@ def sighand(x,y): global interrupted interrupted = 1 + +# ppc instruction decoding for event type 0x00020019 (PPC_INSTR) +def get_op(instr): +return (instr 26); + +def get_xop(instr): +return (instr 1) 0x3ff; + +def get_sprn(instr): + return ((instr 16) 0x1f) | ((instr 6) 0x3e0) + +def get_dcrn(instr): + return ((instr 16) 0x1f) | ((instr 6) 0x3e0); + +def get_tlbwe_type(instr): + ws = (instr 11) 0x1f; + if ws == 0: + return PAGEID + elif ws == 1: + return XLAT + elif ws == 2: + return ATTRIB + else: + return UNKNOWN + +def get_name(instr): + if get_op(instr)==3: + return trap + elif get_op(instr)==19: + if get_xop(instr) == 50: + return rfi + else: + return unknown + elif get_op(instr)==31: + if get_xop(instr) == 83: + return mfmsr + + elif get_xop(instr) == 87: + return lbzx + + elif get_xop(instr) == 131: + return wrtee + + elif get_xop(instr) == 146: + return mtmsr + + elif get_xop(instr) == 163: + return wrteei + + elif get_xop(instr) == 215: + return stbx + + elif get_xop(instr) == 247: + return stbux + + elif get_xop(instr) == 279: + return lhzx + + elif 
get_xop(instr) == 311: + return lhzux + + elif get_xop(instr) == 323: + return mfdcr + + elif get_xop(instr) == 339: + return mfspr + + elif get_xop(instr) == 407: + return
[PATCH 0/5] kvmtrace: powerpc support and timestamps for KVM_TRACE
From: Christian Ehrhardt [EMAIL PROTECTED] Changing kvmtrace code to use timestamp and add powerpc support. This is the revised series including the feedback from the earlier submission this week. I now also added the userspace changes which completes the series. The series contains nine patches (4 generic / 5 powerpc code): Subject: [PATCH 1/9] kvmtrace: Remove use of bit fields in kvm trace structure v3 Introducing an endian-safe way to store the flags event,tcs, num_data Subject: [PATCH 2/9] kvmtrace: replace get_cycles with getnstimeofday use a cross host comparable timestamp source and make it 32/64 bit safe Subject: [PATCH 3/9] kvmtrace: rename cycles to timestamp it no longer tracks cycles, so rename variables and change comments Subject: [PATCH 3/9] kvmppc: kvmtrace: enable KVM_TRACE building for powerpc enable KVM_TRACE for powerpc in Kconfig/Makefile Subject: [PATCH 4/9] kvmppc: kvmtrace: adds trace points for ppc tlb activity v2 use KVM_TRACE to track tlb/shadow tlb activity on embedded powerpc Subject: [PATCH 5/9] kvmppc: kvmtrace: trace powerpc instruction emulation use KVM_TRACE to track instruction emulation overhead Subject: [PATCH 7/9] kvm-userspace: kvmtrace_format: add ppc instruction emulation extension to kvmtrace_format to handle complex event types e.g. 
ppc instruction emulation Subject: [PATCH 8/9] kvm-userspace: kvmtrace_format: add statistic section adding statistic creation/presentation to the kvmtrace data processing Subject: [PATCH 9/9] kvm-userspace: kvmtrace: rename cycles to timestamp it no longer tracks cycles, so rename variables and change comments Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED] --- [diffstat] arch/powerpc/kvm/emulate.c |2 b/arch/powerpc/kvm/44x_tlb.c | 15 ++ b/arch/powerpc/kvm/Kconfig | 11 + b/arch/powerpc/kvm/Makefile |6 b/arch/powerpc/kvm/emulate.c |4 b/include/linux/kvm.h| 17 ++ b/user/formats | 16 -- b/user/kvmtrace_format | 276 --- b/virt/kvm/kvm_trace.c | 19 +- include/linux/kvm.h | 10 + user/formats | 50 +++ user/kvmtrace_format | 114 + virt/kvm/kvm_trace.c | 29 ++-- 13 files changed, 461 insertions(+), 108 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/9] kvmtrace: Remove use of bit fields in kvm trace structure v3
From: Christian Ehrhardt [EMAIL PROTECTED] From: Jerone Young [EMAIL PROTECTED] This patch fixes kvmtrace use on big endian systems. When using bit fields the compiler will lay data out in the wrong order expected when laid down into a file. This fixes it by using one variable instead of using bit fields. Updates in v3: - fixed macro definition bug in v2 - ensured in macro operator order - fixed whitespace/indent issues - removed superfluous initialization Signed-off-by: Jerone Young [EMAIL PROTECTED] Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED] --- [diffstat] include/linux/kvm.h | 17 ++--- virt/kvm/kvm_trace.c | 19 ++- 2 files changed, 24 insertions(+), 12 deletions(-) [diff] diff --git a/include/linux/kvm.h b/include/linux/kvm.h --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -311,9 +311,13 @@ /* This structure represents a single trace buffer record. */ struct kvm_trace_rec { - __u32 event:28; - __u32 extra_u32:3; - __u32 cycle_in:1; + /* variable rec_val +* is split into: +* bits 0 - 27 - event id +* bits 28 -30 - number of extra data args of size u32 +* bits 31 - binary indicator for if tsc is in record +*/ + __u32 rec_val; __u32 pid; __u32 vcpu_id; union { @@ -326,6 +330,13 @@ } nocycle; } u; } __attribute__((packed)); + +#define TRACE_REC_EVENT_ID(val) \ + (0x0fff (val)) +#define TRACE_REC_NUM_DATA_ARGS(val) \ + (0x7000 ((val) 28)) +#define TRACE_REC_TCS(val) \ + (0x8000 ((val) 31)) #define KVMIO 0xAE diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -54,12 +54,13 @@ struct kvm_trace *kt = kvm_trace; struct kvm_trace_rec rec; struct kvm_vcpu *vcpu; - inti, extra, size; + inti, size; + u32extra; if (unlikely(kt-trace_state != KVM_TRACE_STATE_RUNNING)) return; - rec.event = va_arg(*args, u32); + rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); vcpu= va_arg(*args, struct kvm_vcpu *); rec.pid = current-tgid; rec.vcpu_id = vcpu-vcpu_id; @@ -67,21 +68,21 @@ extra = va_arg(*args, 
u32); WARN_ON(!(extra = KVM_TRC_EXTRA_MAX)); extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); - rec.extra_u32 = extra; - rec.cycle_in= p-cycle_in; - - if (rec.cycle_in) { + rec.rec_val |= TRACE_REC_TCS(p-cycle_in) + | TRACE_REC_NUM_DATA_ARGS(extra); + + if (p-cycle_in) { rec.u.cycle.cycle_u64 = get_cycles(); - for (i = 0; i rec.extra_u32; i++) + for (i = 0; i extra; i++) rec.u.cycle.extra_u32[i] = va_arg(*args, u32); } else { - for (i = 0; i rec.extra_u32; i++) + for (i = 0; i extra; i++) rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); } - size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); + size = calc_rec_size(p-cycle_in, extra * sizeof(u32)); relay_write(kt-rchan, rec, size); } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 8/9] kvm-userspace: kvmtrace_format: add statistic section
From: Christian Ehrhardt [EMAIL PROTECTED] Usually people don't want to read thousands of trace log lines to interpret the data, a condensed statistic about the traced events is usually better to read. This patch adds a new command line switch -s that tells kvmtrace_format to generate statistics while processing the trace records. Those statistics are then printed at the end of the output. This patch contains a statistic function for the ppc instruction emulation. An example output might look like that: mnemonic + count ---+--- wrteei | 1260 mfmsr |977 mtspr |895 wrtee |742 mfspr |534 rfi |179 mtmsr | 90 lbz | 53 stb | 28 sum = 4758 [...] more detailed statistics about spr, dcr and tlb usage Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED] --- [diffstat] kvmtrace_format | 73 +++- 1 file changed, 67 insertions(+), 6 deletions(-) [diff] diff --git a/user/kvmtrace_format b/user/kvmtrace_format --- a/user/kvmtrace_format +++ b/user/kvmtrace_format @@ -4,7 +4,7 @@ # Program for reformatting trace buffer output according to user-supplied rules -import re, sys, string, signal, struct, os, getopt +import re, sys, string, signal, struct, os, getopt, operator def usage(): print sys.stderr, \ @@ -29,6 +29,12 @@ this script may not be able to keep up with the output of kvmtrace if it is piped directly. In these circumstances you should have kvmtrace output to a file for processing off-line. 
+ + kvmtrace_format has the following additional switches + -c mhz - specifies the mhz of the traced machine used to convert + cycle data in trace records into time + -s - if this switch is set additional trace statistics are + created and printed at the end of the output sys.exit(1) @@ -60,6 +66,33 @@ interrupted = 1 # ppc instruction decoding for event type 0x00020019 (PPC_INSTR) +# some globals for statistic summaries +stat_ppc_instr_mnemonic = {}; +stat_ppc_instr_spr = {}; +stat_ppc_instr_dcr = {}; +stat_ppc_instr_tlb = {}; + +def ppc_instr_print_summary(sortedlist, colname): + print \n\n%14s + %10s % (colname, count) + print %s % (15*-+++11*-) + sum = 0 + for value, key in sortedlist: + sum += key + print %14s | %10d % (value, key) + print %14s = %10d % (sum, sum) + + +def ppc_instr_summary(): + # don't print empty statistics +if stat_ppc_instr_mnemonic: + ppc_instr_print_summary(sorted(stat_ppc_instr_mnemonic.iteritems(), key=operator.itemgetter(1), reverse=True), mnemonic) +if stat_ppc_instr_spr: + ppc_instr_print_summary(sorted(stat_ppc_instr_spr.iteritems(), key=operator.itemgetter(1), reverse=True), mnemonic-spr) +if stat_ppc_instr_dcr: + ppc_instr_print_summary(sorted(stat_ppc_instr_dcr.iteritems(), key=operator.itemgetter(1), reverse=True), mnemonic-dcr) +if stat_ppc_instr_tlb: + ppc_instr_print_summary(sorted(stat_ppc_instr_tlb.iteritems(), key=operator.itemgetter(1), reverse=True), mnemonic-tlb) + def get_op(instr): return (instr 26); @@ -293,28 +326,53 @@ return UNKNOWN def get_special(instr): + name = get_name(instr); + if stat_ppc_instr_mnemonic.has_key(name): + stat_ppc_instr_mnemonic[name] += 1 + else: + stat_ppc_instr_mnemonic[name] = 1 + if get_op(instr) == 31: if (get_xop(instr) == 339) or (get_xop(instr) == 467): sprn = get_sprn(instr); - return (- sprn 0x%03x %8s % (sprn, get_sprn_name(sprn))) + sprn_name = get_sprn_name(sprn); + stat_idx = name+-+sprn_name + if stat_ppc_instr_spr.has_key(stat_idx): + stat_ppc_instr_spr[stat_idx] += 1 + 
else: + stat_ppc_instr_spr[stat_idx] = 1 + return (- sprn 0x%03x %8s % (sprn, sprn_name)) elif (get_xop(instr) == 323 ) or (get_xop(instr) == 451): - return (- dcrn 0x%03x % get_dcrn(instr)) + dcrn = get_dcrn(instr); + stat_idx = name+-+(%04X%dcrn) + if stat_ppc_instr_dcr.has_key(stat_idx): + stat_ppc_instr_dcr[stat_idx] += 1 + else: + stat_ppc_instr_dcr[stat_idx] = 1 + return (- dcrn 0x%03x % dcrn) elif (get_xop(instr) == 978 ) or (get_xop(instr) == 451): -
Re: KSM Algorithm
Doesn't KSM notifies KVM about the shared pages so that KVM can update its sptes accordingly or is it done by KSM itself ? when using kvm, mmu notifiers is a must for ksm, (mmu notifiers update kvm about the changes in the host page table) What about KSM being used by someone other than KVM ? Does KSM updates the ptes of the shared pages itself ? it can be used by any application, it update the ptes of the host, what you mean by the shared pages itself? by itself I meant 'ksm'. So ksm will take care of updating the ptes of the shared guest pages. Where are the shared pages kept ? In kernel memory ? Aren't the shared pages always pinned in RAM (due to same reasons for pinned shadowed pages) ? in the version that was sent to the list it was kernel memory (meaning shared pages are not swappable (just the pages that are shared not the pages that we scan, when they are split beacuse of copy on write it become swappable again new version that i will send soon the pages are normal anonymous/userspace memory that is swappable What is the strategy/method which you will use to represent the shared memory as normal anonymous/userspace memory ? Then, these pages must be staying at one of the guest's address-space (whose pages have been shared) ? it possible due to another patch that i need to send, that allow modules register new reverse mapping call backs..., ok but in whose address-space will these shared pages stay ? How much is the overhead involved due to this ? depeand on the speed you tell it to scan, but the overhead is about ~5% for common cases Can I know the typical values of the parameters : sleep time, no. of pages to scan, which you used to arrive at the above mentioned overhead. 
I think it is about 256 2000 (256 pages per 2000 usleep) (note that most of the CPU is actually taken by the copying of the pages when a new shared page is created; I have a possible way to change it and reduce the CPU usage even more, but I don't know if I want to do it) (it adds more IFs to the fast path inside the mm) Thanks :) -- Regards, Sukanto Ghosh -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KSM Algorithm
ציטוט Sukanto Ghosh: Doesn't KSM notifies KVM about the shared pages so that KVM can update its sptes accordingly or is it done by KSM itself ? when using kvm, mmu notifiers is a must for ksm, (mmu notifiers update kvm about the changes in the host page table) What about KSM being used by someone other than KVM ? Does KSM updates the ptes of the shared pages itself ? it can be used by any application, it update the ptes of the host, what you mean by the shared pages itself? by itself I meant 'ksm'. So ksm will take care of updating the ptes of the shared guest pages. yes Where are the shared pages kept ? In kernel memory ? Aren't the shared pages always pinned in RAM (due to same reasons for pinned shadowed pages) ? in the version that was sent to the list it was kernel memory (meaning shared pages are not swappable (just the pages that are shared not the pages that we scan, when they are split beacuse of copy on write it become swappable again new version that i will send soon the pages are normal anonymous/userspace memory that is swappable What is the strategy/method which you will use to represent the shared memory as normal anonymous/userspace memory ? Then, these pages must be staying at one of the guest's address-space (whose pages have been shared) ? it possible due to another patch that i need to send, that allow modules register new reverse mapping call backs..., ok but in whose address-space will these shared pages stay ? if we have 5 applications sharing the same page, we will have the page stay in 5 diffrent address-spaces... How much is the overhead involved due to this ? depeand on the speed you tell it to scan, but the overhead is about ~5% for common cases Can I know the typical values of the parameters : sleep time, no. of pages to scan, which you used to arrive at the above mentioned overhead. 
I think it is about 256 2000 (256 pages per 2000 usleep) (note that most of the CPU is actually taken by the copying of the pages when a new shared page is created; I have a possible way to change it and reduce the CPU usage even more, but I don't know if I want to do it) (it adds more IFs to the fast path inside the mm) Thanks :) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KSM Algorithm
got it. thanks On Fri, Jul 11, 2008 at 12:50 AM, Izik Eidus [EMAIL PROTECTED] wrote: ציטוט Sukanto Ghosh: Doesn't KSM notifies KVM about the shared pages so that KVM can update its sptes accordingly or is it done by KSM itself ? when using kvm, mmu notifiers is a must for ksm, (mmu notifiers update kvm about the changes in the host page table) What about KSM being used by someone other than KVM ? Does KSM updates the ptes of the shared pages itself ? it can be used by any application, it update the ptes of the host, what you mean by the shared pages itself? by itself I meant 'ksm'. So ksm will take care of updating the ptes of the shared guest pages. yes Where are the shared pages kept ? In kernel memory ? Aren't the shared pages always pinned in RAM (due to same reasons for pinned shadowed pages) ? in the version that was sent to the list it was kernel memory (meaning shared pages are not swappable (just the pages that are shared not the pages that we scan, when they are split beacuse of copy on write it become swappable again new version that i will send soon the pages are normal anonymous/userspace memory that is swappable What is the strategy/method which you will use to represent the shared memory as normal anonymous/userspace memory ? Then, these pages must be staying at one of the guest's address-space (whose pages have been shared) ? it possible due to another patch that i need to send, that allow modules register new reverse mapping call backs..., ok but in whose address-space will these shared pages stay ? if we have 5 applications sharing the same page, we will have the page stay in 5 diffrent address-spaces... How much is the overhead involved due to this ? depeand on the speed you tell it to scan, but the overhead is about ~5% for common cases Can I know the typical values of the parameters : sleep time, no. of pages to scan, which you used to arrive at the above mentioned overhead. 
I think it is about 256 2000 (256 pages per 2000 usleep) (note that most of the CPU is actually taken by the copying of the pages when a new shared page is created; I have a possible way to change it and reduce the CPU usage even more, but I don't know if I want to do it) (it adds more IFs to the fast path inside the mm) Thanks :) -- Regards, Sukanto Ghosh
Re: [PATCH] mask out clflush
Yang, Sheng wrote: On Wednesday 09 July 2008 02:29:44 Glauber Costa wrote: clflush is a non-privileged instruction that flushes the cacheline given by its parameter, in terms of linear address. As it is non-privileged, it is quite tricky, because a guest doing clflush will actually be trying to flush a host kernel address. The linear address was converted to a host physical address, then the cache line was flushed. Of course the host physical address was used by the guest at the time. I don't understand why we need to prevent the guest from flushing a cache line related to itself... The problem turned out to be that we aren't emulating clflush in x86_emulate. Regards, Anthony Liguori -- Thanks Yang, Sheng -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/5] kvmtrace: make cycle calculation architecture aware
Yang, Sheng wrote: On Wednesday 09 July 2008 23:03:19 Hollis Blanchard wrote: On Wed, 2008-07-09 at 11:17 +0200, Christian Ehrhardt wrote: So the question that is left before changing that is whether the original author had something special in mind when choosing cycles here. I added Eric on CC for that. I will wait with my resubmission of the patch series until all architectures agree *hope* on using getnstimeofday() - after an ack from all sides I would revise my patch series and submit those changes altogether. I got an email bounce from Eric the last time I tried to email him, so I'm not sure he's still with Intel. However, I don't think he had any special intention; I think he was just porting xentrace to KVM. Eric has completed his internship at Intel, so... I like the term timestamp too. I think he used cycles only because there is a function called get_cycles(). But instead of getnstimeofday(), I suggest using ktime_get() here. It's a little more precise than getnstimeofday(), and ktime_t is easier to handle. And I think the overhead it brings can be ignored too. What is the overhead of ktime_get()? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
Anthony Liguori wrote: Yang, Sheng wrote: On Wednesday 09 July 2008 02:29:44 Glauber Costa wrote: clflush is a non-privileged instruction that flushes the cacheline given by its parameter, in terms of linear address. As it is non-privileged, it is quite tricky, because a guest doing clflush will actually be trying to flush a host kernel address. The linear address was converted to a host physical address, then the cache line was flushed. Of course the host physical address was used by the guest at the time. I don't understand why we need to prevent the guest from flushing a cache line related to itself... The problem turned out to be that we aren't emulating clflush in x86_emulate. Why would clflush trap? Is it called from real mode? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KSM Algorithm
One more query: what if multiple processes call ioctl KSM_CREATE_SCAN ? Will there be multiple scanners ? Consider a scenario where two processes A and B separately call KSM_CREATE_SCAN and then start registering some memory pages/areas via the KSM_CREATE_SHARED_MEMORY_AREA ioctl. Let's say A registers 4 pages having content X, Y, Z and X. And process B registers 2 pages having content Z and X. So, will there be only a single copy each of X, Y and Z in the entire system, or will they be kept separately, i.e. the scanner for A maintains a single copy each of X, Y and Z, and the scanner for B maintains X and Z separately? -- Regards, Sukanto Ghosh -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KSM Algorithm
ציטוט Sukanto Ghosh: One more query: what if multiple processes call ioctl KSM_CREATE_SCAN ? Will there be multiple scanners ? yes Consider a scenario where two processes A and B separately call KSM_CREATE_SCAN and then start registering some memory pages/areas via the KSM_CREATE_SHARED_MEMORY_AREA ioctl. Let's say A registers 4 pages having content X, Y, Z and X. And process B registers 2 pages having content Z and X. So, will there be only a single copy each of X, Y and Z in the entire system, or will they be kept separately, i.e. the scanner for A maintains a single copy each of X, Y and Z, and the scanner for B maintains X and Z separately? There will be only one copy; ksm is multi-thread safe. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Ignore DEBUGCTL MSRs
Alexander Graf wrote: Avi Kivity wrote: Alexander Graf wrote: Netware writes and reads to the DEBUGCTL and LAST*IP MSRs without further checks and is really confused to receive a #GP during that. To make it happy we should just make them stubs, which is exactly what SVM already does. To support VMX too, I put these in the generic code. Maybe the SVM code could be cleaned up to use generic code too. Please add a pr_unimpl() when bits that cause a real processor to do something are set. Like this? I also removed the set handlers for the *IP MSRs, as these are read only and made it only handle debug bits, no perfmon bits. With a changelog entry. Signed-off-by: Alexander Graf [EMAIL PROTECTED] diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc0721e..10f5e95 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -609,6 +609,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) pr_unimpl(vcpu, %s: MSR_IA32_MCG_CTL 0x%llx, nop\n, __func__, data); break; + case MSR_IA32_DEBUGCTLMSR: + if (data (DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) { + /* Values other than LBR and BTF are vendor-specific, + thus reserved and should throw a #GP */ + return 1; + } '' is too clever. ' ~(u64)(...)' is clearer. Arithmetic and logical ops don't mix well. + pr_unimpl(vcpu, %s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n, + __func__, data); We can avoid the printout if data == 0, since we support that case fully. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
Anthony Liguori wrote: It's equivalent to a read from a VT perspective so if the read would trap, the clflush instruction will trap. Reads don't normally go through the emulator. Is the guest clflush()ing mmio addresses? Strange as these are not normally cached. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
Glauber Costa wrote: clflush is a non-privileged instruction that flushes the cacheline given by its parameter, in terms of linear address. As it is non-privileged, it is quite tricky, because a guest doing clflush will actually be trying to flush a host kernel address. We need to allow clflush for pci device assignment. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
Avi Kivity wrote: Anthony Liguori wrote: It's equivalent to a read from a VT perspective so if the read would trap, the clflush instruction will trap. Reads don't normally go through the emulator. Is the guest clflush()ing mmio addresses? Strange as these are not normally cached. It seems so, Glauber mentioned that the address was an MMIO address. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Add HPET support to BIOS
* Elizabeth Kon [EMAIL PROTECTED] [2008-07-09 23:07]: This patch, written by Ryan Harper, adds HPET support to BIOS. Based on hpet changes to Xen's hvm firmware/rombios. Signed-off-by: Beth Kon [EMAIL PROTECTED] Signed-off-by: Ryan Harper [EMAIL PROTECTED] -- Ryan Harper Software Engineer; Linux Technology Center IBM Corp., Austin, Tx (512) 838-9253 T/L: 678-9253 [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2009439 ] data corruption with virtio-blk
Bugs item #2009439, was opened at 2008-07-03 06:51 Message generated for change (Settings changed) made by markmc You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2009439&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: Balaji Rao R (balajirrao) Assigned to: Nobody/Anonymous (nobody) Summary: data corruption with virtio-blk Initial Comment: kvm-userspace : kvm-70-138-g163308a With a kernel from linux-2.6.git (v2.6.26-rc8-89-ge1441b9), data corruption is seen on block devices when exported through virtio. Once a block device is mounted and unmounted in a guest, the kernel is not able to find a filesystem in the block device when being remounted. This is seen from the host as well. -- Comment By: Mark McLoughlin (markmc) Date: 2008-07-07 09:32 Message: Logged In: YES user_id=116392 Originator: NO Should be fixed by: http://git.kernel.org/?p=virt/kvm/kvm-userspace.git;a=commit;h=b5a5e894a -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2009439&group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: PCIPT: VT-d support
On Thu, Jul 10, 2008 at 06:07:27PM +0800, Yang, Sheng wrote: I think it may be unnecessary to map pages when a device is assigned. The table can be set up along with set_memory_region(), which covers all memory slots already, or am I missing something here? VT-d is only initialized after the slots are originally created, so when VT-d is initialized we map all of the existing slots separately, and then for each new slot that may be added we'll catch it via set_memory_region(). Cheers, Muli -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction
Marcelo Tosatti wrote: On Mon, Jul 07, 2008 at 02:31:55PM -0300, Marcelo Tosatti wrote: On Sun, Jul 06, 2008 at 12:15:56AM +0300, Avi Kivity wrote: Marcelo Tosatti wrote: On Sat, Jul 05, 2008 at 08:25:30PM +0300, Avi Kivity wrote: @@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st } } +int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot) +{ + struct kvm_mmu_page *sp; + int ret = 0; + + spin_lock(kvm-mmu_lock); + list_for_each_entry(sp, kvm-arch.active_mmu_pages, link) { + if (test_bit(slot, sp-slot_bitmap)) { + ret = -EINVAL; + break; + } + } + spin_unlock(kvm-mmu_lock); + return ret; +} + I don't like the guest influencing host actions in this way. It's just a guest. But I think it's unneeded. kvm_mmu_zap_page() will mark a root shadow page invalid and force all vcpus to reload it, so all that's needed is to keep the mmu spinlock held while removing the slot. You're still keeping a shadowed page around with sp-gfn pointing to non-existant memslot. The code generally makes the assumption that gfn_to_memslot(gfn) on shadowed info will not fail. kvm_mmu_zap_page - unaccount_shadowed, for example. The page has already been zapped, so we might as well unaccount_shadowed() on the first run. It needs to be moved until after the reload_remote_mmus() call, though. Oops, previous patch was unaccounting multiple times for invalid pages. This should be better: During RH6.2 graphical installation the following oops is triggered: BUG: unable to handle kernel NULL pointer dereference at IP: [a00bf172] :kvm:gfn_to_rmap+0x3e/0x61 Pid: 4559, comm: qemu-system-x86 Not tainted The problem is that KVM allows shadow pagetable entries that point to a removed memslot to exist. In this case the cirrus vram mapping was removed, and the NULL dereference happened during kvm_set_memory_alias()'s zap_all_pages(). So nuke all shadowed pages before memslot removal. 
Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED] diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index a4cf4a2..76259da 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1455,6 +1455,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot) +{ + return 0; +} This (and its friends) ought to be static inlines. On the other hand, don't the other arches have to flush their tlbs? Xiantao/Hollis? So maybe this function needs to be renamed kvm_flush_shadow() and implemented across the board. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b90da0b..5ef3a5e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -405,6 +405,12 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem-slot = kvm-nmemslots) kvm-nmemslots = mem-slot + 1; + if (!npages) { + r = kvm_arch_destroy_memory_region(kvm, mem-slot); + if (r) + goto out_free; + } + Destructors should never fail, since there is no possible recovery. And indeed you have 'return 0' in the actual implementation. So I think the function better return void. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
On Thu, Jul 10, 2008 at 11:20 AM, Anthony Liguori [EMAIL PROTECTED] wrote: Avi Kivity wrote: Anthony Liguori wrote: It's equivalent to a read from a VT perspective so if the read would trap, the clflush instruction will trap. Reads don't normally go through the emulator. Is the guest clflush()ing mmio addresses? Strange as these are not normally cached. It seems so, Glauber mentioned that the address was an MMIO address. yes. It is address 0xc8821000, apparently part of a pci controller initialization. Regards, Anthony Liguori -- Glauber Costa. Free as in Freedom http://glommer.net The less confident you are, the more serious you have to act. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
Glauber Costa wrote: On Thu, Jul 10, 2008 at 11:20 AM, Anthony Liguori [EMAIL PROTECTED] wrote: Avi Kivity wrote: Anthony Liguori wrote: It's equivalent to a read from a VT perspective so if the read would trap, the clflush instruction will trap. Reads don't normally go through the emulator. Is the guest clflush()ing mmio addresses? Strange as these are not normally cached. It seems so, Glauber mentioned that the address was an MMIO address. yes. It is address 0xc8821000, apparently part of a pci controller initialization. qemu pci starts at 0xe0000000 IIRC. So maybe the guest is flushing random addresses just to be annoying. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/7][RFC] Enable kvm/ia-64 to build kvm components in userspace.
Zhang, Xiantao wrote: I agree that for automatic testing it's more of a burden; but it needs to be done, especially as some kvm features are only enabled on newer kernels. The external module is convenient, but it's not a substitute for the real thing. So, I have a question here: when will you drop external module support? I don't really know. You know, it blocks our auto-testing system now; we have to re-evaluate the effort without external module support. I think it's very worthwhile to update the autotester to be able to build kernels. kvm will soon depend on core functionality (mmu notifiers) and since currently kvm is the only user (well, gru will be added soon, but ...) we need to test it ourselves. If it won't be dropped in a few weeks, we are eager to get its support for kvm-ia64. Okay, please send the patches, but with diff -M so they're readable. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: PCIPT: VT-d support
Muli Ben-Yehuda wrote: On Thu, Jul 10, 2008 at 06:07:27PM +0800, Yang, Sheng wrote: I think it may be unnecessary to map pages when a device is assigned. The table can be set up along with set_memory_region(), which covers all memory slots already, or am I missing something here? VT-d is only initialized after the slots are originally created, so when VT-d is initialized we map all of the existing slots separately, and then for each new slot that may be added we'll catch it via set_memory_region(). It makes sense to initialize VT-d before. Since memslots can be created and destroyed dynamically, with the current implementation we can see the sequence: create slot, create slot, init VT-d, create slot; which means we need to support both slot-creation-after-VT-d and init-VT-d-after-slot-creation. If we initialize VT-d up front, we only need to support (and test) one scenario. On the other hand, this means that you will not be able to assign devices unless you specified this when creating the VM; but I think this is fair. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
Avi Kivity wrote: Glauber Costa wrote: On Thu, Jul 10, 2008 at 11:20 AM, Anthony Liguori [EMAIL PROTECTED] wrote: Avi Kivity wrote: Anthony Liguori wrote: It's equivalent to a read from a VT perspective so if the read would trap, the clflush instruction will trap. Reads don't normally go through the emulator. Is the guest clflush()ing mmio addresses? Strange as these are not normally cached. It seems so, Glauber mentioned that the address was an MMIO address. yes. It is address 0xc8821000, apparently part of a pci controller initialization. qemu pci starts at 0xe0000000 IIRC. So maybe the guest is flushing random addresses just to be annoying. That's a virtual address, not a physical address IIUC. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
Anthony Liguori wrote: yes. It is address 0xc8821000, apparently part of a pci controller initialization. qemu pci starts at 0xe0000000 IIRC. So maybe the guest is flushing random addresses just to be annoying. That's a virtual address, not a physical address IIUC. Ah, of course. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Add HPET support to BIOS
Beth Kon wrote: This patch, written by Ryan Harper, adds HPET support to BIOS. Signed-off-by: Beth Kon [EMAIL PROTECTED] diff --git a/bios/Makefile b/bios/Makefile index 48022ea..3e73fb5 100644 --- a/bios/Makefile +++ b/bios/Makefile @@ -40,7 +40,7 @@ LIBS = -lm RANLIB = ranlib BCC = bcc -GCC = gcc -m32 +GCC = gcc -m32 -fno-stack-protector HOST_CC = gcc AS86 = as86 diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl index d1bfa2c..1548c86 100755 --- a/bios/acpi-dsdt.dsl +++ b/bios/acpi-dsdt.dsl @@ -262,6 +262,24 @@ DefinitionBlock ( Return (MEMP) } } +Device(HPET) { +Name(_HID, EISAID(PNP0103)) +Name(_UID, 0) _UID is optional if only one timer block is present. +Method (_STA, 0, NotSerialized) { +Return(0x00) Not present? +} +Name(_CRS, ResourceTemplate() { +DWordMemory( +ResourceConsumer, PosDecode, MinFixed, MaxFixed, +NonCacheable, ReadWrite, +0x, +0xFED0, +0xFED003FF, +0x, +0x0400 /* 1K memory: FED0 - FED003FF */ +) +}) +} } Scope(\_SB.PCI0) { @@ -628,7 +646,7 @@ DefinitionBlock ( { Or (PRQ3, 0x80, PRQ3) } -Method (_CRS, 0, NotSerialized) +Method (_CRS, 1, NotSerialized) { Name (PRR0, ResourceTemplate () { Is this change related? diff --git a/bios/rombios32.c b/bios/rombios32.c index 2dc1d25..c1ec015 100755 --- a/bios/rombios32.c +++ b/bios/rombios32.c @@ -1182,7 +1182,7 @@ struct rsdp_descriptor /* Root System Descriptor Pointer */ struct rsdt_descriptor_rev1 { ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t table_offset_entry [2]; /* Array of pointers to other */ + uint32_t table_offset_entry [3]; /* Array of pointers to other */ /* ACPI tables */ }; @@ -1322,6 +1322,30 @@ struct madt_processor_apic #endif }; +/* + * ACPI 2.0 Generic Address Space definition. 
+ */ +struct acpi_20_generic_address { +uint8_t address_space_id; +uint8_t register_bit_width; +uint8_t register_bit_offset; +uint8_t reserved; +uint64_t address; +}; + +/* + * HPET Description Table + */ +struct acpi_20_hpet { +ACPI_TABLE_HEADER_DEF /* ACPI common table header */ +uint32_t timer_block_id; +struct acpi_20_generic_address addr; +uint8_thpet_number; +uint16_t min_tick; +uint8_tpage_protect; +}; +#define ACPI_HPET_ADDRESS 0xFED0UL + struct madt_io_apic { APIC_HEADER_DEF @@ -1393,8 +1417,9 @@ void acpi_bios_init(void) struct fadt_descriptor_rev1 *fadt; struct facs_descriptor_rev1 *facs; struct multiple_apic_table *madt; +struct acpi_20_hpet *hpet; uint8_t *dsdt; -uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr; +uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, hpet_addr; uint32_t acpi_tables_size, madt_addr, madt_size; int i; @@ -1436,6 +1461,11 @@ void acpi_bios_init(void) madt = (void *)(addr); addr += madt_size; +addr = (addr + 7) ~7; +hpet_addr = addr; +hpet = (void *)(addr); +addr += sizeof(*hpet); + acpi_tables_size = addr - base_addr; BX_INFO(ACPI tables: RSDP addr=0x%08lx ACPI DATA addr=0x%08lx size=0x%x\n, @@ -1457,6 +1487,7 @@ void acpi_bios_init(void) memset(rsdt, 0, sizeof(*rsdt)); rsdt-table_offset_entry[0] = cpu_to_le32(fadt_addr); rsdt-table_offset_entry[1] = cpu_to_le32(madt_addr); +rsdt-table_offset_entry[2] = cpu_to_le32(hpet_addr); acpi_build_table_header((struct acpi_table_header *)rsdt, RSDT, sizeof(*rsdt), 1); @@ -1540,6 +1571,15 @@ void acpi_bios_init(void) acpi_build_table_header((struct acpi_table_header *)madt, APIC, madt_size, 1); } + +/* HPET */ +memset(hpet, 0, sizeof(*hpet)); +hpet-timer_block_id = cpu_to_le32(0x8086a201); + // hpet-timer_block_id = cpu_to_le32(0x80862201); This magic value could need some explanation so people don't have to look it up. 
Something like: 8086 = PCI vendor id; a201 = 1010001000000001: 1 = LegacyReplacement IRQ Routing Capable, 0 = reserved, 1 = COUNT_SIZE_CAP counter size, 00010 = Number of Comparators, 00000001 = Hardware revision id. Also add a comment that it should be kept in sync with the emulation (hpet.c). - Sebastian +hpet-addr.address = cpu_to_le32(ACPI_HPET_ADDRESS); +
Re: [Regression] kvm-userspace: VM freezes after booting FreeDOS
On Thu, Jul 10, 2008 at 6:47 PM, Mohammed Gamal [EMAIL PROTECTED] wrote: After updating my kvm-userspace tree to the latest git tree, I am no longer able to run FreeDOS. The VM freezes after choosing any of the boot options. I am running the latest kvm.git and kvm-userspace.git. Forgot to mention that this problem doesn't occur with the -no-kvm switch. However, the problem still occurs with -no-kvm-pit and -no-kvm-irqchip. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Add HPET support to BIOS
* Sebastian Herbszt [EMAIL PROTECTED] [2008-07-10 10:46]: Hey Sebastian, Thanks for the review, Beth Kon wrote: This patch, written by Ryan Harper, adds HPET support to BIOS. Signed-off-by: Beth Kon [EMAIL PROTECTED] diff --git a/bios/Makefile b/bios/Makefile index 48022ea..3e73fb5 100644 --- a/bios/Makefile +++ b/bios/Makefile @@ -40,7 +40,7 @@ LIBS = -lm RANLIB = ranlib BCC = bcc -GCC = gcc -m32 +GCC = gcc -m32 -fno-stack-protector HOST_CC = gcc AS86 = as86 diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl index d1bfa2c..1548c86 100755 --- a/bios/acpi-dsdt.dsl +++ b/bios/acpi-dsdt.dsl @@ -262,6 +262,24 @@ DefinitionBlock ( Return (MEMP) } } +Device(HPET) { +Name(_HID, EISAID(PNP0103)) +Name(_UID, 0) _UID is optional if only one timer block is present. OK +Method (_STA, 0, NotSerialized) { +Return(0x00) Not present? Was playing around with this when trying to get Linux to see the device in the ACPI tables. AFAICT, Linux doesn't care about this value. Should be 1 here then? +} +Name(_CRS, ResourceTemplate() { +DWordMemory( +ResourceConsumer, PosDecode, MinFixed, MaxFixed, +NonCacheable, ReadWrite, +0x, +0xFED0, +0xFED003FF, +0x, +0x0400 /* 1K memory: FED0 - FED003FF */ +) +}) +} } Scope(\_SB.PCI0) { @@ -628,7 +646,7 @@ DefinitionBlock ( { Or (PRQ3, 0x80, PRQ3) } -Method (_CRS, 0, NotSerialized) +Method (_CRS, 1, NotSerialized) { Name (PRR0, ResourceTemplate () { Is this change related? Doubtful, I'll confirm whether or not it is needed. 
diff --git a/bios/rombios32.c b/bios/rombios32.c index 2dc1d25..c1ec015 100755 --- a/bios/rombios32.c +++ b/bios/rombios32.c @@ -1182,7 +1182,7 @@ struct rsdp_descriptor /* Root System Descriptor Pointer */ struct rsdt_descriptor_rev1 { ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t table_offset_entry [2]; /* Array of pointers to other */ + uint32_t table_offset_entry [3]; /* Array of pointers to other */ /* ACPI tables */ }; @@ -1322,6 +1322,30 @@ struct madt_processor_apic #endif }; +/* + * ACPI 2.0 Generic Address Space definition. + */ +struct acpi_20_generic_address { +uint8_t address_space_id; +uint8_t register_bit_width; +uint8_t register_bit_offset; +uint8_t reserved; +uint64_t address; +}; + +/* + * HPET Description Table + */ +struct acpi_20_hpet { +ACPI_TABLE_HEADER_DEF /* ACPI common table header */ +uint32_t timer_block_id; +struct acpi_20_generic_address addr; +uint8_thpet_number; +uint16_t min_tick; +uint8_tpage_protect; +}; +#define ACPI_HPET_ADDRESS 0xFED0UL + struct madt_io_apic { APIC_HEADER_DEF @@ -1393,8 +1417,9 @@ void acpi_bios_init(void) struct fadt_descriptor_rev1 *fadt; struct facs_descriptor_rev1 *facs; struct multiple_apic_table *madt; +struct acpi_20_hpet *hpet; uint8_t *dsdt; -uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr; +uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, hpet_addr; uint32_t acpi_tables_size, madt_addr, madt_size; int i; @@ -1436,6 +1461,11 @@ void acpi_bios_init(void) madt = (void *)(addr); addr += madt_size; +addr = (addr + 7) ~7; +hpet_addr = addr; +hpet = (void *)(addr); +addr += sizeof(*hpet); + acpi_tables_size = addr - base_addr; BX_INFO(ACPI tables: RSDP addr=0x%08lx ACPI DATA addr=0x%08lx size=0x%x\n, @@ -1457,6 +1487,7 @@ void acpi_bios_init(void) memset(rsdt, 0, sizeof(*rsdt)); rsdt-table_offset_entry[0] = cpu_to_le32(fadt_addr); rsdt-table_offset_entry[1] = cpu_to_le32(madt_addr); +rsdt-table_offset_entry[2] = cpu_to_le32(hpet_addr); 
acpi_build_table_header((struct acpi_table_header *)rsdt, RSDT, sizeof(*rsdt), 1); @@ -1540,6 +1571,15 @@ void acpi_bios_init(void) acpi_build_table_header((struct acpi_table_header *)madt, APIC, madt_size, 1); } + +/* HPET */ +memset(hpet, 0, sizeof(*hpet)); +hpet-timer_block_id = cpu_to_le32(0x8086a201); + // hpet-timer_block_id = cpu_to_le32(0x80862201); This magic value could need some explanation so people don't have to look it up. Something like: 8086 = pci
Re: [PATCH] Add HPET support to BIOS
Ryan Harper wrote: Hey Sebastian, Thanks for the review, Beth Kon wrote: This patch, written by Ryan Harper, adds HPET support to BIOS. Signed-off-by: Beth Kon [EMAIL PROTECTED] diff --git a/bios/Makefile b/bios/Makefile index 48022ea..3e73fb5 100644 --- a/bios/Makefile +++ b/bios/Makefile @@ -40,7 +40,7 @@ LIBS = -lm RANLIB = ranlib BCC = bcc -GCC = gcc -m32 +GCC = gcc -m32 -fno-stack-protector HOST_CC = gcc AS86 = as86 diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl index d1bfa2c..1548c86 100755 --- a/bios/acpi-dsdt.dsl +++ b/bios/acpi-dsdt.dsl @@ -262,6 +262,24 @@ DefinitionBlock ( Return (MEMP) } } +Device(HPET) { +Name(_HID, EISAID(PNP0103)) +Name(_UID, 0) _UID is optional if only one timer block is present. OK +Method (_STA, 0, NotSerialized) { +Return(0x00) Not present? Was playing around with this when trying to get Linux to see the device in the ACPI tables. AFAICT, Linux doesn't care about this value. Should be 1 here then? I would suggest 0x0F (present, enabled and more). It would be nice to runtime detect the presence of the hpet and return the proper value, e.g. 0x0 if not present and skip the HPET ACPI table creation. The Xen DSDT does it with the help of a bios info table which gets created at runtime. It detects the hpet by reading the vendor id from HPET_BASE. Something like this might also be possible inside the DSDT (OperationRegion, Field and LEqual). +} +Name(_CRS, ResourceTemplate() { +DWordMemory( +ResourceConsumer, PosDecode, MinFixed, MaxFixed, +NonCacheable, ReadWrite, +0x, +0xFED0, +0xFED003FF, +0x, +0x0400 /* 1K memory: FED0 - FED003FF */ +) +}) +} } Scope(\_SB.PCI0) { @@ -628,7 +646,7 @@ DefinitionBlock ( { Or (PRQ3, 0x80, PRQ3) } -Method (_CRS, 0, NotSerialized) +Method (_CRS, 1, NotSerialized) { Name (PRR0, ResourceTemplate () { Is this change related? Doubtful, I'll confirm whether or not it is needed. 
diff --git a/bios/rombios32.c b/bios/rombios32.c index 2dc1d25..c1ec015 100755 --- a/bios/rombios32.c +++ b/bios/rombios32.c @@ -1182,7 +1182,7 @@ struct rsdp_descriptor /* Root System Descriptor Pointer */ struct rsdt_descriptor_rev1 { ACPI_TABLE_HEADER_DEF /* ACPI common table header */ - uint32_t table_offset_entry [2]; /* Array of pointers to other */ + uint32_t table_offset_entry [3]; /* Array of pointers to other */ /* ACPI tables */ }; @@ -1322,6 +1322,30 @@ struct madt_processor_apic #endif }; +/* + * ACPI 2.0 Generic Address Space definition. + */ +struct acpi_20_generic_address { +uint8_t address_space_id; +uint8_t register_bit_width; +uint8_t register_bit_offset; +uint8_t reserved; +uint64_t address; +}; + +/* + * HPET Description Table + */ +struct acpi_20_hpet { +ACPI_TABLE_HEADER_DEF /* ACPI common table header */ +uint32_t timer_block_id; +struct acpi_20_generic_address addr; +uint8_thpet_number; +uint16_t min_tick; +uint8_tpage_protect; +}; +#define ACPI_HPET_ADDRESS 0xFED0UL + struct madt_io_apic { APIC_HEADER_DEF @@ -1393,8 +1417,9 @@ void acpi_bios_init(void) struct fadt_descriptor_rev1 *fadt; struct facs_descriptor_rev1 *facs; struct multiple_apic_table *madt; +struct acpi_20_hpet *hpet; uint8_t *dsdt; -uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr; +uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr, hpet_addr; uint32_t acpi_tables_size, madt_addr, madt_size; int i; @@ -1436,6 +1461,11 @@ void acpi_bios_init(void) madt = (void *)(addr); addr += madt_size; +addr = (addr + 7) ~7; +hpet_addr = addr; +hpet = (void *)(addr); +addr += sizeof(*hpet); + acpi_tables_size = addr - base_addr; BX_INFO(ACPI tables: RSDP addr=0x%08lx ACPI DATA addr=0x%08lx size=0x%x\n, @@ -1457,6 +1487,7 @@ void acpi_bios_init(void) memset(rsdt, 0, sizeof(*rsdt)); rsdt-table_offset_entry[0] = cpu_to_le32(fadt_addr); rsdt-table_offset_entry[1] = cpu_to_le32(madt_addr); +rsdt-table_offset_entry[2] = cpu_to_le32(hpet_addr); 
acpi_build_table_header((struct acpi_table_header *)rsdt, RSDT, sizeof(*rsdt), 1); @@ -1540,6 +1571,15 @@ void acpi_bios_init(void) acpi_build_table_header((struct acpi_table_header *)madt, APIC, madt_size, 1); } + +/* HPET */
Re: [Regression] kvm-userspace: VM freezes after booting FreeDOS
Mohammed Gamal wrote: On Thu, Jul 10, 2008 at 7:31 PM, Anthony Liguori [EMAIL PROTECTED] wrote: Mohammed Gamal wrote: After updating my kvm-userspace tree to the latest git tree. I am no more able to run FreeDOS. The VM freezes after choosing any of the boot options. I am running both latest kvm.git and kvm-userspace.git Can you bisect to the commit that broke it? Regards, Anthony Liguori I did, it seems that commit 04c3265c95c12e7c6e73cfd07357c05db6a1bda8 caused it. Everything works fine after I reverted it. Thats: commit 04c3265c95c12e7c6e73cfd07357c05db6a1bda8 Author: Jan Kiszka [EMAIL PROTECTED] Date: Fri May 16 09:21:47 2008 +0200 kvm: qemu: consolidate kvm_eat_signal With the io thread signals are rare. Remove looping over kvm_eat_signal and fold everything into kvm_main_loop_wait. Signed-off-by: Jan Kiszka [EMAIL PROTECTED] Signed-off-by: Avi Kivity [EMAIL PROTECTED] Hrm, that's disturbing. Unfortunately, the patch is a bit more than just code motion. It does change the number of times the lock is released and makes some minor changes to the code. Nothing obvious that would cause the breakage without digging into it. How did you create your freedos image? I'd like to try and figure out what the problem is here since it may be causing other issues. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mask out clflush
On Thu, Jul 10, 2008 at 12:39 PM, Avi Kivity [EMAIL PROTECTED] wrote: Anthony Liguori wrote: yes. It is address 0xc8821000, apparently part of a pci controller initialization. qemu pci starts at 0xe000 IIRC. So maybe the guest is flushing random addresses just to be annoying. That's a virtual address, not a physical address IIUC. Ah, of course. How's that one ? -- error compiling committee.c: too many arguments to function -- Glauber Costa. Free as in Freedom http://glommer.net The less confident you are, the more serious you have to act. 0001-properly-decode-clflush.patch Description: Binary data
Re: patch: qemu + hugetlbfs..
Anthony Liguori wrote: +#include asm/param.h I don't think this is necessary anymore. Depending on Linux headers breaks the QEMU build on other unices so it's a bad thing. It is no longer required, but see below. hpage is a misnomer too as we aren't actually dependent on huge pages (this code should work equally well for tmpfs). As it currently exists alloc_hpage_mem() is tied to the notion of huge page allocation as it will reference gethugepagesize() irrespective of *mem_path. So even in the case of tmpfs backed files, if the host kernel has been configured with CONFIG_HUGETLBFS we will wind up doing allocations of /dev/shm mapped files at /proc/meminfo:Hugepagesize granularity. Otherwise if HUGETLBFS is not configured gethugepagesize() returns zero and alloc_hpage_mem() itself will not perform the allocation. Probably not what was intended but probably not too much of a concern as -mem-path /dev/shm is likely only used in debug of this flag and associated logic. I don't see it currently being worth the trouble to correct from a squeaky clean POV, and doing so may drag in far more than the header file we've just booted above to deal with this architecture/config dependency. An updated patch is attached. 
-john -- [EMAIL PROTECTED] --- a/qemu/vl.c +++ b/qemu/vl.c @@ -234,6 +234,7 @@ int autostart = 1; int time_drift_fix = 0; unsigned int kvm_shadow_memory = 0; const char *mem_path = NULL; +int mem_prealloc = 1; /* force preallocation of physical target memory */ int hpagesize = 0; const char *cpu_vendor_string; #ifdef TARGET_ARM @@ -7809,7 +7810,10 @@ static void help(int exitcode) #endif -tdfinject timer interrupts that got lost\n -kvm-shadow-memory megs set the amount of shadow pages to be allocated\n - -mem-path set the path to hugetlbfs/tmpfs mounted directory, also enables allocation of guest memory with huge pages\n + -mem-path set the path to hugetlbfs/tmpfs mounted directory, also\n + enables allocation of guest memory with huge pages\n + -mem-prealloc toggles preallocation of -mem-path backed physical memory\n + at startup. Default is enabled.\n -option-rom rom load a file, rom, into the option ROM space\n #ifdef TARGET_SPARC -prom-env variable=value set OpenBIOS nvram variables\n @@ -7932,6 +7936,7 @@ enum { QEMU_OPTION_tdf, QEMU_OPTION_kvm_shadow_memory, QEMU_OPTION_mempath, +QEMU_OPTION_mem_prealloc }; typedef struct QEMUOption { @@ -8059,6 +8064,7 @@ const QEMUOption qemu_options[] = { { startdate, HAS_ARG, QEMU_OPTION_startdate }, { tb-size, HAS_ARG, QEMU_OPTION_tb_size }, { mem-path, HAS_ARG, QEMU_OPTION_mempath }, +{ mem-prealloc, 0, QEMU_OPTION_mem_prealloc }, { NULL }, }; @@ -8276,11 +8282,13 @@ static int gethugepagesize(void) return hugepagesize; } -void *alloc_mem_area(unsigned long memory, const char *path) +/* attempt to allocate memory mmap'ed to mem_path + */ +void *alloc_hpage_mem(unsigned long memory, const char *path) { char *filename; void *area; -int fd; +int fd, flags; if (asprintf(filename, %s/kvm.XX, path) == -1) return NULL; @@ -8308,26 +8316,27 @@ void *alloc_mem_area(unsigned long memor */ ftruncate(fd, memory); -area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); -if (area == MAP_FAILED) { - perror(mmap); - 
close(fd); - return NULL; -} - -return area; +/* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case + * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED + * to sidestep this quirk. + */ +flags = mem_prealloc ? MAP_POPULATE|MAP_SHARED : MAP_PRIVATE; +area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0); +if (area != MAP_FAILED) + return (area); +perror(alloc_hpage_mem: can't mmap hugetlbfs pages); +close(fd); +return (NULL); } -void *qemu_alloc_physram(unsigned long memory) +/* allocate guest memory as requested + */ +void *qemu_alloc_physram(unsigned long size) { -void *area = NULL; - if (mem_path) - area = alloc_mem_area(memory, mem_path); -if (!area) - area = qemu_vmalloc(memory); - -return area; + return (alloc_hpage_mem(size, mem_path)); +else + return (qemu_vmalloc(size)); } int main(int argc, char **argv) @@ -8962,6 +8971,9 @@ int main(int argc, char **argv) case QEMU_OPTION_mempath: mem_path = optarg; break; +case QEMU_OPTION_mem_prealloc: + mem_prealloc = !mem_prealloc; + break; case QEMU_OPTION_name: qemu_name = optarg; break;
Re: Live migration without SDL
You can get to the monitor the same way in vnc as you do with the sdl gui. You can also have the monitor connected to a socket or network port. See the qemu docs for more info. On Thursday 10 July 2008 3:51:44 pm Ty! Boyack wrote: Folks, I'm afraid I'm missing something very basic here. I'm looking to try out the live migration facility, and all the documentation I'm seeing shows to initiate the migration by going to the qemu monitor from the SDL windows (alt-ctrl-2). However, I'd like to run my VMs with a VNC head, so I'm not sure where the interface is to initiate the migration without the SDL layer in place. Is the qemu monitor available through some other mechanism? This would be most helpful if the mechanism was reachable (and scriptable) from the HOST os, so that the HOST (in cooperation with another HOST) could initiate a migration of one or more GUEST VMs. If anyone could point me in the right direction, I would sure appreciate it. I have not subscribed to the list, so e-mail replies would be appreciated. -Ty! -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: patch: qemu + hugetlbfs..
john cooper wrote: Anthony Liguori wrote: john cooper wrote: As it currently exists alloc_hpage_mem() is tied to the notion of huge page allocation as it will reference gethugepagesize() irrespective of *mem_path. So even in the case of tmpfs backed files, if the host kernel has been configured with CONFIG_HUGETLBFS we will wind up doing allocations of /dev/shm mapped files at /proc/meminfo:Hugepagesize granularity. Which is fine. It just means we round -m values up to even numbers. Well, yes it will round the allocation. But from a minimally sufficient 4KB boundary to that of 4MB/2MB relative to a 32/64 bit x86 host which is excessive. Probably not what was intended but probably not too much of a concern as -mem-path /dev/shm is likely only used in debug of this flag and associated logic. I don't see it currently being worth the trouble to correct from a squeaky clean POV, and doing so may drag in far more than the header file we've just booted above to deal with this architecture/config dependency. Renaming a function to a name that's less accurate seems bad to me. I don't mean to be pedantic, but it seems like a strange thing to do. I prefer it the way it was before. I don't see any harm reverting the name. But I do believe it is largely cosmetic as given the above, the current code does require some work to make it independent of huge page assumptions. Update attached. -john Looks good to me. Acked-by: Anthony Liguori [EMAIL PROTECTED] -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Live migration without SDL
Ty! Boyack wrote: Folks, I'm afraid I'm missing something very basic here. I'm looking to try out the live migration facility, and all the documentation I'm seeing shows to initiate the migration by going to the qemu monitor from the SDL windows (alt-ctrl-2). However, I'd like to run my VMs with a VNC head, so I'm not sure where the interface is to initiate the migration without the SDL layer in place. I have no trouble switching to the monitor for a VM with a VNC head: alt-ctrl-2 works as expected in the VNC client. Is the qemu monitor available through some other mechanism? You can configure qemu to expose the monitor console via any of the serial port configuration types. That lets you make the monitor available at a udp or tcp port, e.g.: # qemu-system-x86_64 other options -monitor tcp:: The double colon is intended. There are lots of additional features beyond that example. This would be most helpful if the mechanism was reachable (and scriptable) from the HOST os, so that the HOST (in cooperation with another HOST) could initiate a migration of one or more GUEST VMs. That would be the monitor command line option then. snip -- David. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Regression] kvm-userspace: VM freezes after booting FreeDOS
On Fri, Jul 11, 2008 at 12:22 AM, Jan Kiszka [EMAIL PROTECTED] wrote: Mohammed Gamal wrote: On Thu, Jul 10, 2008 at 11:02 PM, Anthony Liguori [EMAIL PROTECTED] wrote: Mohammed Gamal wrote: On Thu, Jul 10, 2008 at 7:31 PM, Anthony Liguori [EMAIL PROTECTED] wrote: Mohammed Gamal wrote: After updating my kvm-userspace tree to the latest git tree. I am no more able to run FreeDOS. The VM freezes after choosing any of the boot options. I am running both latest kvm.git and kvm-userspace.git snip After booting FreeDOS, there are a number of boot options with different memory extenders, after selecting any option the system freezes and I get [Stopped] in the QEMU title bar. Stopped - interesting. Seems like something causes QEMU to stop the guest as if some breakpoint was injected. I just downloaded that image and gave it a try against vanilla kvm-70 and my own tree which is augmented with guest debugging related patches. The former shows your observed behavior (Boot from CD, provide an empty HD image - press '1' + ENTER - press '1' - Stopped). The latter kvm tree made QEMU leave with a #GP in the guest. That may point to a debug register related issue, and that patch you identified just happens to make it visible. However, will try to investigate. Jan I'm interested in seeing these patches. If your tree is hosted online, could you please provide me with its location so that I can merge it with mine. If not, where can I get them from? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction
KVM: MMU: improve invalid shadow root page handling Harden kvm_mmu_zap_page() against invalid root pages that had been shadowed from memslots that are gone. Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED] diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ff7cf63..7f57da6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -930,14 +930,17 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) } kvm_mmu_page_unlink_children(kvm, sp); if (!sp-root_count) { - if (!sp-role.metaphysical) + if (!sp-role.metaphysical !sp-role.invalid) unaccount_shadowed(kvm, sp-gfn); hlist_del(sp-hash_link); kvm_mmu_free_page(kvm, sp); } else { + int invalid = sp-role.invalid; list_move(sp-link, kvm-arch.active_mmu_pages); sp-role.invalid = 1; kvm_reload_remote_mmus(kvm); + if (!sp-role.metaphysical !invalid) + unaccount_shadowed(kvm, sp-gfn); } kvm_mmu_reset_last_pte_updated(kvm); } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 0/7][RFC] Enable kvm/ia-64 to build kvm components in userspace.
Avi Kivity wrote: Zhang, Xiantao wrote: I agree that for automatic testing it's more of a burden; but it needs to be done, especially as some kvm features are only enabled on newer kernels. The external module is convenient, but it's not a substitute for the real thing. So, I have a question here, When will you drop external module support? I don't really know. You know, it blocks our auto-testing system now, we have to re-evaluate the effort without external module support. I think it's very worthwhile to update the autotester to be able to build kernels. kvm will soon depend on core functionality (mmu notifiers) and since currently kvm is the only user (well, gru will be added soon, but ...) we need to test it ourselves. If it won't be dropped in a few weeks, we are eager to get its support for kvm-ia64. Okay, please send the patches, but with diff -M so they're readable. OK, I will regenerate the patches. Thanks! Xiantao -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/5] kvmtrace: make cycle calculation architecture aware
On Thursday 10 July 2008 21:32:29 Avi Kivity wrote: Yang, Sheng wrote: On Wednesday 09 July 2008 23:03:19 Hollis Blanchard wrote: On Wed, 2008-07-09 at 11:17 +0200, Christian Ehrhardt wrote: So the question that is left before changing that is, if the original author had something special in mind choosing cycles here. I added Eric on CC for that. I wait with my resubmission of the patch series until all architectures agree *hope* on using getnstimeofday() - after an ack from all sides I would revise my patch series and submit those changes altogether. I got an email bounce from Eric the last time I tried to email him, so I'm not sure he's still with Intel. However, I don't think he had any special intention; I think he was just porting xentrace to KVM. Eric had completed his internship at Intel, so... I like the term timestamp too. I think he used cycles only because there is a function called get_cycles(). But instead of getnstimeofday(), I suggest using ktime_get() here. It's a little more precise than getnstimeofday(), and ktime_t is easier to handle. And I think the overhead it brought can be ignored too. What is the overhead of ktime_get()? Well, I just mean it wraps getnstimeofday(), and compared to rdtscll(), it got little overhead... :) -- Thanks Yang, Sheng -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
KVM Test result, kernel cc453ce.., userspace 6787f5a.. -- One Issue Fixed
Hi All, This is today's KVM test result against kvm.git cc453ceef31019dcddff453d35a608ca2c659fec and kvm-userspace.git 6787f5a048c17e4ec4eb7858804795888449cd96. One Issue Fixed: 1. vista auto-unattended installation failed on kvm guests https://sourceforge.net/tracker/?func=detailatid=893831aid=1991653group_id=180599 Two Old Issues: 1. 32bits Rhel5/FC6 guest may fail to reboot after installation https://sourceforge.net/tracker/?func=detailatid=893831aid=1991647group_id=180599 2. failure to migrate guests with more than 4GB of RAM https://sourceforge.net/tracker/index.php?func=detailaid=1971512group_id=180599atid=893831 Test environment Platform Woodcrest CPU 4 Memory size 8G' Details IA32-pae: 1. boot guest with 256M memory PASS 2. boot guest with 1500M memory PASS 3. boot 4 same guest in parallel PASS 4. boot two windows xp guestPASS 5. boot linux and windows guest in parallel PASS 6. save/restore 32-bit HVM guests PASS 7. save/restore 32-bit HVM guests with 4 vcpus PASS 8. live migration 32-bit HVM guests PASS 9. live migration 32-bit HVM guests with 4 vcpus PASS 10. boot base kernel linux PASS 11. kernel build on SMP linux guestPASS 12. LTP on linux guest PASS 13. boot Windows 2000 without ACPI PASS 14. boot Windows 2000 with ACPI enabled PASS 15. boot Windows 2003 with ACPI enabled PASS 16. boot Windows xp with ACPI enabled PASS 17. boot Windows vista with ACPI enabled PASS 18. boot SMP Windows 2000 with ACPI enabled PASS 19. boot SMP Windows 2003 with ACPI enabled PASS 20. boot SMP Windows xp with ACPI enabled PASS 21. boot SMP Windows 2008 with ACPI enabled PASS IA32e: 1. boot 32-bit guest with 256M memory PASS 2. boot 64-bit guest with 256M memory PASS 3. boot 32-bit guest with 1500M memory PASS 4. boot 64-bit guest with 1500M memory PASS 5. boot 4G pae guest PASS 6. boot 4G 64-bit guest PASS 7. boot four 32-bit guest in parallel PASS 8. boot four 64-bit guest in parallel PASS 9. boot two 32-bit windows xp in parallel PASS 10. 
boot 32-bit linux and 32 bit windows guest in parallel PASS 11. boot four 32-bit different guest in para PASS 12. save/restore 32-bit linux guests PASS 13. save/restore 64-bit linux guests PASS 14. save/restore 64-bit linux guests with 4 vcpus PASS 15. save/restore 32-bit linux guests with 4 vcpus PASS 16. live migration 64bit linux guests PASS 17. live migration 32bit linux guests PASS 18. live migration 64bit linux guests with 4 vcpus PASS 19. live migration 32bit linux guests with 4 vcpus PASS 20. boot 32-bit x-server PASS 21. kernel build in 32-bit linux guest OS PASS 22. kernel build in 64-bit linux guest OS PASS 23. LTP on 32-bit linux guest OS PASS 24. LTP on 64-bit linux guest OS PASS 25. boot 64-bit guests with ACPI enabled PASS 26. boot 32-bit Windows 2000 without ACPIPASS 27. boot 32-bit Windows xp without ACPIPASS 28. boot 64-bit Windows xp with ACPI enabledPASS 29. boot 64-bit Windows vista with ACPI enabled PASS 30. boot 32-bit SMP Windows 2000 with ACPI enabled PASS 31. boot 32-bit SMP windows 2003 with ACPI enabled PASS 32. boot 32-bit SMP Windows xp with ACPI enabledPASS 33. boot 64-bit SMP Windows vista with ACPI enabled PASS 34. boot 32-bit SMP windows 2008 with ACPI enabled PASS 35. boot
[PATCH 03/04] KVM: external module: Allow non-x86 archs to build in userspace.
From 989c9551664988535608bb4051e654ffe5e5ddb0 Mon Sep 17 00:00:00 2001 From: Xiantao Zhang [EMAIL PROTECTED] Date: Wed, 2 Jul 2008 17:04:19 +0800 Subject: [PATCH] KVM: external module: Allow non-x86 archs to build in userspace. This patch targets for allowing kvm/ia64 to build in userspace. For other archs. it just needs to add its arch-specific stuff. Signed-off-by: Xiantao Zhang [EMAIL PROTECTED] --- kernel/Kbuild|1 + kernel/Makefile | 22 ++ kernel/ia64/Kbuild |7 +++ kernel/ia64/Makefile.pre | 23 +++ kernel/ia64/external-module-compat.h | 15 +++ kernel/x86/Makefile.pre |1 + 6 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 kernel/ia64/Kbuild create mode 100644 kernel/ia64/Makefile.pre create mode 100644 kernel/ia64/external-module-compat.h create mode 100644 kernel/x86/Makefile.pre diff --git a/kernel/Kbuild b/kernel/Kbuild index 7019ca0..ec34c43 100644 --- a/kernel/Kbuild +++ b/kernel/Kbuild @@ -1 +1,2 @@ obj-$(CONFIG_X86) += x86/ +obj-$(CONFIG_IA64) += ia64/ diff --git a/kernel/Makefile b/kernel/Makefile index 94d63bc..0d5d148 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,6 +7,8 @@ KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR)) DESTDIR= +MAKEFILE_PRE = $(ARCH_DIR)/Makefile.pre + INSTALLDIR = $(patsubst %/build,%/extra,$(KERNELDIR)) ORIGMODDIR = $(patsubst %/build,%/kernel,$(KERNELDIR)) @@ -27,10 +29,11 @@ unifdef = mv $1 $1.orig \ hack = $(call _hack,$T/$(strip $1)) hack-files-x86 = kvm_main.c mmu.c vmx.c svm.c x86.c irq.h lapic.c i8254.c +hack-files-ia64 = hack-files = $(hack-files-$(ARCH_DIR)) -all:: +all:: header-link # include header priority 1) $LINUX 2) $KERNELDIR 3) include-compat $(MAKE) -C $(KERNELDIR) M=`pwd` \ LINUXINCLUDE=-I`pwd`/include -Iinclude -I`pwd`/include-compat \ @@ -38,7 +41,12 @@ all:: -include `pwd`/$(ARCH_DIR)/external-module-compat.h $$@ -sync: header-sync source-sync +sync: header-sync source-sync header-link prerequisite + +header-link: + rm -f include/asm + ln -sf asm-$(ARCH_DIR) include/asm 
+ ln -sf asm-$(ARCH_DIR) include-compat/asm T = $(subst -sync,,$@)-tmp @@ -56,15 +64,11 @@ header-sync: do mkdir -p $$(dirname $$i); cmp -s $$i $T/$$i || cp $T/$$i $$i; done rm -rf $T - rm -f include/asm - ln -sf asm-$(ARCH_DIR) include/asm - ln -sf asm-$(ARCH_DIR) include-compat/asm - source-sync: rm -rf $T rsync --exclude='*.mod.c' -R \ -$(LINUX)/arch/$(ARCH_DIR)/kvm/./*.[ch] \ -$(LINUX)/virt/kvm/./*.[ch] \ +$(LINUX)/arch/$(ARCH_DIR)/kvm/./*.[cSh] \ +$(LINUX)/virt/kvm/./*.[cSh] \ $T/ set -e for i in $(find $T -name '*.c'); do \ @@ -77,6 +81,8 @@ source-sync: do cmp -s $(ARCH_DIR)/$$i $T/$$i || cp $T/$$i $(ARCH_DIR)/$$i; done rm -rf $T +include $(MAKEFILE_PRE) + install: mkdir -p $(DESTDIR)/$(INSTALLDIR) cp $(ARCH_DIR)/*.ko $(DESTDIR)/$(INSTALLDIR) diff --git a/kernel/ia64/Kbuild b/kernel/ia64/Kbuild new file mode 100644 index 000..764a493 --- /dev/null +++ b/kernel/ia64/Kbuild @@ -0,0 +1,7 @@ +obj-m := kvm.o kvm-intel.o + +kvm-objs := kvm_main.o ioapic.o coalesced_mmio.o kvm-ia64.o kvm_fw.o + +EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 +kvm-intel-objs := vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ + vtlb.o process.o memset.o memcpy.o diff --git a/kernel/ia64/Makefile.pre b/kernel/ia64/Makefile.pre new file mode 100644 index 000..09f2a73 --- /dev/null +++ b/kernel/ia64/Makefile.pre @@ -0,0 +1,23 @@ +prerequisite: asm-offsets.h ia64/memset.S ia64/memcpy.S + cp -f $(LINUX)/arch/ia64/lib/memcpy.S ia64/memcpy.S + cp -f $(LINUX)/arch/ia64/lib/memset.S ia64/memset.S + cmp -s asm-offset.h ia64/asm-offset.h || mv -f asm-offsets.* ia64/ + +asm-offsets.h: asm-offsets.s + @(set -e; \ + echo /*; \ + echo * DO NOT MODIFY.; \ + echo *; \ + echo * This file was auto-generated from $; \ + echo *; \ + echo */; \ + echo ; \ + echo #ifndef __KVM_ASM_OFFSETS_H__; \ + echo #define __KVM_ASM_OFFSETS_H__; \ + echo ; \ + sed -ne /^-/{s:^-\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:-::; p;}; \ + echo ; \ + echo #endif) $ $@ + +asm-offsets.s: 
ia64/asm-offsets.c + gcc -S -D__KERNEL__ -I$(LINUX)/include -I./include ia64/asm-offsets.c diff --git a/kernel/ia64/external-module-compat.h b/kernel/ia64/external-module-compat.h new file mode 100644 index 000..3c4001c --- /dev/null +++ b/kernel/ia64/external-module-compat.h @@ -0,0 +1,15 @@ +/* + * Compatibility header for building as an external module. + */ + +#include
[PATCH 04/04] KVM: userspace: Enable rpm build for kvm/ia64
From ce37b613776ea71ed2c75c8302985aecb5e6799f Mon Sep 17 00:00:00 2001 From: Xiantao Zhang [EMAIL PROTECTED] Date: Fri, 11 Jul 2008 13:13:52 +0800 Subject: [PATCH] KVM: userspace: Enable rpm build for kvm/ia64 Enable make rpm for kvm/ia64. Signed-off-by: Xiantao Zhang [EMAIL PROTECTED] --- Makefile |4 ++-- kernel/kvm-kmod.spec |8 +--- kvm.spec |6 -- qemu/Makefile|1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 48a8dff..c7e7370 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ qemu: libkvm ifneq '$(filter $(ARCH), i386 x86_64)' '' qemu: extboot endif -ifneq '$(filter $(ARCH), powerpc)' '' +ifneq '$(filter $(ARCH), powerpc, ia64)' '' qemu: libfdt endif user: libkvm @@ -73,7 +73,7 @@ install-rpm: install -m 755 kvm_stat $(DESTDIR)/$(bindir)/kvm_stat cp scripts/kvm $(DESTDIR)/$(initdir)/kvm cp scripts/qemu-ifup $(DESTDIR)/$(confdir)/qemu-ifup - install -t $(DESTDIR)/etc/udev/rules.d scripts/*kvm*.rules + install -C scripts/*kvm*.rules $(DESTDIR)/etc/udev/rules.d install: $(kcmd)make -C kernel DESTDIR=$(DESTDIR) install diff --git a/kernel/kvm-kmod.spec b/kernel/kvm-kmod.spec index b56fdf1..89b3d88 100644 --- a/kernel/kvm-kmod.spec +++ b/kernel/kvm-kmod.spec @@ -10,11 +10,11 @@ License:GPL URL:http://www.qumranet.com BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} -ExclusiveArch: i386 x86_64 +ExclusiveArch: i386 x86_64 ia64 %description This kernel module provides support for virtual machines using hardware support -(Intel VT or AMD SVM). +(Intel VT-x&VT-i or AMD SVM). 
%prep @@ -27,7 +27,7 @@ rm -rf %{buildroot} %define kverrel unknown %define moddir /lib/modules/%{kverrel}/extra mkdir -p %{buildroot}/%{moddir} -cp %{objdir}/%{kmod_name}.ko %{objdir}/%{kmod_name}-intel.ko %{objdir}/%{kmod_name}-amd.ko %{buildroot}/%{moddir} +cp %{objdir}/%{kmod_name}.ko %{objdir}/%{kmod_name}-*.ko %{buildroot}/%{moddir} chmod u+x %{buildroot}/%{moddir}/%{kmod_name}*.ko %post @@ -43,7 +43,9 @@ depmod %{kverrel} %files %{moddir}/%{kmod_name}.ko +%ifarch i386 x86_64 %{moddir}/%{kmod_name}-amd.ko +%endif %{moddir}/%{kmod_name}-intel.ko diff --git a/kvm.spec b/kvm.spec index af8271e..92acb0e 100644 --- a/kvm.spec +++ b/kvm.spec @@ -8,7 +8,7 @@ License:GPL URL:http://www.qumranet.com BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} -ExclusiveArch: i386 x86_64 +ExclusiveArch: i386 x86_64 ia64 Requires: kvm-kmod bridge-utils @@ -58,7 +58,7 @@ Source8: extboot.tar.gz %description The Kernel Virtual Machine provides a virtualization enviroment for processors -with hardware support for virtualization: Intel's VT and AMD's AMD-V. +with hardware support for virtualization: Intel's VT-x&VT-i and AMD's AMD-V. %prep @@ -82,7 +82,9 @@ cd .. ./configure --prefix=/usr/kvm %{qemuldflags} make -C libkvm make -C user +%ifarch i386 x86_64 make extboot +%endif #(cd qemu; #./co # kpath=$(readlink -f ../kernel/include) diff --git a/qemu/Makefile b/qemu/Makefile index 8ca17f7..627e255 100644 --- a/qemu/Makefile +++ b/qemu/Makefile @@ -231,6 +231,7 @@ endif pxe-rtl8139.bin pxe-pcnet.bin pxe-e1000.bin extboot.bin \ bamboo.dtb; \ do \ + [ -f $(SRC_PATH)/pc-bios/$$x ] && \ $(INSTALL) -m 644 $(SRC_PATH)/pc-bios/$$x $(DESTDIR)$(datadir); \ done ifndef CONFIG_WIN32 -- 1.5.2 0004-KVM-userspace-Enable-rpm-build-for-kvm-ia64.patch Description: 0004-KVM-userspace-Enable-rpm-build-for-kvm-ia64.patch