[PATCH] KVM: PCIPT: VT-d support

2008-07-10 Thread Ben-Ami Yassour
From: Kay, Allen M [EMAIL PROTECTED]

This patch includes the functions to support VT-d for passthrough
devices.

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
Signed-off-by: Weidong Han [EMAIL PROTECTED]
Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
---
 arch/x86/kvm/Makefile  |2 +-
 arch/x86/kvm/vtd.c |  176 
 arch/x86/kvm/x86.c |   10 +++
 include/asm-x86/kvm_host.h |1 +
 include/linux/kvm_host.h   |6 ++
 virt/kvm/kvm_main.c|6 ++
 6 files changed, 200 insertions(+), 1 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..5d9d079 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -11,7 +11,7 @@ endif
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
-   i8254.o
+   i8254.o vtd.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 000..83efb8a
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Allen M. Kay [EMAIL PROTECTED]
+ * Author: Weidong Han [EMAIL PROTECTED]
+ */
+
+#include linux/list.h
+#include linux/kvm_host.h
+#include linux/pci.h
+#include linux/dmar.h
+#include linux/intel-iommu.h
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+   gfn_t base_gfn, unsigned long npages)
+{
+   gfn_t gfn = base_gfn;
+   pfn_t pfn;
+   int i, rc;
+   struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
+
+   if (!domain)
+   return -EFAULT;
+
+   for (i = 0; i  npages; i++) {
+   pfn = gfn_to_pfn(kvm, gfn);
+   rc = intel_iommu_page_mapping(domain,
+ gfn  PAGE_SHIFT,
+ pfn  PAGE_SHIFT,
+ PAGE_SIZE,
+ DMA_PTE_READ |
+ DMA_PTE_WRITE);
+   if (rc)
+   kvm_release_pfn_clean(pfn);
+
+   gfn++;
+   }
+   return 0;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+   int i, rc;
+   for (i = 0; i  kvm-nmemslots; i++) {
+   rc = kvm_iommu_map_pages(kvm, kvm-memslots[i].base_gfn,
+kvm-memslots[i].npages);
+   if (rc)
+   return rc;
+   }
+   return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+   struct kvm_pci_passthrough_dev *pci_pt_dev)
+{
+   struct pci_dev *pdev = NULL;
+
+   printk(KERN_DEBUG VT-d direct map: host bdf = %x:%x:%x\n,
+  pci_pt_dev-host.busnr,
+  PCI_SLOT(pci_pt_dev-host.devfn),
+  PCI_FUNC(pci_pt_dev-host.devfn));
+
+   for_each_pci_dev(pdev) {
+   if ((pdev-bus-number == pci_pt_dev-host.busnr) 
+   (pdev-devfn == pci_pt_dev-host.devfn)) {
+   break;
+   }
+   }
+
+   if (pdev == NULL) {
+   if (kvm-arch.intel_iommu_domain) {
+   intel_iommu_domain_exit(kvm-arch.intel_iommu_domain);
+   kvm-arch.intel_iommu_domain = NULL;
+   }
+   return -ENODEV;
+   }
+
+   kvm-arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+
+   if (kvm_iommu_map_memslots(kvm)) {
+   kvm_iommu_unmap_memslots(kvm);
+   return -EFAULT;
+   }
+
+   intel_iommu_detach_dev(kvm-arch.intel_iommu_domain,
+  pdev-bus-number, pdev-devfn);
+
+   if (intel_iommu_context_mapping(kvm-arch.intel_iommu_domain,
+   pdev)) {
+   printk(KERN_ERR Domain context map for %s failed,
+  pci_name(pdev));
+   return -EFAULT;
+   }
+   return 0;
+}
+
+static int 

[PATCH] VT-d : changes to support KVM

2008-07-10 Thread Ben-Ami Yassour
From: Kay, Allen M [EMAIL PROTECTED]

This patch extends the VT-d driver to support KVM

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
Signed-off-by: Weidong Han [EMAIL PROTECTED]
Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
---
 drivers/pci/dmar.c   |4 +-
 drivers/pci/intel-iommu.c|  117 +-
 drivers/pci/iova.c   |2 +-
 {drivers/pci = include/linux}/intel-iommu.h |   11 +++
 {drivers/pci = include/linux}/iova.h|0 
 5 files changed, 127 insertions(+), 7 deletions(-)
 rename {drivers/pci = include/linux}/intel-iommu.h (94%)
 rename {drivers/pci = include/linux}/iova.h (100%)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index f941f60..a58a5b0 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -26,8 +26,8 @@
 
 #include linux/pci.h
 #include linux/dmar.h
-#include iova.h
-#include intel-iommu.h
+#include linux/iova.h
+#include linux/intel-iommu.h
 
 #undef PREFIX
 #define PREFIX DMAR:
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 66c0fd2..6ad2c75 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -20,6 +20,7 @@
  * Author: Anil S Keshavamurthy [EMAIL PROTECTED]
  */
 
+#undef DEBUG
 #include linux/init.h
 #include linux/bitmap.h
 #include linux/debugfs.h
@@ -33,8 +34,8 @@
 #include linux/dma-mapping.h
 #include linux/mempool.h
 #include linux/timer.h
-#include iova.h
-#include intel-iommu.h
+#include linux/iova.h
+#include linux/intel-iommu.h
 #include asm/proto.h /* force_iommu in this header in x86-64*/
 #include asm/cacheflush.h
 #include asm/gart.h
@@ -160,7 +161,7 @@ static inline void *alloc_domain_mem(void)
return iommu_kmem_cache_alloc(iommu_domain_cache);
 }
 
-static inline void free_domain_mem(void *vaddr)
+static void free_domain_mem(void *vaddr)
 {
kmem_cache_free(iommu_domain_cache, vaddr);
 }
@@ -1414,7 +1415,7 @@ static void domain_remove_dev_info(struct dmar_domain 
*domain)
  * find_domain
  * Note: we use struct pci_dev-dev.archdata.iommu stores the info
  */
-struct dmar_domain *
+static struct dmar_domain *
 find_domain(struct pci_dev *pdev)
 {
struct device_domain_info *info;
@@ -2408,3 +2409,111 @@ int __init intel_iommu_init(void)
return 0;
 }
 
+void intel_iommu_domain_exit(struct dmar_domain *domain)
+{
+   u64 end;
+
+   /* Domain 0 is reserved, so dont process it */
+   if (!domain)
+   return;
+
+   end = DOMAIN_MAX_ADDR(domain-gaw);
+   end = end  (~PAGE_MASK_4K);
+
+   /* clear ptes */
+   dma_pte_clear_range(domain, 0, end);
+
+   /* free page tables */
+   dma_pte_free_pagetable(domain, 0, end);
+
+   iommu_free_domain(domain);
+   free_domain_mem(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
+
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+{
+   struct dmar_drhd_unit *drhd;
+   struct dmar_domain *domain;
+   struct intel_iommu *iommu;
+
+   drhd = dmar_find_matched_drhd_unit(pdev);
+   if (!drhd) {
+   printk(KERN_ERR intel_iommu_domain_alloc: drhd == NULL\n);
+   return NULL;
+   }
+
+   iommu = drhd-iommu;
+   if (!iommu) {
+   printk(KERN_ERR
+   intel_iommu_domain_alloc: iommu == NULL\n);
+   return NULL;
+   }
+   domain = iommu_alloc_domain(iommu);
+   if (!domain) {
+   printk(KERN_ERR
+   intel_iommu_domain_alloc: domain == NULL\n);
+   return NULL;
+   }
+   if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+   printk(KERN_ERR
+   intel_iommu_domain_alloc: domain_init() failed\n);
+   intel_iommu_domain_exit(domain);
+   return NULL;
+   }
+   return domain;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+
+int intel_iommu_context_mapping(
+   struct dmar_domain *domain, struct pci_dev *pdev)
+{
+   int rc;
+   rc = domain_context_mapping(domain, pdev);
+   return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+
+int intel_iommu_page_mapping(
+   struct dmar_domain *domain, dma_addr_t iova,
+   u64 hpa, size_t size, int prot)
+{
+   int rc;
+   rc = domain_page_mapping(domain, iova, hpa, size, prot);
+   return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+{
+   detach_domain_for_dev(domain, bus, devfn);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+
+struct dmar_domain *
+intel_iommu_find_domain(struct pci_dev *pdev)
+{
+   return find_domain(pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+
+int intel_iommu_found(void)
+{
+   return g_num_of_iommus;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_found);
+
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 

Re: [Qemu-devel] [RFC][PATCH] Add HPET emulation to qemu

2008-07-10 Thread Samuel Thibault
Cool!
Does it now happen that qemu no longer wakes up every 10ms?  If not,
please try to make sure it happens, that would eventually fix that power
leak :)

Samuel
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PCIPT: VT-d support

2008-07-10 Thread Ben-Ami Yassour
On Wed, 2008-07-09 at 10:49 -0500, Anthony Liguori wrote:
 Ben-Ami Yassour wrote:
  From: Kay, Allen M [EMAIL PROTECTED]
 
  This patch includes the functions to support VT-d for passthrough
  devices.
 
  [Ben: fixed memory pinning]
 
  Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
  Signed-off-by: Weidong Han [EMAIL PROTECTED]
  Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
  ---
   arch/x86/kvm/Makefile  |2 +-
   arch/x86/kvm/vtd.c |  189 
  
   include/asm-x86/kvm_host.h |   17 
   include/asm-x86/kvm_para.h |   14 +++
   include/linux/kvm_host.h   |6 ++
   5 files changed, 227 insertions(+), 1 deletions(-)
   create mode 100644 arch/x86/kvm/vtd.c
 
  diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
  index d0e940b..5d9d079 100644
  --- a/arch/x86/kvm/Makefile
  +++ b/arch/x86/kvm/Makefile
  @@ -11,7 +11,7 @@ endif
   EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
   
   kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o 
  \
  -   i8254.o
  +   i8254.o vtd.o
   obj-$(CONFIG_KVM) += kvm.o
   kvm-intel-objs = vmx.o
   obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
  diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
  new file mode 100644
  index 000..5abeef1
  --- /dev/null
  +++ b/arch/x86/kvm/vtd.c
  @@ -0,0 +1,189 @@
  +/*
  + * Copyright (c) 2006, Intel Corporation.
  + *
  + * This program is free software; you can redistribute it and/or modify it
  + * under the terms and conditions of the GNU General Public License,
  + * version 2, as published by the Free Software Foundation.
  + *
  + * This program is distributed in the hope it will be useful, but WITHOUT
  + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
  for
  + * more details.
  + *
  + * You should have received a copy of the GNU General Public License along 
  with
  + * this program; if not, write to the Free Software Foundation, Inc., 59 
  Temple
  + * Place - Suite 330, Boston, MA 02111-1307 USA.
  + *
  + * Copyright (C) 2006-2008 Intel Corporation
  + * Author: Allen M. Kay [EMAIL PROTECTED]
  + * Author: Weidong Han [EMAIL PROTECTED]
  + */
  +
  +#include linux/list.h
  +#include linux/kvm_host.h
  +#include linux/pci.h
  +#include linux/dmar.h
  +#include linux/intel-iommu.h
  +
  +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
  +
  +int kvm_iommu_map_pages(struct kvm *kvm,
  +   gfn_t base_gfn, unsigned long npages)
  +{
  +   gfn_t gfn = base_gfn;
  +   pfn_t pfn;
  +   struct page *page;
  +   int i, rc;
  +   struct dmar_domain *domain = kvm-arch.intel_iommu_domain;
  +
  +   if (!domain)
  +   return -EFAULT;
  +
  +   for (i = 0; i  npages; i++) {
  +   pfn = gfn_to_pfn(kvm, gfn);
  +   if (pfn_valid(pfn)) {

 
 As I've mentioned before, this is wrong.  We should add MMIO pages to 
 the VT-d tables but at any rate, pfn_valid() doesn't work for checking 
 if something is MMIO.

removing the check.

 
  +   rc = intel_iommu_page_mapping(domain,
  + gfn  PAGE_SHIFT,
  + pfn  PAGE_SHIFT,
  + PAGE_SIZE,
  + DMA_PTE_READ |
  + DMA_PTE_WRITE);
  +   if (rc) {
  +   page = pfn_to_page(pfn);
  +   put_page(page);

 
 This should be kvm_release_pfn_clean().

fixed in the new version.

 
  +   }
  +   } else {
  +   printk(KERN_DEBUG kvm_iommu_map_page:
  +  invalid pfn=%lx\n, pfn);
  +   return 0;
  +   }
  +   gfn++;
  +   }
  +   return 0;
  +}
  +
  +static int kvm_iommu_map_memslots(struct kvm *kvm)
  +{
  +   int i, rc;
  +   for (i = 0; i  kvm-nmemslots; i++) {
  +   rc = kvm_iommu_map_pages(kvm, kvm-memslots[i].base_gfn,
  +kvm-memslots[i].npages);
  +   if (rc)
  +   return rc;
  +   }
  +   return 0;
  +}
  +
  +int kvm_iommu_map_guest(struct kvm *kvm,
  +   struct kvm_pci_passthrough_dev *pci_pt_dev)
  +{
  +   struct pci_dev *pdev = NULL;
  +
  +   printk(KERN_DEBUG VT-d direct map: host bdf = %x:%x:%x\n,
  +  pci_pt_dev-host.busnr,
  +  PCI_SLOT(pci_pt_dev-host.devfn),
  +  PCI_FUNC(pci_pt_dev-host.devfn));
  +
  +   for_each_pci_dev(pdev) {
  +   if ((pdev-bus-number == pci_pt_dev-host.busnr) 
  +   (pdev-devfn == pci_pt_dev-host.devfn)) {
  +   break;
  +   }
  +   }
  +
  +   if (pdev == NULL) {
  +   if (kvm-arch.intel_iommu_domain) {
  +   

Re: KSM Algorithm

2008-07-10 Thread Izik Eidus

ציטוט Sukanto Ghosh:

Can anyone answer these queries regarding KSM ?

How does KSM offers its services through the  /dev/ksm device ?
  

ioctls


Are every guest pages scanned in KVM while using KSM or page-scanning
and sharing is triggered on some event (low memory, etc) ?
  
No, the scanning runs all the time (according to the number-of-pages-to-scan 
and sleep parameters...)



Is sharing done only between the pages which have been registered via
KSM_REGISTER_MEMORY_REGION ?
  

yes


What are these for ? KSM_CREATE_SHARED_MEMORY_AREA and KSM_CREATE_SCAN ?
  


KSM_CREATE_SHARED_MEMORY_AREA - registers a memory area to be scanned for 
identical pages
KSM_CREATE_SCAN - creates the scanner that scans for these pages


Where are the shared pages kept ? In kernel memory ? Aren't the shared
pages always pinned in RAM (due to same reasons for pinned shadowed
pages) ?
  


In the version that was sent to the list it was kernel memory, meaning the 
shared pages are not swappable (only the pages that are actually shared, not 
the pages that we scan; when they are split because of copy-on-write they 
become swappable again). In the new version that I will send soon, the pages 
are normal anonymous/userspace memory that is swappable.



How much is the overhead involved due to this ?
  
It depends on the speed you tell it to scan at, but the overhead is about 5% 
for common cases.



--
Regards,
Sukanto Ghosh
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PCIPT: VT-d support

2008-07-10 Thread Yang, Sheng
On Thursday 10 July 2008 17:14:42 Ben-Ami Yassour wrote:
 From: Kay, Allen M [EMAIL PROTECTED]

 This patch includes the functions to support VT-d for passthrough
 devices.

 [Ben: fixed memory pinning]

 Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
 Signed-off-by: Weidong Han [EMAIL PROTECTED]
 Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
 ---
  arch/x86/kvm/Makefile  |2 +-
  arch/x86/kvm/vtd.c |  176
  arch/x86/kvm/x86.c
 |   10 +++
  include/asm-x86/kvm_host.h |1 +
  include/linux/kvm_host.h   |6 ++
  virt/kvm/kvm_main.c|6 ++
  6 files changed, 200 insertions(+), 1 deletions(-)
  create mode 100644 arch/x86/kvm/vtd.c

 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
 index 531d635..dc67d90 100644
 --- a/virt/kvm/kvm_main.c
 +++ b/virt/kvm/kvm_main.c
 @@ -41,6 +41,7 @@
  #include linux/pagemap.h
  #include linux/mman.h
  #include linux/swap.h
 +#include linux/intel-iommu.h

  #include asm/processor.h
  #include asm/io.h
 @@ -422,6 +423,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
   }

   kvm_free_physmem_slot(old, new);
 +
 + /* map the pages in iommu page table */
 + if (intel_iommu_found())
 + kvm_iommu_map_pages(kvm, base_gfn, npages);
 +

I don't understand why we need this along with 
kvm_iommu_map_memslots(). This runs during memory setup, and in 
kvm_iommu_map_guest() we do it again for the overlapping memory 
regions?

I think even if we need to pin all pages, we still only need to do it 
once...

--
Thanks
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PCIPT: VT-d support

2008-07-10 Thread Yang, Sheng
On Thursday 10 July 2008 17:51:53 Ben-Ami Yassour wrote:
 On Thu, 2008-07-10 at 17:30 +0800, Yang, Sheng wrote:
  On Thursday 10 July 2008 17:14:42 Ben-Ami Yassour wrote:
   From: Kay, Allen M [EMAIL PROTECTED]
  
   This patch includes the functions to support VT-d for
   passthrough devices.
  
   [Ben: fixed memory pinning]
  
   Signed-off-by: Kay, Allen M [EMAIL PROTECTED]
   Signed-off-by: Weidong Han [EMAIL PROTECTED]
   Signed-off-by: Ben-Ami Yassour [EMAIL PROTECTED]
   ---
arch/x86/kvm/Makefile  |2 +-
arch/x86/kvm/vtd.c |  176
    arch/x86/kvm/x86.c
  
   |   10 +++
  
include/asm-x86/kvm_host.h |1 +
include/linux/kvm_host.h   |6 ++
virt/kvm/kvm_main.c|6 ++
6 files changed, 200 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/vtd.c
  
   diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
   index 531d635..dc67d90 100644
   --- a/virt/kvm/kvm_main.c
   +++ b/virt/kvm/kvm_main.c
   @@ -41,6 +41,7 @@
#include linux/pagemap.h
#include linux/mman.h
#include linux/swap.h
   +#include linux/intel-iommu.h
  
#include asm/processor.h
#include asm/io.h
   @@ -422,6 +423,11 @@ int __kvm_set_memory_region(struct kvm
   *kvm, }
  
 kvm_free_physmem_slot(old, new);
   +
   + /* map the pages in iommu page table */
   + if (intel_iommu_found())
   + kvm_iommu_map_pages(kvm, base_gfn, npages);
   +
 
  I don't understand why we need this along with
  kvm_iommu_map_memslots(). This works during the memory setup, and
  in kvm_iommu_map_guest() we do it again with the overlapped
  memory region?
 
  I think even if we need pin all pages, we still just need do it
  once...

 We map the entire guest memory on initialization by going over all
 the existing memory slots.
 If a new slot is created later then we need to map it as well, this
 is the call that you see here.

I think it may be unnecessary to map the pages when a device is assigned. The 
table can be set up along with set_memory_region(), which already covers all 
memory slots. Or am I missing something here?

-- 
Thanks
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/5] kvmtrace: make cycle calculation architecture aware

2008-07-10 Thread Yang, Sheng
On Wednesday 09 July 2008 23:03:19 Hollis Blanchard wrote:
 On Wed, 2008-07-09 at 11:17 +0200, Christian Ehrhardt wrote:
  So the question that is left before changing that is, if the
  original author had something special in mind chosing cycles
  here. I added Eric on CC for that.
 
  I wait with my resubmission of the patch series until all
  architectures agree *hope* on using getnstimeofday() - after an
  ack from all sides I would revise my patch series and submit that
  changes alltogether.

 I got an email bounce from Eric the last time I tried to email him,
 so I'm not sure he's still with Intel.

 However, I don't think he had any special intention; I think he was
 just porting xentrace to KVM.

Eric had completed his internship in Intel, so...

I like the term timestamp too. I think he used cycles only because 
there is a function called get_cycles().

But instead of getnstimeofday(), I suggest using ktime_get() here. 
It's a little more precise than getnstimeofday(), and ktime_t is 
easier to handle. And I think the overhead it adds can be 
ignored too.

--
Thanks
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/9] kvm-userspace: kvmtrace_format: add ppc instruction emulation

2008-07-10 Thread ehrhardt
From: Christian Ehrhardt [EMAIL PROTECTED]

This patch adds the handling of the ppc instruction emulation trace records.
Because those records are more complex than what the classic formats file
can describe, this patch adds a check of the event id and maps to an internal
handler function if needed (other complex trace records that might appear in
the future can hook in there too).
Additionally this fixes the ppc tlb trace record definitions in the formats
file now that the revised kernel patch series is submitted.

Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED]
---

[diffstat]
 formats |   16 +--
 kvmtrace_format |  276 +---
 2 files changed, 268 insertions(+), 24 deletions(-)

[diff]

diff --git a/user/formats b/user/formats
--- a/user/formats
+++ b/user/formats
@@ -23,13 +23,9 @@
 0x00020013  %(tsc)d (+%(reltsc)8d)  LMSW  vcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x [ value = 0x%(1)08x ]
 0x00020014  %(tsc)d (+%(reltsc)8d)  APIC_ACCESS   vcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x [ offset = 0x%(1)08x ]
 0x00020015  %(tsc)d (+%(reltsc)8d)  TDP_FAULT vcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x [ errorcode = 0x%(1)08x, virt = 0x%(3)08x %(2)08x ]
-# ppc: context switch
-0x00020016  %(tsc)d (+%(reltsc)8d)  CONT_SWITCH   vcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x
-# ppc: tlb write
-0x00020017  %(tsc)d (+%(reltsc)8d)  TLB_WRITEvcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, 
word2=0x%(4)08x, word3=0x%(5)08x ]
-# ppc: tlb invalidate
-0x00020018  %(tsc)d (+%(reltsc)8d)  TLB_INVAL vcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, 
word2=0x%(4)08x, word3=0x%(5)08x ]
-# ppc: guest TLB write
-0x00020019  %(tsc)d (+%(reltsc)8d)  GTLB_WRITEvcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, 
word2=0x%(4)08x, word3=0x%(5)08x ]
-# ppc: shadow TLB write
-0x00020020  %(tsc)d (+%(reltsc)8d)  STLB_WRITE   vcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x [ index = 0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, 
word2=0x%(4)08x, word3=0x%(5)08x ]
+# ppc: tlb traces
+0x00020016  GTLB_WRITEvcpu = 0x%(vcpu)08x  pid = 0x%(pid)08x [ index = 
0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ]
+0x00020017  STLB_WRITEvcpu = 0x%(vcpu)08x  pid = 0x%(pid)08x [ index = 
0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ]
+0x00020018  STLB_INVALvcpu = 0x%(vcpu)08x  pid = 0x%(pid)08x [ index = 
0x%(1)08x, tid = 0x%(2)08x, word1=0x%(3)08x, word2=0x%(4)08x, word3=0x%(5)08x ]
+# ppc: instruction emulation - this type is handled more complex in 
kvmtrace_format, but listed to show the eventid and transported data
+#0x00020019  %(tsc)d (+%(reltsc)8d)  PPC_INSTR vcpu = 0x%(vcpu)08x  pid = 
0x%(pid)08x [ instr = 0x%(1)08x, pc = 0x%(2)08x, emul = 0x%(3)08x, nsec = 
%(4)08d ]
diff --git a/user/kvmtrace_format b/user/kvmtrace_format
--- a/user/kvmtrace_format
+++ b/user/kvmtrace_format
@@ -58,6 +58,250 @@
 def sighand(x,y):
 global interrupted
 interrupted = 1
+
+# ppc instruction decoding for event type 0x00020019 (PPC_INSTR)
+def get_op(instr):
+return (instr  26);
+
+def get_xop(instr):
+return (instr  1)  0x3ff;
+
+def get_sprn(instr):
+   return ((instr  16)  0x1f) | ((instr  6)  0x3e0)
+
+def get_dcrn(instr):
+   return ((instr  16)  0x1f) | ((instr  6)  0x3e0);
+
+def get_tlbwe_type(instr):
+   ws = (instr  11)  0x1f;
+   if ws == 0:
+   return PAGEID
+   elif ws == 1:
+   return XLAT
+   elif ws == 2:
+   return ATTRIB
+   else:
+   return UNKNOWN
+
+def get_name(instr):
+   if get_op(instr)==3:
+   return trap
+   elif get_op(instr)==19:
+   if get_xop(instr) == 50:
+   return rfi
+   else:
+   return unknown
+   elif get_op(instr)==31:
+   if get_xop(instr) == 83:
+   return mfmsr
+
+   elif get_xop(instr) == 87:
+   return lbzx
+
+   elif get_xop(instr) == 131:
+   return wrtee
+
+   elif get_xop(instr) == 146:
+   return mtmsr
+
+   elif get_xop(instr) == 163:
+   return wrteei
+
+   elif get_xop(instr) == 215:
+   return stbx
+
+   elif get_xop(instr) == 247:
+   return stbux
+
+   elif get_xop(instr) == 279:
+   return lhzx
+
+   elif get_xop(instr) == 311:
+   return lhzux
+
+   elif get_xop(instr) == 323:
+   return mfdcr
+
+   elif get_xop(instr) == 339:
+   return mfspr
+
+   elif get_xop(instr) == 407:
+   return 

[PATCH 0/5] kvmtrace: powerpc support and timestamps for KVM_TRACE

2008-07-10 Thread ehrhardt
From: Christian Ehrhardt [EMAIL PROTECTED]

Changing kvmtrace code to use timestamps and add powerpc support.

This is the revised series including the feedback from the earlier submission
this week. I now also added the userspace changes which completes the series.

The series contains nine patches (4 generic / 5 powerpc code):

Subject: [PATCH 1/9] kvmtrace: Remove use of bit fields in kvm trace structure 
v3
Introducing an endian-safe way to store the flags event, tcs, num_data 
Subject: [PATCH 2/9] kvmtrace: replace get_cycles with getnstimeofday
use a timestamp source that is comparable across hosts and make it 32/64-bit safe
Subject: [PATCH 3/9] kvmtrace: rename cycles to timestamp
it is no more tracking cycles, so rename variables and change comments
Subject: [PATCH 3/9] kvmppc: kvmtrace: enable KVM_TRACE building for powerpc
enable KVM_TRACE for powerpc in Kconfig/Makefile
Subject: [PATCH 4/9] kvmppc: kvmtrace: adds trace points for ppc tlb activity v2
use KVM_TRACE to track tlb/shadow tlb activity on embedded powerpc
Subject: [PATCH 5/9] kvmppc: kvmtrace: trace powerpc instruction emulation
use KVM_TRACE to track instruction emulation overhead
Subject: [PATCH 7/9] kvm-userspace: kvmtrace_format: add ppc instruction 
emulation
extension to kvmtrace_format to handle complex event types, e.g. ppc
instruction emulation
Subject: [PATCH 8/9] kvm-userspace: kvmtrace_format: add statistic section
adding statistic creation/presentation to the kvmtrace data processing
Subject: [PATCH 9/9] kvm-userspace: kvmtrace: rename cycles to timestamp
it is no more tracking cycles, so rename variables and change comments

Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED]
---

[diffstat]
 arch/powerpc/kvm/emulate.c   |2
 b/arch/powerpc/kvm/44x_tlb.c |   15 ++
 b/arch/powerpc/kvm/Kconfig   |   11 +
 b/arch/powerpc/kvm/Makefile  |6
 b/arch/powerpc/kvm/emulate.c |4
 b/include/linux/kvm.h|   17 ++
 b/user/formats   |   16 --
 b/user/kvmtrace_format   |  276 ---
 b/virt/kvm/kvm_trace.c   |   19 +-
 include/linux/kvm.h  |   10 +
 user/formats |   50 +++
 user/kvmtrace_format |  114 +
 virt/kvm/kvm_trace.c |   29 ++--
 13 files changed, 461 insertions(+), 108 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/9] kvmtrace: Remove use of bit fields in kvm trace structure v3

2008-07-10 Thread ehrhardt
From: Christian Ehrhardt [EMAIL PROTECTED]
From: Jerone Young [EMAIL PROTECTED]

This patch fixes kvmtrace use on big endian systems. When using bit fields, the
compiler lays the data out in an order other than the one expected when the
record is written to a file.
This fixes it by using one variable instead of using bit fields.

Updates in v3:
- fixed macro definition bug in v2
- ensured in macro operator order
- fixed whitespace/indent issues
- removed superfluous initialization

Signed-off-by: Jerone Young [EMAIL PROTECTED]
Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED]
---

[diffstat]
 include/linux/kvm.h  |   17 ++---
 virt/kvm/kvm_trace.c |   19 ++-
 2 files changed, 24 insertions(+), 12 deletions(-)

[diff]

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -311,9 +311,13 @@
 
 /* This structure represents a single trace buffer record. */
 struct kvm_trace_rec {
-   __u32 event:28;
-   __u32 extra_u32:3;
-   __u32 cycle_in:1;
+   /* variable rec_val
+* is split into:
+* bits 0 - 27  - event id
+* bits 28 -30  - number of extra data args of size u32
+* bits 31  - binary indicator for if tsc is in record
+*/
+   __u32 rec_val;
__u32 pid;
__u32 vcpu_id;
union {
@@ -326,6 +330,13 @@
} nocycle;
} u;
 } __attribute__((packed));
+
+#define TRACE_REC_EVENT_ID(val) \
+   (0x0fff  (val))
+#define TRACE_REC_NUM_DATA_ARGS(val) \
+   (0x7000  ((val)  28))
+#define TRACE_REC_TCS(val) \
+   (0x8000  ((val)  31))
 
 #define KVMIO 0xAE
 
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -54,12 +54,13 @@
struct kvm_trace *kt = kvm_trace;
struct kvm_trace_rec rec;
struct kvm_vcpu *vcpu;
-   inti, extra, size;
+   inti, size;
+   u32extra;
 
if (unlikely(kt-trace_state != KVM_TRACE_STATE_RUNNING))
return;
 
-   rec.event   = va_arg(*args, u32);
+   rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32));
vcpu= va_arg(*args, struct kvm_vcpu *);
rec.pid = current-tgid;
rec.vcpu_id = vcpu-vcpu_id;
@@ -67,21 +68,21 @@
extra   = va_arg(*args, u32);
WARN_ON(!(extra = KVM_TRC_EXTRA_MAX));
extra   = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
-   rec.extra_u32   = extra;
 
-   rec.cycle_in= p-cycle_in;
-
-   if (rec.cycle_in) {
+   rec.rec_val |= TRACE_REC_TCS(p-cycle_in)
+   | TRACE_REC_NUM_DATA_ARGS(extra);
+   
+   if (p-cycle_in) {
rec.u.cycle.cycle_u64 = get_cycles();
 
-   for (i = 0; i  rec.extra_u32; i++)
+   for (i = 0; i  extra; i++)
rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
} else {
-   for (i = 0; i  rec.extra_u32; i++)
+   for (i = 0; i  extra; i++)
rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
}
 
-   size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
+   size = calc_rec_size(p-cycle_in, extra * sizeof(u32));
relay_write(kt-rchan, rec, size);
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/9] kvm-userspace: kvmtrace_format: add statistic section

2008-07-10 Thread ehrhardt
From: Christian Ehrhardt [EMAIL PROTECTED]

Usually people don't want to read thousands of trace log lines to interpret
the data, a condensed statistic about the traced events is usually better
to read.
This patch adds a new command line switch -s that tells kvmtrace_format to
generate statistics while processing the trace records. Those statistics are
then printed at the end of the output.
This patch contains a statistic function for the ppc instruction emulation.
An example output might look like that:

  mnemonic +  count
---+---
wrteei |   1260
 mfmsr |977
 mtspr |895
 wrtee |742
 mfspr |534
   rfi |179
 mtmsr | 90
   lbz | 53
   stb | 28
   sum =   4758
[...] more detailed statistics about spr, dcr and tlb usage

Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED]
---

[diffstat]
 kvmtrace_format |   73 +++-
 1 file changed, 67 insertions(+), 6 deletions(-)

[diff]
diff --git a/user/kvmtrace_format b/user/kvmtrace_format
--- a/user/kvmtrace_format
+++ b/user/kvmtrace_format
@@ -4,7 +4,7 @@
 
 # Program for reformatting trace buffer output according to user-supplied rules
 
-import re, sys, string, signal, struct, os, getopt
+import re, sys, string, signal, struct, os, getopt, operator
 
 def usage():
 print  sys.stderr, \
@@ -29,6 +29,12 @@
   this script may not be able to keep up with the output of kvmtrace
   if it is piped directly.  In these circumstances you should have
   kvmtrace output to a file for processing off-line.
+
+  kvmtrace_format has the following additional switches
+  -c mhz - specifies the mhz of the traced machine used to convert
+   cycle data in trace records into time
+  -s - if this switch is set additional trace statistics are
+   created and printed at the end of the output
   
 sys.exit(1)
 
@@ -60,6 +66,33 @@
 interrupted = 1
 
 # ppc instruction decoding for event type 0x00020019 (PPC_INSTR)
+# some globals for statistic summaries
+stat_ppc_instr_mnemonic = {};
+stat_ppc_instr_spr = {};
+stat_ppc_instr_dcr = {};
+stat_ppc_instr_tlb = {};
+
+def ppc_instr_print_summary(sortedlist, colname):
+   print \n\n%14s + %10s % (colname, count)
+   print %s % (15*-+++11*-)
+   sum = 0
+   for value, key in sortedlist:
+   sum += key
+   print %14s | %10d % (value, key)
+   print %14s = %10d % (sum, sum)
+
+
+def ppc_instr_summary():
+   # don't print empty statistics
+if stat_ppc_instr_mnemonic:
+   
ppc_instr_print_summary(sorted(stat_ppc_instr_mnemonic.iteritems(), 
key=operator.itemgetter(1), reverse=True), mnemonic)
+if stat_ppc_instr_spr:
+   ppc_instr_print_summary(sorted(stat_ppc_instr_spr.iteritems(), 
key=operator.itemgetter(1), reverse=True), mnemonic-spr)
+if stat_ppc_instr_dcr:
+   ppc_instr_print_summary(sorted(stat_ppc_instr_dcr.iteritems(), 
key=operator.itemgetter(1), reverse=True), mnemonic-dcr)
+if stat_ppc_instr_tlb:
+   ppc_instr_print_summary(sorted(stat_ppc_instr_tlb.iteritems(), 
key=operator.itemgetter(1), reverse=True), mnemonic-tlb)
+
 def get_op(instr):
 return (instr  26);
 
@@ -293,28 +326,53 @@
return UNKNOWN
 
 def get_special(instr):
+   name = get_name(instr);
+   if stat_ppc_instr_mnemonic.has_key(name):
+   stat_ppc_instr_mnemonic[name] += 1
+   else:
+   stat_ppc_instr_mnemonic[name] = 1
+
if get_op(instr) == 31:
if (get_xop(instr) == 339) or (get_xop(instr) == 467):
sprn = get_sprn(instr);
-   return (- sprn 0x%03x %8s % (sprn, 
get_sprn_name(sprn)))
+   sprn_name = get_sprn_name(sprn);
+   stat_idx = name+-+sprn_name
+   if stat_ppc_instr_spr.has_key(stat_idx):
+   stat_ppc_instr_spr[stat_idx] += 1
+   else:
+   stat_ppc_instr_spr[stat_idx] = 1
+   return (- sprn 0x%03x %8s % (sprn, sprn_name))
elif (get_xop(instr) == 323 ) or (get_xop(instr) == 451):
-   return (- dcrn 0x%03x % get_dcrn(instr))
+   dcrn = get_dcrn(instr);
+   stat_idx = name+-+(%04X%dcrn)
+   if stat_ppc_instr_dcr.has_key(stat_idx):
+   stat_ppc_instr_dcr[stat_idx] += 1
+   else:
+   stat_ppc_instr_dcr[stat_idx] = 1
+   return (- dcrn 0x%03x % dcrn)
elif (get_xop(instr) == 978 ) or (get_xop(instr) == 451):
-  

Re: KSM Algorithm

2008-07-10 Thread Sukanto Ghosh
 Doesn't KSM notifies KVM about the shared pages so that KVM can update
 its sptes accordingly or is it done by KSM itself ?


 when using kvm, mmu notifiers is a must for ksm, (mmu notifiers update kvm
 about the changes in the host page table)

 What about KSM being used by someone other than KVM ? Does KSM updates
 the ptes of the shared pages itself ?


 it can be used by any application, it update the ptes of the host, what you
 mean by the shared pages itself?

by itself I meant 'ksm'. So ksm will take care of updating the ptes
of the shared guest pages.






 Where are the shared pages kept ? In kernel memory ? Aren't the shared
 pages always pinned in RAM (due to same reasons for pinned shadowed
 pages) ?



 in the version that was sent to the list it was kernel memory (meaning
 shared pages are not swappable
 (just the pages that are shared not the pages that we scan, when they are
 split beacuse of copy on write it become swappable again
 new version that i will send soon the pages are normal
 anonymous/userspace
 memory that is swappable



 What is the strategy/method which you will use to represent the shared
 memory as normal anonymous/userspace memory ? Then, these pages must
 be staying at one of the guest's address-space (whose pages have been
  shared) ?



 it possible due to another patch that i need to send, that allow modules
 register new reverse mapping call backs...,

ok

but in whose address-space will these shared pages stay ?





 How much is the overhead involved due to this ?



 depeand on the speed you tell it to scan, but the overhead is about ~5%
 for
 common cases


 Can I know the typical values of the parameters : sleep time, no. of
 pages to scan, which you used to arrive at the above mentioned
 overhead.


 i think it is about 256 2000 (256 pages per 2000 usleep)
 (note that most of the cpu is acutlay taken by the copying of the pages when
 a new shared page is created, i have possible way to change it and reduce
 the cpu even more
 but i dont know if i want to do it) (it add more IFs to the fast path inside
 the mm)







Thanks :)

-- 
Regards,
Sukanto Ghosh
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KSM Algorithm

2008-07-10 Thread Izik Eidus

ציטוט Sukanto Ghosh:

Doesn't KSM notifies KVM about the shared pages so that KVM can update
its sptes accordingly or is it done by KSM itself ?

  

when using kvm, mmu notifiers is a must for ksm, (mmu notifiers update kvm
about the changes in the host page table)



What about KSM being used by someone other than KVM ? Does KSM updates
the ptes of the shared pages itself ?

  

it can be used by any application, it update the ptes of the host, what you
mean by the shared pages itself?



by itself I meant 'ksm'. So ksm will take care of updating the ptes
of the shared guest pages.
  


yes



  
  

Where are the shared pages kept ? In kernel memory ? Aren't the shared
pages always pinned in RAM (due to same reasons for pinned shadowed
pages) ?


  

in the version that was sent to the list it was kernel memory (meaning
shared pages are not swappable
(just the pages that are shared not the pages that we scan, when they are
split because of copy on write it become swappable again
new version that i will send soon the pages are normal
anonymous/userspace
memory that is swappable




What is the strategy/method which you will use to represent the shared
memory as normal anonymous/userspace memory ? Then, these pages must
be staying at one of the guest's address-space (whose pages have been
 shared) ?

  

it possible due to another patch that i need to send, that allow modules
register new reverse mapping call backs...,



ok

but in whose address-space will these shared pages stay ?

  


if we have 5 applications sharing the same page, we will have the page 
stay in 5 different address-spaces...


  
  

How much is the overhead involved due to this ?


  

depends on the speed you tell it to scan, but the overhead is about ~5%
for
common cases



Can I know the typical values of the parameters : sleep time, no. of
pages to scan, which you used to arrive at the above mentioned
overhead.

  

i think it is about 256 2000 (256 pages per 2000 usleep)
(note that most of the cpu is actually taken by the copying of the pages when
a new shared page is created, i have possible way to change it and reduce
the cpu even more
but i dont know if i want to do it) (it add more IFs to the fast path inside
the mm)



  



Thanks :)

  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KSM Algorithm

2008-07-10 Thread Sukanto Ghosh
got it. thanks

On Fri, Jul 11, 2008 at 12:50 AM, Izik Eidus [EMAIL PROTECTED] wrote:
 ציטוט Sukanto Ghosh:

 Doesn't KSM notifies KVM about the shared pages so that KVM can update
 its sptes accordingly or is it done by KSM itself ?



 when using kvm, mmu notifiers is a must for ksm, (mmu notifiers update
 kvm
 about the changes in the host page table)



 What about KSM being used by someone other than KVM ? Does KSM updates
 the ptes of the shared pages itself ?



 it can be used by any application, it update the ptes of the host, what
 you
 mean by the shared pages itself?


 by itself I meant 'ksm'. So ksm will take care of updating the ptes
 of the shared guest pages.


 yes






 Where are the shared pages kept ? In kernel memory ? Aren't the shared
 pages always pinned in RAM (due to same reasons for pinned shadowed
 pages) ?




 in the version that was sent to the list it was kernel memory (meaning
 shared pages are not swappable
 (just the pages that are shared not the pages that we scan, when they
 are
 split beacuse of copy on write it become swappable again
 new version that i will send soon the pages are normal
 anonymous/userspace
 memory that is swappable




 What is the strategy/method which you will use to represent the shared
 memory as normal anonymous/userspace memory ? Then, these pages must
 be staying at one of the guest's address-space (whose pages have been
  shared) ?



 it possible due to another patch that i need to send, that allow modules
 register new reverse mapping call backs...,



 ok

 but in whose address-space will these shared pages stay ?



 if we have 5 applications sharing the same page, we will have the page stay
 in 5 diffrent address-spaces...





 How much is the overhead involved due to this ?




 depeand on the speed you tell it to scan, but the overhead is about ~5%
 for
 common cases



 Can I know the typical values of the parameters : sleep time, no. of
 pages to scan, which you used to arrive at the above mentioned
 overhead.



 i think it is about 256 2000 (256 pages per 2000 usleep)
 (note that most of the cpu is acutlay taken by the copying of the pages
 when
 a new shared page is created, i have possible way to change it and reduce
 the cpu even more
 but i dont know if i want to do it) (it add more IFs to the fast path
 inside
 the mm)








 Thanks :)







-- 
Regards,
Sukanto Ghosh


Re: [PATCH] mask out clflush

2008-07-10 Thread Anthony Liguori

Yang, Sheng wrote:

On Wednesday 09 July 2008 02:29:44 Glauber Costa wrote:
  

clflush is a non-privileged instruction that flushes the cacheline
given by its parameter, in terms of linear address. As it is
non-privileged, it is quite tricky, because a guest doing clflush
will actually be trying to flush a host kernel address.



The linear address was convert to host physical address, then cache 
line was flushed. Of course the host physical address was used by 
guest at the time. I don't understand why we need to prevent guest 
from flushing cache line related to itself...
  


The problem turned out to be that we aren't emulating clflush in 
x86_emulate.


Regards,

Anthony Liguori


--
Thanks
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/5] kvmtrace: make cycle calculation architecture aware

2008-07-10 Thread Avi Kivity

Yang, Sheng wrote:

On Wednesday 09 July 2008 23:03:19 Hollis Blanchard wrote:
  

On Wed, 2008-07-09 at 11:17 +0200, Christian Ehrhardt wrote:


So the question that is left before changing that is, if the
original author had something special in mind choosing cycles
here. I added Eric on CC for that.

I wait with my resubmission of the patch series until all
architectures agree *hope* on using getnstimeofday() - after an
ack from all sides I would revise my patch series and submit that
changes all together.
  

I got an email bounce from Eric the last time I tried to email him,
so I'm not sure he's still with Intel.

However, I don't think he had any special intention; I think he was
just porting xentrace to KVM.



Eric had completed his internship in Intel, so...

I like the term timestamp too. I think he used cycles only because 
there is a function called get_cycles().


But instead of getnstimeofday(), I suggest using ktime_get() here. 
It's little more precise than getnstimeofday(), and ktime_t is more 
easily to be handled. And I think the overhead it brought can be 
ignored too.
  


What is the overhead of ktime_get()?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Avi Kivity

Anthony Liguori wrote:

Yang, Sheng wrote:

On Wednesday 09 July 2008 02:29:44 Glauber Costa wrote:
 

clflush is a non-privileged instruction that flushes the cacheline
given by its parameter, in terms of linear address. As it is
non-privileged, it is quite tricky, because a guest doing clflush
will actually be trying to flush a host kernel address.



The linear address was convert to host physical address, then cache 
line was flushed. Of course the host physical address was used by 
guest at the time. I don't understand why we need to prevent guest 
from flushing cache line related to itself...
  


The problem turned out to be that we aren't emulating clflush in 
x86_emulate.




Why would clflush trap?  Is it called from real mode?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KSM Algorithm

2008-07-10 Thread Sukanto Ghosh
One more query, what if multiple processes call ioctl KSM_CREATE_SCAN
? Will there be multiple scanners ?

Consider a scenario where two processes A & B separately call
KSM_CREATE_SCAN and then start registering some memory pages/areas via
KSM_CREATE_SHARED_MEMORY_AREA ioctl.
Lets say A registers 4 pages having content X, Y, Z and X . And,
process B registers 2 pages having content Z and X.
So, will there be only single copy each of X, Y and Z in the entire
system or will they be kept separately i.e the scanner for A maintains
X, Y, Z single copy each and scanner for B maintains X and Z
separately.


-- 
Regards,
Sukanto Ghosh
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KSM Algorithm

2008-07-10 Thread Izik Eidus

ציטוט Sukanto Ghosh:

One more query, what if multiple processes call ioctl KSM_CREATE_SCAN
? Will there be multiple scanners ?
  

yes


Consider a scenario where two processes A & B separately call
KSM_CREATE_SCAN and then start registering some memory pages/areas via
KSM_CREATE_SHARED_MEMORY_AREA ioctl.
Lets say A registers 4 pages having content X, Y, Z and X . And,
process B registers 2 pages having content Z and X.
So, will there be only single copy each of X, Y and Z in the entire
system or will they be kept separately i.e the scanner for A maintains
X, Y, Z single copy each and scanner for B maintains X and Z
separately.
  

there will be only one copy,
ksm is multi threaded safe.



  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Ignore DEBUGCTL MSRs

2008-07-10 Thread Avi Kivity

Alexander Graf wrote:

Avi Kivity wrote:

Alexander Graf wrote:
Netware writes and reads to the DEBUGCTL and LAST*IP MSRs without 
further checks and is really confused to receive a #GP during that. 
To make it happy we should just make them stubs, which is exactly 
what SVM already does.


To support VMX too, I put these in the generic code. Maybe the SVM 
code could be cleaned up to use generic code too.




Please add a pr_unimpl() when bits that cause a real processor to do 
something are set.


Like this? I also removed the set handlers for the *IP MSRs, as these 
are read only and made it only handle debug bits, no perfmon bits.




With a changelog entry.


Signed-off-by: Alexander Graf [EMAIL PROTECTED]


diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc0721e..10f5e95 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -609,6 +609,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
data)
pr_unimpl(vcpu, %s: MSR_IA32_MCG_CTL 0x%llx, nop\n,
__func__, data);
break;
+   case MSR_IA32_DEBUGCTLMSR:
+   if (data  (DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
+   /* Values other than LBR and BTF are vendor-specific,
+  thus reserved and should throw a #GP */
+   return 1;
+   }
  

'>' is too clever. '& ~(u64)(...)' is clearer.

Arithmetic and logical ops don't mix well.



+   pr_unimpl(vcpu, %s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n,
+   __func__, data);
  


We can avoid the printout if data == 0, since we support that case fully.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Avi Kivity

Anthony Liguori wrote:


It's equivalent to a read from a VT perspective so if the read would 
trap, the clflush instruction will trap.




Reads don't normally go through the emulator.  Is the guest clflush()ing 
mmio addresses?  Strange as these are not normally cached.



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Avi Kivity

Glauber Costa wrote:

clflush is a non-privileged instruction that flushes the cacheline
given by its parameter, in terms of linear address. As it is non-privileged,
it is quite tricky, because a guest doing clflush will actually be trying to
flush a host kernel address.
  


We need to allow clflush for pci device assignment.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Anthony Liguori

Avi Kivity wrote:

Anthony Liguori wrote:


It's equivalent to a read from a VT perspective so if the read would 
trap, the clflush instruction will trap.




Reads don't normally go through the emulator.  Is the guest 
clflush()ing mmio addresses?  Strange as these are not normally cached.


It seems so, Glauber mentioned that the address was an MMIO address.

Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Add HPET support to BIOS

2008-07-10 Thread Ryan Harper
* Elizabeth Kon [EMAIL PROTECTED] [2008-07-09 23:07]:
 This patch, written by Ryan Harper, adds HPET support to BIOS.

Based on hpet changes to Xen's hvm firmware/rombios.

 Signed-off-by: Beth Kon [EMAIL PROTECTED]
Signed-off-by: Ryan Harper [EMAIL PROTECTED]


-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
[EMAIL PROTECTED]
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ kvm-Bugs-2009439 ] data corruption with virtio-blk

2008-07-10 Thread SourceForge.net
Bugs item #2009439, was opened at 2008-07-03 06:51
Message generated for change (Settings changed) made by markmc
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2009439&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Closed
Resolution: Fixed
Priority: 5
Private: No
Submitted By: Balaji Rao R (balajirrao)
Assigned to: Nobody/Anonymous (nobody)
Summary: data corruption with virtio-blk

Initial Comment:
kvm-userspace : kvm-70-138-g163308a

With a kernel from linux-2.6.git (v2.6.26-rc8-89-ge1441b9), data corruption is 
seen on block devices when exported through virtio. Once a block device is 
mounted and unmounted in a guest, the kernel   is not able to find a filesystem 
in the block device when  being remounted. This is seen from the host as well.


--

Comment By: Mark McLoughlin (markmc)
Date: 2008-07-07 09:32

Message:
Logged In: YES 
user_id=116392
Originator: NO

Should be fixed by:

 
http://git.kernel.org/?p=virt/kvm/kvm-userspace.git;a=commit;h=b5a5e894a



--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2009439&group_id=180599
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PCIPT: VT-d support

2008-07-10 Thread Muli Ben-Yehuda
On Thu, Jul 10, 2008 at 06:07:27PM +0800, Yang, Sheng wrote:

 I think it's may be unnecessary to map pages when device
 assigned. The table can be set up along with set_memory_region(), it
 covered all memory slots already, or I miss something here?

VT-d is only initialized after the slots are originally created, so
when VT-d is initialized we map all of the existing slots separately,
and then for each new slot that may be added we'll catch it via
set_memory_region().

Cheers,
Muli
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction

2008-07-10 Thread Avi Kivity

Marcelo Tosatti wrote:

On Mon, Jul 07, 2008 at 02:31:55PM -0300, Marcelo Tosatti wrote:
  

On Sun, Jul 06, 2008 at 12:15:56AM +0300, Avi Kivity wrote:


Marcelo Tosatti wrote:
  

On Sat, Jul 05, 2008 at 08:25:30PM +0300, Avi Kivity wrote:
  


@@ -1955,6 +1955,22 @@ void kvm_mmu_slot_remove_write_access(st
}
 }
 +int kvm_mmu_slot_has_shadowed_page(struct kvm *kvm, int slot)
+{
+   struct kvm_mmu_page *sp;
+   int ret = 0;
+
+   spin_lock(kvm-mmu_lock);
+   list_for_each_entry(sp, kvm-arch.active_mmu_pages, link) {
+   if (test_bit(slot, sp-slot_bitmap)) {
+   ret = -EINVAL;
+   break;
+   }
+   }
+   spin_unlock(kvm-mmu_lock);
+   return ret;
+}
+


I don't like the guest influencing host actions in this way.  It's 
just  a guest.


But I think it's unneeded.  kvm_mmu_zap_page() will mark a root 
shadow  page invalid and force all vcpus to reload it, so all that's 
needed is  to keep the mmu spinlock held while removing the slot.

  

You're still keeping a shadowed page around with sp-gfn pointing to
non-existant memslot. The code generally makes the assumption that
gfn_to_memslot(gfn) on shadowed info will not fail.

kvm_mmu_zap_page - unaccount_shadowed, for example.

  

The page has already been zapped, so we might as well  
unaccount_shadowed() on the first run.  It needs to be moved until after  
the reload_remote_mmus() call, though.
  


Oops, previous patch was unaccounting multiple times for invalid pages.
This should be better:

 
During RH6.2 graphical installation the following oops is triggered:


 BUG: unable to handle kernel NULL pointer dereference at 
 IP: [a00bf172] :kvm:gfn_to_rmap+0x3e/0x61
 Pid: 4559, comm: qemu-system-x86 Not tainted
 
The problem is that KVM allows shadow pagetable entries that

point to a removed memslot to exist. In this case the cirrus vram
mapping was removed, and the NULL dereference happened during
kvm_set_memory_alias()'s zap_all_pages().

So nuke all shadowed pages before memslot removal.
 
Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]



diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index a4cf4a2..76259da 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1455,6 +1455,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
return 0;
 }
 
+int kvm_arch_destroy_memory_region(struct kvm *kvm, int slot)

+{
+   return 0;
+}
 
  


This (and its friends) ought to be static inlines.

On the other hand, don't the other arches have to flush their tlbs?  
Xiantao/Hollis?  So maybe this function needs to be renamed 
kvm_flush_shadow() and implemented across the board.



diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b90da0b..5ef3a5e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -405,6 +405,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (mem-slot = kvm-nmemslots)
kvm-nmemslots = mem-slot + 1;
 
+	if (!npages) {

+   r = kvm_arch_destroy_memory_region(kvm, mem-slot);
+   if (r)
+   goto out_free;
+   }
+
  


Destructors should never fail, since there is no possible recovery.  And 
indeed you have 'return 0' in the actual implementation.  So I think the 
function better return void.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Glauber Costa
On Thu, Jul 10, 2008 at 11:20 AM, Anthony Liguori [EMAIL PROTECTED] wrote:
 Avi Kivity wrote:

 Anthony Liguori wrote:

 It's equivalent to a read from a VT perspective so if the read would
 trap, the clflush instruction will trap.


 Reads don't normally go through the emulator.  Is the guest clflush()ing
 mmio addresses?  Strange as these are not normally cached.

 It seems so, Glauber mentioned that the address was an MMIO address.

yes. It is address 0xc8821000, apparently part of a pci controller
initialization.

 Regards,

 Anthony Liguori





-- 
Glauber Costa.
Free as in Freedom
http://glommer.net

The less confident you are, the more serious you have to act.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Avi Kivity

Glauber Costa wrote:

On Thu, Jul 10, 2008 at 11:20 AM, Anthony Liguori [EMAIL PROTECTED] wrote:
  

Avi Kivity wrote:


Anthony Liguori wrote:
  

It's equivalent to a read from a VT perspective so if the read would
trap, the clflush instruction will trap.



Reads don't normally go through the emulator.  Is the guest clflush()ing
mmio addresses?  Strange as these are not normally cached.
  

It seems so, Glauber mentioned that the address was an MMIO address.



yes. It is address 0xc8821000, apparently part of a pci controller
initialization.
  


qemu pci starts at 0xe000 IIRC.  So maybe the guest is flushing 
random addresses just to be annoying.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/7][RFC] Enable kvm/ia-64 to build kvm components in userspace.

2008-07-10 Thread Avi Kivity

Zhang, Xiantao wrote:


I agree that for automatic testing it's more of a burden; but it needs
to be done, especially as some kvm features are only enabled on newer
kernels.

The external module is convenient, but it's not a substitute for the
real thing.



So, I have a question here, When will you drop external module support?
  


I don't really know.


You know, it blocks our auto-testing system now, we have to re-evaluate
the effort without external module support. 
  


I think it's very worthwhile to update the autotester to be able to 
build kernels. kvm will soon depend on core functionality (mmu 
notifiers) and since currently kvm is the only user (well, gru will be 
added soon, but ...) we need to test it ourselves.



If it won't be dropped in a few weeks, we are eager to get its support
for kvm-ia64.

  


Okay, please send the patches, but with diff -M so they're readable.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: PCIPT: VT-d support

2008-07-10 Thread Avi Kivity

Muli Ben-Yehuda wrote:

On Thu, Jul 10, 2008 at 06:07:27PM +0800, Yang, Sheng wrote:

  

I think it's may be unnecessary to map pages when device
assigned. The table can be set up along with set_memory_region(), it
covered all memory slots already, or I miss something here?



VT-d is only initialized after the slots are originally created, so
when VT-d is initialized is map all of the existing slots separately,
and then for each new slot that may be added we'll catch it via
set_memory_region().
  


It makes sense to initialize VT-d before.  Since memslots can be created 
and destroyed dynamically, with the current implementation we can see


 create slot
 create slot
 init VT-d
 create slot

which means we need to support both slot-creation-after-VT-d and 
init-VT-d-after-slot-creation.  If we initialize VT-d up front, we only 
need to support (and test) one scenario.


On the other hand, this means that you will not be able to assign 
devices unless you specified this when creating the VM; but I think this 
is fair.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Anthony Liguori

Avi Kivity wrote:

Glauber Costa wrote:
On Thu, Jul 10, 2008 at 11:20 AM, Anthony Liguori 
[EMAIL PROTECTED] wrote:
 

Avi Kivity wrote:
   

Anthony Liguori wrote:
 

It's equivalent to a read from a VT perspective so if the read would
trap, the clflush instruction will trap.


Reads don't normally go through the emulator.  Is the guest 
clflush()ing

mmio addresses?  Strange as these are not normally cached.
  

It seems so, Glauber mentioned that the address was an MMIO address.



yes. It is address 0xc8821000, apparently part of a pci controller
initialization.
  


qemu pci starts at 0xe000 IIRC.  So maybe the guest is flushing 
random addresses just to be annoying.


That's a virtual address, not a physical address IIUC.

Regards,

Anthony Liguori


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Avi Kivity

Anthony Liguori wrote:


yes. It is address 0xc8821000, apparently part of a pci controller
initialization.
  


qemu pci starts at 0xe000 IIRC.  So maybe the guest is flushing 
random addresses just to be annoying.


That's a virtual address, not a physical address IIUC.



Ah, of course.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Add HPET support to BIOS

2008-07-10 Thread Sebastian Herbszt

Beth Kon wrote:

This patch, written by Ryan Harper, adds HPET support to BIOS.

Signed-off-by: Beth Kon [EMAIL PROTECTED]

diff --git a/bios/Makefile b/bios/Makefile
index 48022ea..3e73fb5 100644
--- a/bios/Makefile
+++ b/bios/Makefile
@@ -40,7 +40,7 @@ LIBS =  -lm
RANLIB = ranlib

BCC = bcc
-GCC = gcc -m32
+GCC = gcc -m32 -fno-stack-protector
HOST_CC = gcc
AS86 = as86

diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index d1bfa2c..1548c86 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl
@@ -262,6 +262,24 @@ DefinitionBlock (
Return (MEMP)
}
}
+Device(HPET) {
+Name(_HID,  EISAID(PNP0103))
+Name(_UID, 0)


_UID is optional if only one timer block is present.


+Method (_STA, 0, NotSerialized) {
+Return(0x00)


Not present?


+}
+Name(_CRS, ResourceTemplate() {
+DWordMemory(
+ResourceConsumer, PosDecode, MinFixed, MaxFixed,
+NonCacheable, ReadWrite,
+0x,
+0xFED0,
+0xFED003FF,
+0x,
+0x0400 /* 1K memory: FED0 - FED003FF */
+)
+})
+}
}

Scope(\_SB.PCI0) {
@@ -628,7 +646,7 @@ DefinitionBlock (
{
Or (PRQ3, 0x80, PRQ3)
}
-Method (_CRS, 0, NotSerialized)
+Method (_CRS, 1, NotSerialized)
{
Name (PRR0, ResourceTemplate ()
{


Is this change related?


diff --git a/bios/rombios32.c b/bios/rombios32.c
index 2dc1d25..c1ec015 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -1182,7 +1182,7 @@ struct rsdp_descriptor /* Root System
Descriptor Pointer */
struct rsdt_descriptor_rev1
{
 ACPI_TABLE_HEADER_DEF   /* ACPI common table
header */
- uint32_t table_offset_entry [2]; /* Array
of pointers to other */
+ uint32_t table_offset_entry [3]; /* Array
of pointers to other */
 /* ACPI tables */
};

@@ -1322,6 +1322,30 @@ struct madt_processor_apic
#endif
};

+/*
+ * ACPI 2.0 Generic Address Space definition.
+ */
+struct acpi_20_generic_address {
+uint8_t  address_space_id;
+uint8_t  register_bit_width;
+uint8_t  register_bit_offset;
+uint8_t  reserved;
+uint64_t address;
+};
+
+/*
+ * HPET Description Table
+ */
+struct acpi_20_hpet {
+ACPI_TABLE_HEADER_DEF   /* ACPI common
table header */
+uint32_t   timer_block_id;
+struct acpi_20_generic_address addr;
+uint8_thpet_number;
+uint16_t   min_tick;
+uint8_tpage_protect;
+};
+#define ACPI_HPET_ADDRESS 0xFED0UL
+
struct madt_io_apic
{
 APIC_HEADER_DEF
@@ -1393,8 +1417,9 @@ void acpi_bios_init(void)
struct fadt_descriptor_rev1 *fadt;
struct facs_descriptor_rev1 *facs;
struct multiple_apic_table *madt;
+struct acpi_20_hpet *hpet;
uint8_t *dsdt;
-uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr,
dsdt_addr;
+uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr,
dsdt_addr, hpet_addr;
uint32_t acpi_tables_size, madt_addr, madt_size;
int i;

@@ -1436,6 +1461,11 @@ void acpi_bios_init(void)
madt = (void *)(addr);
addr += madt_size;

+addr = (addr + 7)  ~7;
+hpet_addr = addr;
+hpet = (void *)(addr);
+addr += sizeof(*hpet);
+
acpi_tables_size = addr - base_addr;

BX_INFO(ACPI tables: RSDP addr=0x%08lx ACPI DATA addr=0x%08lx
size=0x%x\n,
@@ -1457,6 +1487,7 @@ void acpi_bios_init(void)
memset(rsdt, 0, sizeof(*rsdt));
rsdt-table_offset_entry[0] = cpu_to_le32(fadt_addr);
rsdt-table_offset_entry[1] = cpu_to_le32(madt_addr);
+rsdt-table_offset_entry[2] = cpu_to_le32(hpet_addr);
acpi_build_table_header((struct acpi_table_header *)rsdt,
RSDT, sizeof(*rsdt), 1);

@@ -1540,6 +1571,15 @@ void acpi_bios_init(void)
acpi_build_table_header((struct acpi_table_header *)madt,
APIC, madt_size, 1);
}
+
+/* HPET */
+memset(hpet, 0, sizeof(*hpet));
+hpet-timer_block_id = cpu_to_le32(0x8086a201);
+   // hpet-timer_block_id = cpu_to_le32(0x80862201);


This magic value could need some explanation so people don't have to look it 
up.
Something like:
8086 = pci vendor id
a201 = 1010001000000001
1                 LegacyReplacement IRQ Routing Capable
 0                reserved
  1               COUNT_SIZE_CAP counter size
   00010          Number of Comparators
        00000001  Hardware revision id

Also add a comment that it should be kept in sync with the emulation (hpet.c).

- Sebastian


+hpet-addr.address = cpu_to_le32(ACPI_HPET_ADDRESS);
+

Re: [Regression] kvm-userspace: VM freezes after booting FreeDOS

2008-07-10 Thread Mohammed Gamal
On Thu, Jul 10, 2008 at 6:47 PM, Mohammed Gamal [EMAIL PROTECTED] wrote:
 After updating my kvm-userspace tree to the latest git tree. I am no
 more able to run FreeDOS. The VM freezes after choosing any of the
 boot options. I am running both latest kvm.git and kvm-userspace.git


Forgot to mention that this problem doesn't occur with -no-kvm switch.
However, the problem still occurs with -no-kvm-pit and -no-kvm-irqchip
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Add HPET support to BIOS

2008-07-10 Thread Ryan Harper
* Sebastian Herbszt [EMAIL PROTECTED] [2008-07-10 10:46]:

Hey Sebastian, 

Thanks for the review,

 Beth Kon wrote:
 This patch, written by Ryan Harper, adds HPET support to BIOS.
 
 Signed-off-by: Beth Kon [EMAIL PROTECTED]
 
 diff --git a/bios/Makefile b/bios/Makefile
 index 48022ea..3e73fb5 100644
 --- a/bios/Makefile
 +++ b/bios/Makefile
 @@ -40,7 +40,7 @@ LIBS =  -lm
 RANLIB = ranlib
 
 BCC = bcc
 -GCC = gcc -m32
 +GCC = gcc -m32 -fno-stack-protector
 HOST_CC = gcc
 AS86 = as86
 
 diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
 index d1bfa2c..1548c86 100755
 --- a/bios/acpi-dsdt.dsl
 +++ b/bios/acpi-dsdt.dsl
 @@ -262,6 +262,24 @@ DefinitionBlock (
 Return (MEMP)
 }
 }
 +Device(HPET) {
 +Name(_HID,  EISAID(PNP0103))
 +Name(_UID, 0)
 
 _UID is optional if only one timer block is present.

OK
 
 +Method (_STA, 0, NotSerialized) {
 +Return(0x00)
 
 Not present?

Was playing around with this when trying to get Linux to see the device
in the ACPI tables. AFAICT, Linux doesn't care about this value.  Should
be 1 here then?

 
 +}
 +Name(_CRS, ResourceTemplate() {
 +DWordMemory(
 +ResourceConsumer, PosDecode, MinFixed, MaxFixed,
 +NonCacheable, ReadWrite,
 +0x,
 +0xFED0,
 +0xFED003FF,
 +0x,
 +0x0400 /* 1K memory: FED0 - FED003FF */
 +)
 +})
 +}
 }
 
 Scope(\_SB.PCI0) {
 @@ -628,7 +646,7 @@ DefinitionBlock (
 {
 Or (PRQ3, 0x80, PRQ3)
 }
 -Method (_CRS, 0, NotSerialized)
 +Method (_CRS, 1, NotSerialized)
 {
 Name (PRR0, ResourceTemplate ()
 {
 
 Is this change related?

Doubtful, I'll confirm whether or not it is needed.

 
 diff --git a/bios/rombios32.c b/bios/rombios32.c
 index 2dc1d25..c1ec015 100755
 --- a/bios/rombios32.c
 +++ b/bios/rombios32.c
 @@ -1182,7 +1182,7 @@ struct rsdp_descriptor /* Root System
 Descriptor Pointer */
 struct rsdt_descriptor_rev1
 {
  ACPI_TABLE_HEADER_DEF   /* ACPI common table
 header */
 - uint32_t table_offset_entry [2]; /* Array
 of pointers to other */
 + uint32_t table_offset_entry [3]; /* Array
 of pointers to other */
  /* ACPI tables */
 };
 
 @@ -1322,6 +1322,30 @@ struct madt_processor_apic
 #endif
 };
 
 +/*
 + * ACPI 2.0 Generic Address Space definition.
 + */
 +struct acpi_20_generic_address {
 +uint8_t  address_space_id;
 +uint8_t  register_bit_width;
 +uint8_t  register_bit_offset;
 +uint8_t  reserved;
 +uint64_t address;
 +};
 +
 +/*
 + * HPET Description Table
 + */
 +struct acpi_20_hpet {
 +ACPI_TABLE_HEADER_DEF   /* ACPI common
 table header */
 +uint32_t   timer_block_id;
 +struct acpi_20_generic_address addr;
 +uint8_thpet_number;
 +uint16_t   min_tick;
 +uint8_tpage_protect;
 +};
 +#define ACPI_HPET_ADDRESS 0xFED0UL
 +
 struct madt_io_apic
 {
  APIC_HEADER_DEF
 @@ -1393,8 +1417,9 @@ void acpi_bios_init(void)
 struct fadt_descriptor_rev1 *fadt;
 struct facs_descriptor_rev1 *facs;
 struct multiple_apic_table *madt;
 +struct acpi_20_hpet *hpet;
 uint8_t *dsdt;
 -uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr,
 dsdt_addr;
 +uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr,
 dsdt_addr, hpet_addr;
 uint32_t acpi_tables_size, madt_addr, madt_size;
 int i;
 
 @@ -1436,6 +1461,11 @@ void acpi_bios_init(void)
 madt = (void *)(addr);
 addr += madt_size;
 
 +addr = (addr + 7)  ~7;
 +hpet_addr = addr;
 +hpet = (void *)(addr);
 +addr += sizeof(*hpet);
 +
 acpi_tables_size = addr - base_addr;
 
 BX_INFO(ACPI tables: RSDP addr=0x%08lx ACPI DATA addr=0x%08lx
 size=0x%x\n,
 @@ -1457,6 +1487,7 @@ void acpi_bios_init(void)
 memset(rsdt, 0, sizeof(*rsdt));
 rsdt-table_offset_entry[0] = cpu_to_le32(fadt_addr);
 rsdt-table_offset_entry[1] = cpu_to_le32(madt_addr);
 +rsdt-table_offset_entry[2] = cpu_to_le32(hpet_addr);
 acpi_build_table_header((struct acpi_table_header *)rsdt,
 RSDT, sizeof(*rsdt), 1);
 
 @@ -1540,6 +1571,15 @@ void acpi_bios_init(void)
 acpi_build_table_header((struct acpi_table_header *)madt,
 APIC, madt_size, 1);
 }
 +
 +/* HPET */
 +memset(hpet, 0, sizeof(*hpet));
 +hpet-timer_block_id = cpu_to_le32(0x8086a201);
 +   // hpet-timer_block_id = cpu_to_le32(0x80862201);
 
 This magic value could need some explanation so people don't have to look 
 it up.
 Something like:
 8086 = pci 

Re: [PATCH] Add HPET support to BIOS

2008-07-10 Thread Sebastian Herbszt

Ryan Harper wrote:



Hey Sebastian, 


Thanks for the review,


Beth Kon wrote:
This patch, written by Ryan Harper, adds HPET support to BIOS.

Signed-off-by: Beth Kon [EMAIL PROTECTED]

diff --git a/bios/Makefile b/bios/Makefile
index 48022ea..3e73fb5 100644
--- a/bios/Makefile
+++ b/bios/Makefile
@@ -40,7 +40,7 @@ LIBS =  -lm
RANLIB = ranlib

BCC = bcc
-GCC = gcc -m32
+GCC = gcc -m32 -fno-stack-protector
HOST_CC = gcc
AS86 = as86

diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl
index d1bfa2c..1548c86 100755
--- a/bios/acpi-dsdt.dsl
+++ b/bios/acpi-dsdt.dsl
@@ -262,6 +262,24 @@ DefinitionBlock (
Return (MEMP)
}
}
+Device(HPET) {
+Name(_HID,  EISAID(PNP0103))
+Name(_UID, 0)

_UID is optional if only one timer block is present.


OK


+Method (_STA, 0, NotSerialized) {
+Return(0x00)

Not present?


Was playing around with this when trying to get Linux to see the device
in the ACPI tables. AFAICT, Linux doesn't care about this value.  Should
be 1 here then?


I would suggest 0x0F (present, enabled and more).

It would be nice to runtime detect the presence of the hpet and return the
proper value, e.g. 0x0 if not present and skip the HPET ACPI table creation.
The Xen DSDT does it with the help of a bios info table which gets created at
runtime. It detects the hpet by reading the vendor id from HPET_BASE.
Something like this might also be possible inside the DSDT (OperationRegion,
Field and LEqual).



+}
+Name(_CRS, ResourceTemplate() {
+DWordMemory(
+ResourceConsumer, PosDecode, MinFixed, MaxFixed,
+NonCacheable, ReadWrite,
+0x,
+0xFED0,
+0xFED003FF,
+0x,
+0x0400 /* 1K memory: FED0 - FED003FF */
+)
+})
+}
}

Scope(\_SB.PCI0) {
@@ -628,7 +646,7 @@ DefinitionBlock (
{
Or (PRQ3, 0x80, PRQ3)
}
-Method (_CRS, 0, NotSerialized)
+Method (_CRS, 1, NotSerialized)
{
Name (PRR0, ResourceTemplate ()
{

Is this change related?


Doubtful, I'll confirm whether or not it is needed.



diff --git a/bios/rombios32.c b/bios/rombios32.c
index 2dc1d25..c1ec015 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -1182,7 +1182,7 @@ struct rsdp_descriptor /* Root System
Descriptor Pointer */
struct rsdt_descriptor_rev1
{
 ACPI_TABLE_HEADER_DEF   /* ACPI common table
header */
- uint32_t table_offset_entry [2]; /* Array
of pointers to other */
+ uint32_t table_offset_entry [3]; /* Array
of pointers to other */
 /* ACPI tables */
};

@@ -1322,6 +1322,30 @@ struct madt_processor_apic
#endif
};

+/*
+ * ACPI 2.0 Generic Address Space definition.
+ */
+struct acpi_20_generic_address {
+uint8_t  address_space_id;
+uint8_t  register_bit_width;
+uint8_t  register_bit_offset;
+uint8_t  reserved;
+uint64_t address;
+};
+
+/*
+ * HPET Description Table
+ */
+struct acpi_20_hpet {
+ACPI_TABLE_HEADER_DEF   /* ACPI common
table header */
+uint32_t   timer_block_id;
+struct acpi_20_generic_address addr;
+uint8_thpet_number;
+uint16_t   min_tick;
+uint8_tpage_protect;
+};
+#define ACPI_HPET_ADDRESS 0xFED0UL
+
struct madt_io_apic
{
 APIC_HEADER_DEF
@@ -1393,8 +1417,9 @@ void acpi_bios_init(void)
struct fadt_descriptor_rev1 *fadt;
struct facs_descriptor_rev1 *facs;
struct multiple_apic_table *madt;
+struct acpi_20_hpet *hpet;
uint8_t *dsdt;
-uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr,
dsdt_addr;
+uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr,
dsdt_addr, hpet_addr;
uint32_t acpi_tables_size, madt_addr, madt_size;
int i;

@@ -1436,6 +1461,11 @@ void acpi_bios_init(void)
madt = (void *)(addr);
addr += madt_size;

+addr = (addr + 7)  ~7;
+hpet_addr = addr;
+hpet = (void *)(addr);
+addr += sizeof(*hpet);
+
acpi_tables_size = addr - base_addr;

BX_INFO(ACPI tables: RSDP addr=0x%08lx ACPI DATA addr=0x%08lx
size=0x%x\n,
@@ -1457,6 +1487,7 @@ void acpi_bios_init(void)
memset(rsdt, 0, sizeof(*rsdt));
rsdt-table_offset_entry[0] = cpu_to_le32(fadt_addr);
rsdt-table_offset_entry[1] = cpu_to_le32(madt_addr);
+rsdt-table_offset_entry[2] = cpu_to_le32(hpet_addr);
acpi_build_table_header((struct acpi_table_header *)rsdt,
RSDT, sizeof(*rsdt), 1);

@@ -1540,6 +1571,15 @@ void acpi_bios_init(void)
acpi_build_table_header((struct acpi_table_header *)madt,
APIC, madt_size, 1);
}
+
+/* HPET */

Re: [Regression] kvm-userspace: VM freezes after booting FreeDOS

2008-07-10 Thread Anthony Liguori

Mohammed Gamal wrote:

On Thu, Jul 10, 2008 at 7:31 PM, Anthony Liguori [EMAIL PROTECTED] wrote:
  

Mohammed Gamal wrote:


After updating my kvm-userspace tree to the latest git tree. I am no
more able to run FreeDOS. The VM freezes after choosing any of the
boot options. I am running both latest kvm.git and kvm-userspace.git

  

Can you bisect to the commit that broke it?

Regards,

Anthony Liguori



I did, it seems that commit 04c3265c95c12e7c6e73cfd07357c05db6a1bda8
caused it. Everything works fine after I reverted it.
  


That's:

commit 04c3265c95c12e7c6e73cfd07357c05db6a1bda8
Author: Jan Kiszka [EMAIL PROTECTED]
Date:   Fri May 16 09:21:47 2008 +0200

   kvm: qemu: consolidate kvm_eat_signal
  
   With the io thread signals are rare.  Remove looping over

   kvm_eat_signal and fold everything into kvm_main_loop_wait.
  
   Signed-off-by: Jan Kiszka [EMAIL PROTECTED]

   Signed-off-by: Avi Kivity [EMAIL PROTECTED]


Hrm, that's disturbing.  Unfortunately, the patch is a bit more than 
just code motion.  It does change the number of times the lock is 
released and makes some minor changes to the code.  Nothing obvious that 
would cause the breakage without digging into it.


How did you create your freedos image?  I'd like to try and figure out 
what the problem is here since it may be causing other issues.


Regards,

Anthony Liguori


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
  


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] mask out clflush

2008-07-10 Thread Glauber Costa
On Thu, Jul 10, 2008 at 12:39 PM, Avi Kivity [EMAIL PROTECTED] wrote:
 Anthony Liguori wrote:

 yes. It is address 0xc8821000, apparently part of a pci controller
 initialization.


 qemu pci starts at 0xe0000000 IIRC.  So maybe the guest is flushing
 random addresses just to be annoying.

 That's a virtual address, not a physical address IIUC.


 Ah, of course.

How's that one ?

 --
 error compiling committee.c: too many arguments to function





-- 
Glauber Costa.
Free as in Freedom
http://glommer.net

The less confident you are, the more serious you have to act.


0001-properly-decode-clflush.patch
Description: Binary data


Re: patch: qemu + hugetlbfs..

2008-07-10 Thread john cooper

Anthony Liguori wrote:


+#include asm/param.h
  


I don't think this is necessary anymore.  Depending on Linux headers 
breaks the QEMU build on other Unices, so it's a bad thing.


It is no longer required, but see below.

hpage is a misnomer too as we aren't actually dependent on huge pages (this 
code should work equally well for tmpfs).


As it currently exists alloc_hpage_mem() is tied to
the notion of huge page allocation as it will reference
gethugepagesize() irrespective of *mem_path.  So even
in the case of tmpfs backed files, if the host kernel
has been configured with CONFIG_HUGETLBFS we will wind
up doing allocations of /dev/shm mapped files at
/proc/meminfo:Hugepagesize granularity.  Otherwise if
HUGETLBFS is not configured gethugepagesize() returns
zero and alloc_hpage_mem() itself will not perform the
allocation.

Probably not what was intended but probably not too
much of a concern as -mem-path /dev/shm is likely
only used in debug of this flag and associated logic.
I don't see it currently being worth the trouble to
correct from a squeaky clean POV, and doing so may
drag in far more than the header file we've just
booted above to deal with this architecture/config
dependency.

An updated patch is attached.

-john

--
[EMAIL PROTECTED]
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -234,6 +234,7 @@ int autostart = 1;
 int time_drift_fix = 0;
 unsigned int kvm_shadow_memory = 0;
 const char *mem_path = NULL;
+int mem_prealloc = 1;  /* force preallocation of physical target memory */
 int hpagesize = 0;
 const char *cpu_vendor_string;
 #ifdef TARGET_ARM
@@ -7809,7 +7810,10 @@ static void help(int exitcode)
 #endif
-tdfinject timer interrupts that got lost\n
-kvm-shadow-memory megs set the amount of shadow pages to be 
allocated\n
-   -mem-path   set the path to hugetlbfs/tmpfs mounted directory, 
also enables allocation of guest memory with huge pages\n
+   -mem-path   set the path to hugetlbfs/tmpfs mounted directory, 
also\n
+   enables allocation of guest memory with huge 
pages\n
+   -mem-prealloc   toggles preallocation of -mem-path backed physical 
memory\n
+   at startup.  Default is enabled.\n
   -option-rom rom load a file, rom, into the option ROM space\n
 #ifdef TARGET_SPARC
-prom-env variable=value  set OpenBIOS nvram variables\n
@@ -7932,6 +7936,7 @@ enum {
 QEMU_OPTION_tdf,
 QEMU_OPTION_kvm_shadow_memory,
 QEMU_OPTION_mempath,
+QEMU_OPTION_mem_prealloc
 };
 
 typedef struct QEMUOption {
@@ -8059,6 +8064,7 @@ const QEMUOption qemu_options[] = {
 { startdate, HAS_ARG, QEMU_OPTION_startdate },
 { tb-size, HAS_ARG, QEMU_OPTION_tb_size },
 { mem-path, HAS_ARG, QEMU_OPTION_mempath },
+{ mem-prealloc, 0, QEMU_OPTION_mem_prealloc },
 { NULL },
 };
 
@@ -8276,11 +8282,13 @@ static int gethugepagesize(void)
 return hugepagesize;
 }
 
-void *alloc_mem_area(unsigned long memory, const char *path)
+/* attempt to allocate memory mmap'ed to mem_path
+ */
+void *alloc_hpage_mem(unsigned long memory, const char *path)
 {
 char *filename;
 void *area;
-int fd;
+int fd, flags;
 
 if (asprintf(filename, %s/kvm.XX, path) == -1)
return NULL;
@@ -8308,26 +8316,27 @@ void *alloc_mem_area(unsigned long memor
  */
 ftruncate(fd, memory);
 
-area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
-if (area == MAP_FAILED) {
-   perror(mmap);
-   close(fd);
-   return NULL;
-}
-
-return area;
+/* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
+ * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
+ * to sidestep this quirk.
+ */
+flags = mem_prealloc ? MAP_POPULATE|MAP_SHARED : MAP_PRIVATE;
+area = mmap(0, memory, PROT_READ|PROT_WRITE, flags, fd, 0);
+if (area != MAP_FAILED)
+   return (area);
+perror(alloc_hpage_mem: can't mmap hugetlbfs pages);
+close(fd);
+return (NULL);
 }
 
-void *qemu_alloc_physram(unsigned long memory)
+/* allocate guest memory as requested
+ */
+void *qemu_alloc_physram(unsigned long size)
 {
-void *area = NULL;
-
 if (mem_path)
-   area = alloc_mem_area(memory, mem_path);
-if (!area)
-   area = qemu_vmalloc(memory);
-
-return area;
+   return (alloc_hpage_mem(size, mem_path));
+else
+   return (qemu_vmalloc(size));
 }
 
 int main(int argc, char **argv)
@@ -8962,6 +8971,9 @@ int main(int argc, char **argv)
 case QEMU_OPTION_mempath:
mem_path = optarg;
break;
+case QEMU_OPTION_mem_prealloc:
+   mem_prealloc = !mem_prealloc;
+   break;
 case QEMU_OPTION_name:
 qemu_name = optarg;
 break;


Re: Live migration without SDL

2008-07-10 Thread Brian Jackson
You can get to the monitor the same way in vnc as you do with the sdl gui.

You can also have the monitor connected to a socket or network port.

See the qemu docs for more info.



On Thursday 10 July 2008 3:51:44 pm Ty! Boyack wrote:
 Folks,

 I'm afraid I'm missing something very basic here.  I'm looking to try
 out the live migration facility, and all the documentation I'm seeing
 shows to initiate the migration by going to the qemu monitor from the
 SDL windows (alt-ctrl-2).  However, I'd like to run my VMs with a VNC
 head, so I'm not sure where the interface is to initiate the migration
 without the SDL layer in place.

 Is the qemu monitor available through some other mechanism?

 This would be most helpful if the mechanism was reachable (and
 scriptable) from the HOST os, so that the HOST (in cooperation with
 another HOST) could initiate a migration of one or more GUEST VMs.

 If anyone could point me in the right direction, I would sure appreciate
 it.

 I have not subscribed to the list, so e-mail replies would be appreciated.

 -Ty!

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: patch: qemu + hugetlbfs..

2008-07-10 Thread Anthony Liguori

john cooper wrote:

Anthony Liguori wrote:

john cooper wrote:

As it currently exists alloc_hpage_mem() is tied to
the notion of huge page allocation as it will reference
gethugepagesize() irrespective of *mem_path.  So even
in the case of tmpfs backed files, if the host kernel
has been configured with CONFIG_HUGETLBFS we will wind
up doing allocations of /dev/shm mapped files at
/proc/meminfo:Hugepagesize granularity.


Which is fine.  It just means we round -m values up to even numbers.


Well, yes it will round the allocation.  But from a
minimally sufficient 4KB boundary to that of 4MB/2MB
relative to a 32/64 bit x86 host which is excessive.


Probably not what was intended but probably not too
much of a concern as -mem-path /dev/shm is likely
only used in debug of this flag and associated logic.
I don't see it currently being worth the trouble to
correct from a squeaky clean POV, and doing so may
drag in far more than the header file we've just
booted above to deal with this architecture/config
dependency.


Renaming a function to a name that's less accurate seems bad to me.  
I don't mean to be pedantic, but it seems like a strange thing to 
do.  I prefer it the way it was before.


I don't see any harm reverting the name.  But I do
believe it is largely cosmetic as given the above,
the current code does require some work to make it
independent of huge page assumptions.  Update attached.

-john


Looks good to me.

Acked-by: Anthony Liguori [EMAIL PROTECTED]

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Live migration without SDL

2008-07-10 Thread David Mair

Ty! Boyack wrote:
 Folks,

 I'm afraid I'm missing something very basic here.  I'm looking to try
 out the live migration facility, and all the documentation I'm seeing
 shows to initiate the migration by going to the qemu monitor from the
 SDL windows (alt-ctrl-2).  However, I'd like to run my VMs with a VNC
 head, so I'm not sure where the interface is to initiate the migration
 without the SDL layer in place.

I have no trouble switching to the monitor for a VM with a VNC head: alt-ctrl-2 
works as expected in the VNC client.


 Is the qemu monitor available through some other mechanism?

You can configure qemu to expose the monitor console via any of the serial port 
configuration types. That lets you make the monitor available at a udp or tcp port, 
e.g.:


# qemu-system-x86_64 other options -monitor tcp::

The double colon is intended. There are lots of additional features beyond that 
example.

 This would be most helpful if the mechanism was reachable (and
 scriptable) from the HOST os, so that the HOST (in cooperation with
 another HOST) could initiate a migration of one or more GUEST VMs.

That would be the monitor command line option then.

snip

--
David.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Regression] kvm-userspace: VM freezes after booting FreeDOS

2008-07-10 Thread Mohammed Gamal
On Fri, Jul 11, 2008 at 12:22 AM, Jan Kiszka [EMAIL PROTECTED] wrote:
 Mohammed Gamal wrote:
 On Thu, Jul 10, 2008 at 11:02 PM, Anthony Liguori [EMAIL PROTECTED] wrote:
 Mohammed Gamal wrote:
 On Thu, Jul 10, 2008 at 7:31 PM, Anthony Liguori [EMAIL PROTECTED]
 wrote:

 Mohammed Gamal wrote:

 After updating my kvm-userspace tree to the latest git tree. I am no
 more able to run FreeDOS. The VM freezes after choosing any of the
 boot options. I am running both latest kvm.git and kvm-userspace.git

snip

 After booting FreeDOS, there are a number of boot options with
 different memory extenders, after selecting any option the system
 freezes and I get [Stopped] in the QEMU title bar.

 Stopped - interesting. Seems like something causes QEMU to stop the
 guest as if some breakpoint was injected.

 I just downloaded that image and gave it a try against vanilla kvm-70
 and my own tree which is augmented with guest debugging related patches.
 The former shows your observed behavior (Boot from CD, provide an empty
 HD image - press '1' + ENTER - press '1' - Stopped). The latter kvm
 tree made QEMU leave with a #GP in the guest. That may point to a debug
 register related issue, and that patch you identified just happens to
 make it visible. However, will try to investigate.

 Jan

I'm interested in seeing these patches. If your tree is hosted online,
could you please provide me with its location so that I can merge it
with mine. If not, where can I get them from?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM: MMU: nuke shadowed pgtable pages and pte's on memslot destruction

2008-07-10 Thread Marcelo Tosatti

KVM: MMU: improve invalid shadow root page handling

Harden kvm_mmu_zap_page() against invalid root pages that
had been shadowed from memslots that are gone.

Signed-off-by: Marcelo Tosatti [EMAIL PROTECTED]


diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ff7cf63..7f57da6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -930,14 +930,17 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct 
kvm_mmu_page *sp)
}
kvm_mmu_page_unlink_children(kvm, sp);
if (!sp-root_count) {
-   if (!sp-role.metaphysical)
+   if (!sp-role.metaphysical  !sp-role.invalid)
unaccount_shadowed(kvm, sp-gfn);
hlist_del(sp-hash_link);
kvm_mmu_free_page(kvm, sp);
} else {
+   int invalid = sp-role.invalid;
list_move(sp-link, kvm-arch.active_mmu_pages);
sp-role.invalid = 1;
kvm_reload_remote_mmus(kvm);
+   if (!sp-role.metaphysical  !invalid)
+   unaccount_shadowed(kvm, sp-gfn);
}
kvm_mmu_reset_last_pte_updated(kvm);
 }

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 0/7][RFC] Enable kvm/ia-64 to build kvm components in userspace.

2008-07-10 Thread Zhang, Xiantao
Avi Kivity wrote:
 Zhang, Xiantao wrote:
 
 I agree that for automatic testing it's more of a burden; but it
 needs to be done, especially as some kvm features are only enabled
 on newer kernels. 
 
 The external module is convenient, but it's not a substitute for
 the real thing. 
 
 
 So, I have a question here, When will you drop external module
 support? 
 
 
 I don't really know.
 
 You know, it blocks our auto-testing system now, we have to
 re-evaluate the effort without external module support.
 
 
 I think it's very worthwhile to update the autotester to be able to
 build kernels. kvm will soon depend on core functionality (mmu
 notifiers) and since currently kvm is the only user (well, gru will be
 added soon, but ...) we need to test it ourselves.
 
 If it won't be dropped in a few weeks, we are eager to get its
 support for kvm-ia64. 
 
 
 
 Okay, please send the patches, but with diff -M so they're readable.

OK, I will regenerate the patches.  Thanks!
Xiantao
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/5] kvmtrace: make cycle calculation architecture aware

2008-07-10 Thread Yang, Sheng
On Thursday 10 July 2008 21:32:29 Avi Kivity wrote:
 Yang, Sheng wrote:
  On Wednesday 09 July 2008 23:03:19 Hollis Blanchard wrote:
  On Wed, 2008-07-09 at 11:17 +0200, Christian Ehrhardt wrote:
  So the question that is left before changing that is, if the
  original author had something special in mind choosing cycles
  here. I added Eric on CC for that.
 
  I wait with my resubmission of the patch series until all
  architectures agree *hope* on using getnstimeofday() - after an
  ack from all sides I would revise my patch series and submit
  that changes altogether.
 
  I got an email bounce from Eric the last time I tried to email
  him, so I'm not sure he's still with Intel.
 
  However, I don't think he had any special intention; I think he
  was just porting xentrace to KVM.
 
  Eric had completed his internship in Intel, so...
 
  I like the term timestamp too. I think he used cycles only
  because there is a function called get_cycles().
 
  But instead of getnstimeofday(), I suggest using ktime_get()
  here. It's little more precise than getnstimeofday(), and ktime_t
  is more easily to be handled. And I think the overhead it brought
  can be ignored too.

 What is the overhead of ktime_get()?

Well, I just mean that it wraps getnstimeofday(), and compared to 
rdtscll(), it has a little overhead... :)

-- 
Thanks
Yang, Sheng


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


KVM Test result, kernel cc453ce.., userspace 6787f5a.. -- One Issue Fixed

2008-07-10 Thread Xu, Jiajun

Hi All,

This is today's KVM test result against kvm.git
cc453ceef31019dcddff453d35a608ca2c659fec and kvm-userspace.git
6787f5a048c17e4ec4eb7858804795888449cd96.

One Issue Fixed:

1. vista auto-unattended installation failed on kvm guests
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1991653&group_id=180599



Two Old Issues:

1. 32bits Rhel5/FC6 guest may fail to reboot after installation
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1991647&group_id=180599



2. failure to migrate guests with more than 4GB of RAM
https://sourceforge.net/tracker/index.php?func=detail&aid=1971512&group_id=180599&atid=893831




Test environment

Platform   
Woodcrest

CPU 4
Memory size 8G'

Details

IA32-pae: 1. boot guest with 256M
memory   PASS
2. boot guest with 1500M memory PASS
3. boot 4 same guest in parallel PASS
4. boot two windows xp guestPASS
5. boot linux and windows guest in parallel  PASS
6. save/restore 32-bit HVM guests  PASS
7. save/restore 32-bit HVM guests with 4 vcpus   PASS
8. live migration 32-bit HVM guests PASS
9. live migration 32-bit HVM guests with 4 vcpus  PASS
10. boot base kernel linux 
PASS

11. kernel build on SMP linux guestPASS
12. LTP on linux guest  
PASS

13. boot Windows 2000 without ACPI  PASS
14. boot Windows 2000 with ACPI enabled  PASS
15. boot Windows 2003 with ACPI enabled   PASS
16. boot Windows xp with ACPI enabled  PASS
17. boot Windows vista with ACPI enabled   PASS
18. boot SMP Windows 2000 with ACPI enabled  PASS
19. boot SMP Windows 2003 with ACPI enabled  PASS
20. boot SMP Windows xp with ACPI enabled  PASS
21. boot SMP Windows 2008 with ACPI enabled   PASS


IA32e: 1. boot 32-bit guest with 256M
memory   PASS
2. boot 64-bit guest with 256M memory   PASS
3. boot 32-bit guest with 1500M memory PASS
4. boot 64-bit guest with 1500M memory PASS
5. boot 4G pae
guest PASS
6. boot 4G 64-bit
guest  PASS
7. boot four 32-bit guest in
parallel  PASS
8. boot four 64-bit guest in
parallel  PASS
9. boot two 32-bit windows xp in parallel  PASS
10. boot 32-bit linux and 32 bit windows guest in parallel   PASS
11. boot four 32-bit different guest in para
PASS
12. save/restore 32-bit linux guests
PASS
13. save/restore 64-bit linux guests
PASS

14. save/restore 64-bit linux guests with 4 vcpus   PASS
15. save/restore 32-bit linux guests with 4 vcpus   PASS
16. live migration 64bit linux
guests PASS
17. live migration 32bit linux
guests PASS
18. live migration 64bit linux guests with 4 vcpus   PASS
19. live migration 32bit linux guests with 4 vcpus   PASS
20. boot 32-bit x-server   PASS
21. kernel build in 32-bit linux guest OS  PASS
22. kernel build in 64-bit linux guest OS  PASS
23. LTP on 32-bit linux guest OS   
PASS
24. LTP on 64-bit linux guest OS   
PASS

25. boot 64-bit guests with ACPI enabled PASS
26. boot 32-bit Windows 2000 without ACPIPASS
27. boot 32-bit Windows xp without ACPIPASS
28. boot 64-bit Windows xp with ACPI enabledPASS
29. boot 64-bit Windows vista with ACPI enabled PASS
30. boot 32-bit SMP Windows 2000 with ACPI enabled PASS
31. boot 32-bit SMP windows 2003 with ACPI enabled  PASS
32. boot 32-bit SMP Windows xp with ACPI enabledPASS
33. boot 64-bit SMP Windows vista with ACPI enabled PASS
34. boot 32-bit SMP windows 2008 with ACPI enabled  PASS
35. boot 

[PATCH 03/04] KVM: external module: Allow non-x86 archs to build in userspace.

2008-07-10 Thread Zhang, Xiantao
From 989c9551664988535608bb4051e654ffe5e5ddb0 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang [EMAIL PROTECTED]
Date: Wed, 2 Jul 2008 17:04:19 +0800
Subject: [PATCH] KVM: external module: Allow non-x86 archs to build in
userspace.

This patch targets for allowing kvm/ia64 to build in userspace.
For other archs. it just needs to add its arch-specific stuff.

Signed-off-by: Xiantao Zhang [EMAIL PROTECTED]
---
 kernel/Kbuild|1 +
 kernel/Makefile  |   22 ++
 kernel/ia64/Kbuild   |7 +++
 kernel/ia64/Makefile.pre |   23 +++
 kernel/ia64/external-module-compat.h |   15 +++
 kernel/x86/Makefile.pre  |1 +
 6 files changed, 61 insertions(+), 8 deletions(-)
 create mode 100644 kernel/ia64/Kbuild
 create mode 100644 kernel/ia64/Makefile.pre
 create mode 100644 kernel/ia64/external-module-compat.h
 create mode 100644 kernel/x86/Makefile.pre

diff --git a/kernel/Kbuild b/kernel/Kbuild
index 7019ca0..ec34c43 100644
--- a/kernel/Kbuild
+++ b/kernel/Kbuild
@@ -1 +1,2 @@
 obj-$(CONFIG_X86) += x86/
+obj-$(CONFIG_IA64) += ia64/
diff --git a/kernel/Makefile b/kernel/Makefile
index 94d63bc..0d5d148 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,6 +7,8 @@ KVERREL = $(patsubst
/lib/modules/%/build,%,$(KERNELDIR))
 
 DESTDIR=
 
+MAKEFILE_PRE = $(ARCH_DIR)/Makefile.pre
+
 INSTALLDIR = $(patsubst %/build,%/extra,$(KERNELDIR))
 ORIGMODDIR = $(patsubst %/build,%/kernel,$(KERNELDIR))
 
@@ -27,10 +29,11 @@ unifdef = mv $1 $1.orig  \
 hack = $(call _hack,$T/$(strip $1))
 
 hack-files-x86 = kvm_main.c mmu.c vmx.c svm.c x86.c irq.h lapic.c
i8254.c
+hack-files-ia64 =
 
 hack-files = $(hack-files-$(ARCH_DIR))
 
-all::
+all:: header-link
 #  include header priority 1) $LINUX 2) $KERNELDIR 3)
include-compat
$(MAKE) -C $(KERNELDIR) M=`pwd` \
LINUXINCLUDE=-I`pwd`/include -Iinclude
-I`pwd`/include-compat \
@@ -38,7 +41,12 @@ all::
-include `pwd`/$(ARCH_DIR)/external-module-compat.h
$$@
 
-sync: header-sync source-sync
+sync: header-sync source-sync header-link prerequisite
+
+header-link:
+   rm -f include/asm
+   ln -sf asm-$(ARCH_DIR) include/asm
+   ln -sf asm-$(ARCH_DIR) include-compat/asm
 
 T = $(subst -sync,,$@)-tmp
 
@@ -56,15 +64,11 @@ header-sync:
do mkdir -p $$(dirname $$i); cmp -s $$i $T/$$i || cp
$T/$$i $$i; done
rm -rf $T
 
-   rm -f include/asm
-   ln -sf asm-$(ARCH_DIR) include/asm
-   ln -sf asm-$(ARCH_DIR) include-compat/asm
-
 source-sync:
rm -rf $T
rsync --exclude='*.mod.c' -R \
-$(LINUX)/arch/$(ARCH_DIR)/kvm/./*.[ch] \
-$(LINUX)/virt/kvm/./*.[ch] \
+$(LINUX)/arch/$(ARCH_DIR)/kvm/./*.[cSh] \
+$(LINUX)/virt/kvm/./*.[cSh] \
 $T/
 
set -e && for i in $(find $T -name '*.c'); do \
@@ -77,6 +81,8 @@ source-sync:
do cmp -s $(ARCH_DIR)/$$i $T/$$i || cp $T/$$i
$(ARCH_DIR)/$$i; done
rm -rf $T
 
+include $(MAKEFILE_PRE)
+
 install:
mkdir -p $(DESTDIR)/$(INSTALLDIR)
cp $(ARCH_DIR)/*.ko $(DESTDIR)/$(INSTALLDIR)
diff --git a/kernel/ia64/Kbuild b/kernel/ia64/Kbuild
new file mode 100644
index 000..764a493
--- /dev/null
+++ b/kernel/ia64/Kbuild
@@ -0,0 +1,7 @@
+obj-m := kvm.o kvm-intel.o
+
+kvm-objs := kvm_main.o ioapic.o coalesced_mmio.o kvm-ia64.o kvm_fw.o
+
+EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
+kvm-intel-objs := vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o
mmio.o \
+   vtlb.o process.o memset.o memcpy.o
diff --git a/kernel/ia64/Makefile.pre b/kernel/ia64/Makefile.pre
new file mode 100644
index 000..09f2a73
--- /dev/null
+++ b/kernel/ia64/Makefile.pre
@@ -0,0 +1,23 @@
+prerequisite: asm-offsets.h ia64/memset.S ia64/memcpy.S
+   cp -f $(LINUX)/arch/ia64/lib/memcpy.S ia64/memcpy.S
+   cp -f $(LINUX)/arch/ia64/lib/memset.S ia64/memset.S
+   cmp -s asm-offset.h ia64/asm-offset.h || mv -f asm-offsets.*
ia64/
+
+asm-offsets.h: asm-offsets.s
+   @(set -e; \
+ echo "/*"; \
+ echo " * DO NOT MODIFY."; \
+ echo " *"; \
+ echo " * This file was auto-generated from $<"; \
+ echo " *"; \
+ echo " */"; \
+ echo ""; \
+ echo "#ifndef __KVM_ASM_OFFSETS_H__"; \
+ echo "#define __KVM_ASM_OFFSETS_H__"; \
+ echo ""; \
+ sed -ne "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define
\1 \2 /* \3 */:; s:->::; p;}"; \
+ echo ""; \
+ echo "#endif") <$< >$@
+
+asm-offsets.s: ia64/asm-offsets.c 
+   gcc -S -D__KERNEL__ -I$(LINUX)/include -I./include
ia64/asm-offsets.c
diff --git a/kernel/ia64/external-module-compat.h
b/kernel/ia64/external-module-compat.h
new file mode 100644
index 000..3c4001c
--- /dev/null
+++ b/kernel/ia64/external-module-compat.h
@@ -0,0 +1,15 @@
+/*
+ * Compatibility header for building as an external module.
+ */
+
+#include 

[PATCH 04/04] KVM: userspace: Enable rpm build for kvm/ia64

2008-07-10 Thread Zhang, Xiantao
From ce37b613776ea71ed2c75c8302985aecb5e6799f Mon Sep 17 00:00:00 2001
From: Xiantao Zhang [EMAIL PROTECTED]
Date: Fri, 11 Jul 2008 13:13:52 +0800
Subject: [PATCH] KVM: userspace: Enable rpm build for kvm/ia64

Enable make rpm for kvm/ia64.
Signed-off-by: Xiantao Zhang [EMAIL PROTECTED]
---
 Makefile |4 ++--
 kernel/kvm-kmod.spec |8 +---
 kvm.spec |6 --
 qemu/Makefile|1 +
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index 48a8dff..c7e7370 100644
--- a/Makefile
+++ b/Makefile
@@ -23,7 +23,7 @@ qemu: libkvm
 ifneq '$(filter $(ARCH), i386 x86_64)' ''
 qemu: extboot
 endif
-ifneq '$(filter $(ARCH), powerpc)' ''
+ifneq '$(filter $(ARCH), powerpc, ia64)' ''
 qemu: libfdt
 endif
 user: libkvm
@@ -73,7 +73,7 @@ install-rpm:
install -m 755 kvm_stat $(DESTDIR)/$(bindir)/kvm_stat
cp scripts/kvm $(DESTDIR)/$(initdir)/kvm
cp scripts/qemu-ifup $(DESTDIR)/$(confdir)/qemu-ifup
-   install -t $(DESTDIR)/etc/udev/rules.d scripts/*kvm*.rules
+   install -C scripts/*kvm*.rules $(DESTDIR)/etc/udev/rules.d
 
 install:
$(kcmd)make -C kernel DESTDIR=$(DESTDIR) install
diff --git a/kernel/kvm-kmod.spec b/kernel/kvm-kmod.spec
index b56fdf1..89b3d88 100644
--- a/kernel/kvm-kmod.spec
+++ b/kernel/kvm-kmod.spec
@@ -10,11 +10,11 @@ License:GPL
 URL:http://www.qumranet.com
 BuildRoot:  %{_tmppath}/%{name}-%{version}-%{release}
 
-ExclusiveArch: i386 x86_64
+ExclusiveArch: i386 x86_64 ia64
 
 %description
 This kernel module provides support for virtual machines using hardware
support
-(Intel VT or AMD SVM).
+(Intel VT-x&VT-i or AMD SVM).
 
 %prep
 
@@ -27,7 +27,7 @@ rm -rf %{buildroot}
 %define kverrel unknown
 %define moddir /lib/modules/%{kverrel}/extra
 mkdir -p %{buildroot}/%{moddir}
-cp %{objdir}/%{kmod_name}.ko %{objdir}/%{kmod_name}-intel.ko
%{objdir}/%{kmod_name}-amd.ko %{buildroot}/%{moddir}
+cp %{objdir}/%{kmod_name}.ko %{objdir}/%{kmod_name}-*.ko
%{buildroot}/%{moddir}
 chmod u+x %{buildroot}/%{moddir}/%{kmod_name}*.ko
 
 %post 
@@ -43,7 +43,9 @@ depmod %{kverrel}
 
 %files
 %{moddir}/%{kmod_name}.ko
+%ifarch i386 x86_64
 %{moddir}/%{kmod_name}-amd.ko
+%endif
 %{moddir}/%{kmod_name}-intel.ko
 
 
diff --git a/kvm.spec b/kvm.spec
index af8271e..92acb0e 100644
--- a/kvm.spec
+++ b/kvm.spec
@@ -8,7 +8,7 @@ License:GPL
 URL:http://www.qumranet.com
 BuildRoot:  %{_tmppath}/%{name}-%{version}-%{release}
 
-ExclusiveArch:  i386 x86_64
+ExclusiveArch:  i386 x86_64 ia64
 
 Requires:  kvm-kmod bridge-utils
 
@@ -58,7 +58,7 @@ Source8: extboot.tar.gz
 
 %description
 The Kernel Virtual Machine provides a virtualization enviroment for
processors
-with hardware support for virtualization: Intel's VT and AMD's AMD-V.
+with hardware support for virtualization: Intel's VT-x&VT-i and AMD's
AMD-V.
 
 %prep
 
@@ -82,7 +82,9 @@ cd ..
 ./configure --prefix=/usr/kvm %{qemuldflags}
 make -C libkvm
 make -C user
+%ifarch i386 x86_64
 make extboot
+%endif
 #(cd qemu;
 #./co
 #   kpath=$(readlink -f ../kernel/include)
diff --git a/qemu/Makefile b/qemu/Makefile
index 8ca17f7..627e255 100644
--- a/qemu/Makefile
+++ b/qemu/Makefile
@@ -231,6 +231,7 @@ endif
pxe-rtl8139.bin pxe-pcnet.bin pxe-e1000.bin extboot.bin
\
bamboo.dtb; \
 do \
+   [ -f $(SRC_PATH)/pc-bios/$$x ] && \
$(INSTALL) -m 644 $(SRC_PATH)/pc-bios/$$x
$(DESTDIR)$(datadir); \
done
 ifndef CONFIG_WIN32
-- 
1.5.2


0004-KVM-userspace-Enable-rpm-build-for-kvm-ia64.patch
Description: 0004-KVM-userspace-Enable-rpm-build-for-kvm-ia64.patch