[Xen-devel] [PATCH 03/16] xen/passthrough: extend hypercall to support rdm reservation policy

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

This patch extends the existing hypercall to support rdm reservation policy.
We return error or just throw out a warning message depending on whether
the policy is strict or relaxed when reserving RDM regions in pfn space.
Note in some special cases, e.g. add a device to hwdomain, and remove a
device from user domain, 'relaxed' is fine enough since this is always safe
to hwdomain.

CC: Tim Deegan t...@xen.org
CC: Keir Fraser k...@xen.org
CC: Jan Beulich jbeul...@suse.com
CC: Andrew Cooper andrew.coop...@citrix.com
CC: Suravee Suthikulpanit suravee.suthikulpa...@amd.com
CC: Aravind Gopalakrishnan aravind.gopalakrish...@amd.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Stefano Stabellini stefano.stabell...@citrix.com
CC: Yang Zhang yang.z.zh...@intel.com
CC: Kevin Tian kevin.t...@intel.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Reviewed-by: George Dunlap george.dun...@eu.citrix.com
Acked-by: Jan Beulich jbeul...@suse.com
---
 xen/arch/x86/mm/p2m.c   |7 +++--
 xen/drivers/passthrough/amd/pci_amd_iommu.c |3 ++-
 xen/drivers/passthrough/arm/smmu.c  |2 +-
 xen/drivers/passthrough/device_tree.c   |3 ++-
 xen/drivers/passthrough/pci.c   |   15 ---
 xen/drivers/passthrough/vtd/iommu.c |   37 +--
 xen/include/asm-x86/p2m.h   |2 +-
 xen/include/public/domctl.h |3 +++
 xen/include/xen/iommu.h |2 +-
 9 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 1e763dc..89616b7 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -901,7 +901,7 @@ int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, 
mfn_t mfn,
 }
 
 int set_identity_p2m_entry(struct domain *d, unsigned long gfn,
-   p2m_access_t p2ma)
+   p2m_access_t p2ma, unsigned int flag)
 {
 p2m_type_t p2mt;
 p2m_access_t a;
@@ -923,7 +923,10 @@ int set_identity_p2m_entry(struct domain *d, unsigned long 
gfn,
 ret = 0;
 else
 {
-ret = -EBUSY;
+if ( flag  XEN_DOMCTL_DEV_RDM_RELAXED )
+ret = 0;
+else
+ret = -EBUSY;
 printk(XENLOG_G_WARNING
Cannot setup identity map d%d:%lx,
 gfn already mapped to %lx.\n,
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c 
b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index e83bb35..920b35a 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -394,7 +394,8 @@ static int reassign_device(struct domain *source, struct 
domain *target,
 }
 
 static int amd_iommu_assign_device(struct domain *d, u8 devfn,
-   struct pci_dev *pdev)
+   struct pci_dev *pdev,
+   u32 flag)
 {
 struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev-seg);
 int bdf = PCI_BDF2(pdev-bus, devfn);
diff --git a/xen/drivers/passthrough/arm/smmu.c 
b/xen/drivers/passthrough/arm/smmu.c
index 6cc4394..9a667e9 100644
--- a/xen/drivers/passthrough/arm/smmu.c
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -2605,7 +2605,7 @@ static void arm_smmu_destroy_iommu_domain(struct 
iommu_domain *domain)
 }
 
 static int arm_smmu_assign_dev(struct domain *d, u8 devfn,
-  struct device *dev)
+  struct device *dev, u32 flag)
 {
struct iommu_domain *domain;
struct arm_smmu_xen_domain *xen_domain;
diff --git a/xen/drivers/passthrough/device_tree.c 
b/xen/drivers/passthrough/device_tree.c
index 5d3842a..7ff79f8 100644
--- a/xen/drivers/passthrough/device_tree.c
+++ b/xen/drivers/passthrough/device_tree.c
@@ -52,7 +52,8 @@ int iommu_assign_dt_device(struct domain *d, struct 
dt_device_node *dev)
 goto fail;
 }
 
-rc = hd-platform_ops-assign_device(d, 0, dt_to_dev(dev));
+/* The flag field doesn't matter to DT device. */
+rc = hd-platform_ops-assign_device(d, 0, dt_to_dev(dev), 0);
 
 if ( rc )
 goto fail;
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index e30be43..c7bbf6e 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1335,7 +1335,7 @@ static int device_assigned(u16 seg, u8 bus, u8 devfn)
 return pdev ? 0 : -EBUSY;
 }
 
-static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
+static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
 {
 struct hvm_iommu *hd = domain_hvm_iommu(d);
 struct pci_dev *pdev;
@@ -1371,7 +1371,7 @@ static int assign_device(struct domain *d, u16 seg, u8 
bus, u8 devfn)
 
 pdev-fault.count = 0;
 
-if ( (rc = hd-platform_ops-assign_device(d, devfn, pci_to_dev(pdev))) )
+if ( (rc = hd-platform_ops-assign_device(d, devfn, 

[Xen-devel] [PATCH 02/16] xen/vtd: create RMRR mapping

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

RMRR reserved regions must be setup in the pfn space with an identity
mapping to reported mfn. However existing code has problem to setup
correct mapping when VT-d shares EPT page table, so lead to problem
when assigning devices (e.g GPU) with RMRR reported. So instead, this
patch aims to setup identity mapping in p2m layer, regardless of
whether EPT is shared or not. And we still keep creating VT-d table.

And we also need to introduce a pair of helper to create/clear this
sort of identity mapping as follows:

set_identity_p2m_entry():

If the gfn space is unoccupied, we just set the mapping. If space
is already occupied by desired identity mapping, do nothing.
Otherwise, failure is returned.

clear_identity_p2m_entry():

We just define macro to wrapper guest_physmap_remove_page() with
a returning value as necessary.

CC: Tim Deegan t...@xen.org
CC: Keir Fraser k...@xen.org
CC: Jan Beulich jbeul...@suse.com
CC: Andrew Cooper andrew.coop...@citrix.com
CC: Yang Zhang yang.z.zh...@intel.com
CC: Kevin Tian kevin.t...@intel.com
Reviewed-by: Kevin Tian kevin.t...@intel.com
Reviewed-by: Tim Deegan t...@xen.org
Acked-by: George Dunlap george.dun...@eu.citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
---
 xen/arch/x86/mm/p2m.c   |   40 +--
 xen/drivers/passthrough/vtd/iommu.c |5 ++---
 xen/include/asm-x86/p2m.h   |   13 +---
 3 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 6fe6387..1e763dc 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -584,14 +584,16 @@ p2m_remove_page(struct p2m_domain *p2m, unsigned long 
gfn, unsigned long mfn,
  p2m-default_access);
 }
 
-void
+int
 guest_physmap_remove_page(struct domain *d, unsigned long gfn,
   unsigned long mfn, unsigned int page_order)
 {
 struct p2m_domain *p2m = p2m_get_hostp2m(d);
+int rc;
 gfn_lock(p2m, gfn, page_order);
-p2m_remove_page(p2m, gfn, mfn, page_order);
+rc = p2m_remove_page(p2m, gfn, mfn, page_order);
 gfn_unlock(p2m, gfn, page_order);
+return rc;
 }
 
 int
@@ -898,6 +900,40 @@ int set_mmio_p2m_entry(struct domain *d, unsigned long 
gfn, mfn_t mfn,
 return set_typed_p2m_entry(d, gfn, mfn, p2m_mmio_direct, access);
 }
 
+int set_identity_p2m_entry(struct domain *d, unsigned long gfn,
+   p2m_access_t p2ma)
+{
+p2m_type_t p2mt;
+p2m_access_t a;
+mfn_t mfn;
+struct p2m_domain *p2m = p2m_get_hostp2m(d);
+int ret;
+
+if ( !paging_mode_translate(p2m-domain) )
+return 0;
+
+gfn_lock(p2m, gfn, 0);
+
+mfn = p2m-get_entry(p2m, gfn, p2mt, a, 0, NULL);
+
+if ( p2mt == p2m_invalid || p2mt == p2m_mmio_dm )
+ret = p2m_set_entry(p2m, gfn, _mfn(gfn), PAGE_ORDER_4K,
+p2m_mmio_direct, p2ma);
+else if ( mfn_x(mfn) == gfn  p2mt == p2m_mmio_direct  a == p2ma )
+ret = 0;
+else
+{
+ret = -EBUSY;
+printk(XENLOG_G_WARNING
+   Cannot setup identity map d%d:%lx,
+gfn already mapped to %lx.\n,
+   d-domain_id, gfn, mfn_x(mfn));
+}
+
+gfn_unlock(p2m, gfn, 0);
+return ret;
+}
+
 /* Returns: 0 for success, -errno for failure */
 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
 {
diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index 9849d0e..5aa482f 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1839,7 +1839,7 @@ static int rmrr_identity_mapping(struct domain *d, bool_t 
map,
 
 while ( base_pfn  end_pfn )
 {
-if ( intel_iommu_unmap_page(d, base_pfn) )
+if ( clear_identity_p2m_entry(d, base_pfn, 0) )
 ret = -ENXIO;
 base_pfn++;
 }
@@ -1855,8 +1855,7 @@ static int rmrr_identity_mapping(struct domain *d, bool_t 
map,
 
 while ( base_pfn  end_pfn )
 {
-int err = intel_iommu_map_page(d, base_pfn, base_pfn,
-   IOMMUF_readable|IOMMUF_writable);
+int err = set_identity_p2m_entry(d, base_pfn, p2m_access_rw);
 
 if ( err )
 return err;
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index b49c09b..190a286 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -503,9 +503,9 @@ static inline int guest_physmap_add_page(struct domain *d,
 }
 
 /* Remove a page from a domain's p2m table */
-void guest_physmap_remove_page(struct domain *d,
-   unsigned long gfn,
-   unsigned long mfn, unsigned int page_order);
+int guest_physmap_remove_page(struct domain *d,
+  unsigned long gfn,
+  unsigned 

[Xen-devel] [PATCH 13/16] libxl: construct e820 map with RDM information for HVM guest

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

Here we'll construct a basic guest e820 table via
XENMEM_set_memory_map. This table includes lowmem, highmem
and RDMs if they exist, and hvmloader would need this info
later.

Note this guest e820 table would be same as before if the
platform has no any RDM or we disable RDM (by default).

CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Acked-by: Ian Jackson ian.jack...@eu.citrix.com
Checked-by: Ian Jackson ian.jack...@eu.citrix.com
---
 tools/libxl/libxl_arch.h |7 
 tools/libxl/libxl_arm.c  |8 +
 tools/libxl/libxl_dom.c  |5 +++
 tools/libxl/libxl_x86.c  |   83 ++
 4 files changed, 103 insertions(+)

diff --git a/tools/libxl/libxl_arch.h b/tools/libxl/libxl_arch.h
index 9a80d43..bd030b6 100644
--- a/tools/libxl/libxl_arch.h
+++ b/tools/libxl/libxl_arch.h
@@ -55,4 +55,11 @@ int libxl__arch_vnuma_build_vmemrange(libxl__gc *gc,
 _hidden
 int libxl__arch_domain_map_irq(libxl__gc *gc, uint32_t domid, int irq);
 
+/* arch specific to construct memory mapping function */
+_hidden
+int libxl__arch_domain_construct_memmap(libxl__gc *gc,
+libxl_domain_config *d_config,
+uint32_t domid,
+struct xc_hvm_build_args *args);
+
 #endif
diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c
index d306905..42ab6d8 100644
--- a/tools/libxl/libxl_arm.c
+++ b/tools/libxl/libxl_arm.c
@@ -969,6 +969,14 @@ int libxl__arch_domain_map_irq(libxl__gc *gc, uint32_t 
domid, int irq)
 return xc_domain_bind_pt_spi_irq(CTX-xch, domid, irq, irq);
 }
 
+int libxl__arch_domain_construct_memmap(libxl__gc *gc,
+libxl_domain_config *d_config,
+uint32_t domid,
+struct xc_hvm_build_args *args)
+{
+return 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 0b7c39d..a76d4b3 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1012,6 +1012,11 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
 goto out;
 }
 
+if (libxl__arch_domain_construct_memmap(gc, d_config, domid, args)) {
+LOG(ERROR, setting domain memory map failed);
+goto out;
+}
+
 ret = hvm_build_set_params(ctx-xch, domid, info, state-store_port,
state-store_mfn, state-console_port,
state-console_mfn, state-store_domid,
diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
index 8cd15ca..b3cf3e2 100644
--- a/tools/libxl/libxl_x86.c
+++ b/tools/libxl/libxl_x86.c
@@ -445,6 +445,89 @@ int libxl__arch_domain_map_irq(libxl__gc *gc, uint32_t 
domid, int irq)
 }
 
 /*
+ * Here we're just trying to set these kinds of e820 mappings:
+ *
+ * #1. Low memory region
+ *
+ * Low RAM starts at least from 1M to make sure all standard regions
+ * of the PC memory map, like BIOS, VGA memory-mapped I/O and vgabios,
+ * have enough space.
+ * Note: Those stuffs below 1M are still constructed with multiple
+ * e820 entries by hvmloader. At this point we don't change anything.
+ *
+ * #2. RDM region if it exists
+ *
+ * #3. High memory region if it exists
+ *
+ * Note: these regions are not overlapping since we already check
+ * to adjust them. Please refer to libxl__domain_device_construct_rdm().
+ */
+#define GUEST_LOW_MEM_START_DEFAULT 0x10
+int libxl__arch_domain_construct_memmap(libxl__gc *gc,
+libxl_domain_config *d_config,
+uint32_t domid,
+struct xc_hvm_build_args *args)
+{
+int rc = 0;
+unsigned int nr = 0, i;
+/* We always own at least one lowmem entry. */
+unsigned int e820_entries = 1;
+struct e820entry *e820 = NULL;
+uint64_t highmem_size =
+args-highmem_end ? args-highmem_end - (1ull  32) : 0;
+
+/* Add all rdm entries. */
+for (i = 0; i  d_config-num_rdms; i++)
+if (d_config-rdms[i].policy != LIBXL_RDM_RESERVE_POLICY_INVALID)
+e820_entries++;
+
+
+/* If we should have a highmem range. */
+if (highmem_size)
+e820_entries++;
+
+if (e820_entries = E820MAX) {
+LOG(ERROR, Ooops! Too many entries in the memory map!\n);
+rc = ERROR_INVAL;
+goto out;
+}
+
+e820 = libxl__malloc(gc, sizeof(struct e820entry) * e820_entries);
+
+/* Low memory */
+e820[nr].addr = GUEST_LOW_MEM_START_DEFAULT;
+e820[nr].size = args-lowmem_end - GUEST_LOW_MEM_START_DEFAULT;
+e820[nr].type = E820_RAM;
+nr++;
+
+/* RDM mapping */
+for (i = 0; i 

[Xen-devel] [PATCH 12/16] tools: introduce a new parameter to set a predefined rdm boundary

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

Previously we always fix that predefined boundary as 2G to handle
conflict between memory and rdm, but now this predefined boundar
can be changes with the parameter rdm_mem_boundary in .cfg file.

CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Acked-by: Wei Liu wei.l...@citrix.com
Acked-by: Ian Jackson ian.jack...@eu.citrix.com
Checked-by: Ian Jackson ian.jack...@eu.citrix.com
---
 docs/man/xl.cfg.pod.5   |   22 ++
 tools/libxl/libxl.h |6 ++
 tools/libxl/libxl_create.c  |4 
 tools/libxl/libxl_dom.c |8 +---
 tools/libxl/libxl_types.idl |1 +
 tools/libxl/xl_cmdimpl.c|3 +++
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index e6e0f70..ce7ce85 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -845,6 +845,28 @@ More information about Xen gfx_passthru feature is 
available
 on the XenVGAPassthrough Lhttp://wiki.xen.org/wiki/XenVGAPassthrough
 wiki page.
 
+=item Brdm_mem_boundary=MBYTES
+
+Number of megabytes to set a boundary for checking rdm conflict.
+
+When RDM conflicts with RAM, RDM probably scatter the whole RAM space.
+Especially multiple RDM entries would worsen this to lead a complicated
+memory layout. So here we're trying to figure out a simple solution to
+avoid breaking existing layout. So when a conflict occurs,
+
+#1. Above a predefined boundary
+- move lowmem_end below reserved region to solve conflict;
+
+#2. Below a predefined boundary
+- Check strict/relaxed policy.
+strict policy leads to fail libxl. Note when both policies
+are specified on a given region, 'strict' is always preferred.
+relaxed policy issue a warning message and also mask this
+entry INVALID to indicate we shouldn't expose this entry to
+hvmloader.
+
+Here the default is 2G.
+
 =item Bdtdev=[ DTDEV_PATH, DTDEV_PATH, ... ]
 
 Specifies the host device tree nodes to passthrough to this guest. Each
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 5a7308d..927b2d8 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -909,6 +909,12 @@ const char *libxl_defbool_to_string(libxl_defbool b);
 #define LIBXL_TIMER_MODE_DEFAULT -1
 #define LIBXL_MEMKB_DEFAULT ~0ULL
 
+/*
+ * We'd like to set a memory boundary to determine if we need to check
+ * any overlap with reserved device memory.
+ */
+#define LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT (2048 * 1024)
+
 #define LIBXL_MS_VM_GENID_LEN 16
 typedef struct {
 uint8_t bytes[LIBXL_MS_VM_GENID_LEN];
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 5b57062..b27c53a 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -54,6 +54,10 @@ void libxl__rdm_setdefault(libxl__gc *gc, 
libxl_domain_build_info *b_info)
 {
 if (b_info-u.hvm.rdm.policy == LIBXL_RDM_RESERVE_POLICY_INVALID)
 b_info-u.hvm.rdm.policy = LIBXL_RDM_RESERVE_POLICY_RELAXED;
+
+if (b_info-u.hvm.rdm_mem_boundary_memkb == LIBXL_MEMKB_DEFAULT)
+b_info-u.hvm.rdm_mem_boundary_memkb =
+LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT;
 }
 
 int libxl__domain_build_info_setdefault(libxl__gc *gc,
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 9af3b21..0b7c39d 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -930,12 +930,6 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
 int ret, rc = ERROR_FAIL;
 uint64_t mmio_start, lowmem_end, highmem_end;
 libxl_domain_build_info *const info = d_config-b_info;
-/*
- * Currently we fix this as 2G to guarantee how to handle
- * our rdm policy. But we'll provide a parameter to set
- * this dynamically.
- */
-uint64_t rdm_mem_boundary = 0x8000;
 
 memset(args, 0, sizeof(struct xc_hvm_build_args));
 /* The params from the configuration file are in Mb, which are then
@@ -974,7 +968,7 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
 args.mmio_start = mmio_start;
 
 rc = libxl__domain_device_construct_rdm(gc, d_config,
-rdm_mem_boundary,
+
info-u.hvm.rdm_mem_boundary_memkb*1024,
 args);
 if (rc) {
 LOG(ERROR, checking reserved device memory failed);
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 157fa59..9caaf44 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -503,6 +503,7 @@ libxl_domain_build_info = Struct(domain_build_info,[
(ms_vm_genid,  libxl_ms_vm_genid),
(serial_list,  

[Xen-devel] [PATCH 16/16] tools: parse to enable new rdm policy parameters

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

This patch parses to enable user configurable parameters to specify
RDM resource and according policies which are defined previously,

Global RDM parameter:
rdm = strategy=host,policy=strict/relaxed
Per-device RDM parameter:
pci = [ 'sbdf, rdm_policy=strict/relaxed' ]

Default per-device RDM policy is same as default global RDM policy as being
'relaxed'. And the per-device policy would override the global policy like
others.

CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Acked-by: Wei Liu wei.l...@citrix.com
---
 tools/libxl/libxlu_pci.c |   92 +-
 tools/libxl/libxlutil.h  |4 ++
 tools/libxl/xl_cmdimpl.c |   13 +++
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/tools/libxl/libxlu_pci.c b/tools/libxl/libxlu_pci.c
index 26fb143..026413b 100644
--- a/tools/libxl/libxlu_pci.c
+++ b/tools/libxl/libxlu_pci.c
@@ -42,6 +42,9 @@ static int pcidev_struct_fill(libxl_device_pci *pcidev, 
unsigned int domain,
 #define STATE_OPTIONS_K 6
 #define STATE_OPTIONS_V 7
 #define STATE_TERMINAL  8
+#define STATE_TYPE  9
+#define STATE_RDM_STRATEGY  10
+#define STATE_RESERVE_POLICY11
 int xlu_pci_parse_bdf(XLU_Config *cfg, libxl_device_pci *pcidev, const char 
*str)
 {
 unsigned state = STATE_DOMAIN;
@@ -143,7 +146,18 @@ int xlu_pci_parse_bdf(XLU_Config *cfg, libxl_device_pci 
*pcidev, const char *str
 pcidev-permissive = atoi(tok);
 }else if ( !strcmp(optkey, seize) ) {
 pcidev-seize = atoi(tok);
-}else{
+} else if (!strcmp(optkey, rdm_policy)) {
+if (!strcmp(tok, strict)) {
+pcidev-rdm_policy = LIBXL_RDM_RESERVE_POLICY_STRICT;
+} else if (!strcmp(tok, relaxed)) {
+pcidev-rdm_policy = LIBXL_RDM_RESERVE_POLICY_RELAXED;
+} else {
+XLU__PCI_ERR(cfg, %s is not an valid PCI RDM property
+   policy: 'strict' or 'relaxed'.,
+ tok);
+goto parse_error;
+}
+} else {
 XLU__PCI_ERR(cfg, Unknown PCI BDF option: %s, optkey);
 }
 tok = ptr + 1;
@@ -167,6 +181,82 @@ parse_error:
 return ERROR_INVAL;
 }
 
+int xlu_rdm_parse(XLU_Config *cfg, libxl_rdm_reserve *rdm, const char *str)
+{
+unsigned state = STATE_TYPE;
+char *buf2, *tok, *ptr, *end;
+
+if (NULL == (buf2 = ptr = strdup(str)))
+return ERROR_NOMEM;
+
+for (tok = ptr, end = ptr + strlen(ptr) + 1; ptr  end; ptr++) {
+switch(state) {
+case STATE_TYPE:
+if (*ptr == '=') {
+state = STATE_RDM_STRATEGY;
+*ptr = '\0';
+if (strcmp(tok, strategy)) {
+XLU__PCI_ERR(cfg, Unknown RDM state option: %s, tok);
+goto parse_error;
+}
+tok = ptr + 1;
+}
+break;
+case STATE_RDM_STRATEGY:
+if (*ptr == '\0' || *ptr == ',') {
+state = STATE_RESERVE_POLICY;
+*ptr = '\0';
+if (!strcmp(tok, host)) {
+rdm-strategy = LIBXL_RDM_RESERVE_STRATEGY_HOST;
+} else {
+XLU__PCI_ERR(cfg, Unknown RDM strategy option: %s, tok);
+goto parse_error;
+}
+tok = ptr + 1;
+}
+break;
+case STATE_RESERVE_POLICY:
+if (*ptr == '=') {
+state = STATE_OPTIONS_V;
+*ptr = '\0';
+if (strcmp(tok, policy)) {
+XLU__PCI_ERR(cfg, Unknown RDM property value: %s, tok);
+goto parse_error;
+}
+tok = ptr + 1;
+}
+break;
+case STATE_OPTIONS_V:
+if (*ptr == ',' || *ptr == '\0') {
+state = STATE_TERMINAL;
+*ptr = '\0';
+if (!strcmp(tok, strict)) {
+rdm-policy = LIBXL_RDM_RESERVE_POLICY_STRICT;
+} else if (!strcmp(tok, relaxed)) {
+rdm-policy = LIBXL_RDM_RESERVE_POLICY_RELAXED;
+} else {
+XLU__PCI_ERR(cfg, Unknown RDM property policy value: %s,
+ tok);
+goto parse_error;
+}
+tok = ptr + 1;
+}
+default:
+break;
+}
+}
+
+free(buf2);
+
+if (tok != ptr || state != STATE_TERMINAL)
+goto parse_error;
+
+

[Xen-devel] [PATCH 04/16] xen: enable XENMEM_memory_map in hvm

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

This patch enables XENMEM_memory_map in hvm. So hvmloader can
use it to setup the e820 mappings.

CC: Keir Fraser k...@xen.org
CC: Jan Beulich jbeul...@suse.com
CC: Andrew Cooper andrew.coop...@citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Reviewed-by: Tim Deegan t...@xen.org
Reviewed-by: Kevin Tian kevin.t...@intel.com
Acked-by: Jan Beulich jbeul...@suse.com
Acked-by: George Dunlap george.dun...@eu.citrix.com
---
 xen/arch/x86/hvm/hvm.c |2 --
 xen/arch/x86/mm.c  |6 --
 2 files changed, 8 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index c07e3ef..d860579 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -4855,7 +4855,6 @@ static long hvm_memory_op(int cmd, 
XEN_GUEST_HANDLE_PARAM(void) arg)
 
 switch ( cmd  MEMOP_CMD_MASK )
 {
-case XENMEM_memory_map:
 case XENMEM_machine_memory_map:
 case XENMEM_machphys_mapping:
 return -ENOSYS;
@@ -4931,7 +4930,6 @@ static long hvm_memory_op_compat32(int cmd, 
XEN_GUEST_HANDLE_PARAM(void) arg)
 
 switch ( cmd  MEMOP_CMD_MASK )
 {
-case XENMEM_memory_map:
 case XENMEM_machine_memory_map:
 case XENMEM_machphys_mapping:
 return -ENOSYS;
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 342414f..8c887d8 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4717,12 +4717,6 @@ long arch_memory_op(unsigned long cmd, 
XEN_GUEST_HANDLE_PARAM(void) arg)
 return rc;
 }
 
-if ( is_hvm_domain(d) )
-{
-rcu_unlock_domain(d);
-return -EPERM;
-}
-
 e820 = xmalloc_array(e820entry_t, fmap.map.nr_entries);
 if ( e820 == NULL )
 {
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 09/16] tools: extend xc_assign_device() to support rdm reservation policy

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

This patch passes rdm reservation policy to xc_assign_device() so the policy
is checked when assigning devices to a VM.

Note this also bring some fallout to python usage of xc_assign_device().

CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
CC: David Scott dave.sc...@eu.citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Acked-by: Wei Liu wei.l...@citrix.com
---
 tools/libxc/include/xenctrl.h   |3 ++-
 tools/libxc/xc_domain.c |9 -
 tools/libxl/libxl_pci.c |3 ++-
 tools/ocaml/libs/xc/xenctrl_stubs.c |   16 
 tools/python/xen/lowlevel/xc/xc.c   |   30 --
 5 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 9c5ef8b..3f2381a 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2067,7 +2067,8 @@ int xc_hvm_destroy_ioreq_server(xc_interface *xch,
 /* HVM guest pass-through */
 int xc_assign_device(xc_interface *xch,
  uint32_t domid,
- uint32_t machine_sbdf);
+ uint32_t machine_sbdf,
+ uint32_t flag);
 
 int xc_get_device_group(xc_interface *xch,
  uint32_t domid,
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 3151103..df06314 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1697,7 +1697,8 @@ int xc_domain_setdebugging(xc_interface *xch,
 int xc_assign_device(
 xc_interface *xch,
 uint32_t domid,
-uint32_t machine_sbdf)
+uint32_t machine_sbdf,
+uint32_t flag)
 {
 DECLARE_DOMCTL;
 
@@ -1705,6 +1706,7 @@ int xc_assign_device(
 domctl.domain = domid;
 domctl.u.assign_device.dev = XEN_DOMCTL_DEV_PCI;
 domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
+domctl.u.assign_device.flag = flag;
 
 return do_domctl(xch, domctl);
 }
@@ -1792,6 +1794,11 @@ int xc_assign_dt_device(
 
 domctl.u.assign_device.dev = XEN_DOMCTL_DEV_DT;
 domctl.u.assign_device.u.dt.size = size;
+/*
+ * DT doesn't own any RDM so actually DT has nothing to do
+ * for any flag and here just fix that as 0.
+ */
+domctl.u.assign_device.flag = 0;
 set_xen_guest_handle(domctl.u.assign_device.u.dt.path, path);
 
 rc = do_domctl(xch, domctl);
diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
index e0743f8..632c15e 100644
--- a/tools/libxl/libxl_pci.c
+++ b/tools/libxl/libxl_pci.c
@@ -894,6 +894,7 @@ static int do_pci_add(libxl__gc *gc, uint32_t domid, 
libxl_device_pci *pcidev, i
 FILE *f;
 unsigned long long start, end, flags, size;
 int irq, i, rc, hvm = 0;
+uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED;
 
 if (type == LIBXL_DOMAIN_TYPE_INVALID)
 return ERROR_FAIL;
@@ -987,7 +988,7 @@ static int do_pci_add(libxl__gc *gc, uint32_t domid, 
libxl_device_pci *pcidev, i
 
 out:
 if (!libxl_is_stubdom(ctx, domid, NULL)) {
-rc = xc_assign_device(ctx-xch, domid, pcidev_encode_bdf(pcidev));
+rc = xc_assign_device(ctx-xch, domid, pcidev_encode_bdf(pcidev), 
flag);
 if (rc  0  (hvm || errno != ENOSYS)) {
 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, xc_assign_device failed);
 return ERROR_FAIL;
diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c 
b/tools/ocaml/libs/xc/xenctrl_stubs.c
index 64f1137..b7de615 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -1172,12 +1172,17 @@ CAMLprim value stub_xc_domain_test_assign_device(value 
xch, value domid, value d
CAMLreturn(Val_bool(ret == 0));
 }
 
-CAMLprim value stub_xc_domain_assign_device(value xch, value domid, value desc)
+static int domain_assign_device_rdm_flag_table[] = {
+XEN_DOMCTL_DEV_RDM_RELAXED,
+};
+
+CAMLprim value stub_xc_domain_assign_device(value xch, value domid, value desc,
+value rflag)
 {
-   CAMLparam3(xch, domid, desc);
+   CAMLparam4(xch, domid, desc, rflag);
int ret;
int domain, bus, dev, func;
-   uint32_t sbdf;
+   uint32_t sbdf, flag;
 
domain = Int_val(Field(desc, 0));
bus = Int_val(Field(desc, 1));
@@ -1185,7 +1190,10 @@ CAMLprim value stub_xc_domain_assign_device(value xch, 
value domid, value desc)
func = Int_val(Field(desc, 3));
sbdf = encode_sbdf(domain, bus, dev, func);
 
-   ret = xc_assign_device(_H(xch), _D(domid), sbdf);
+   ret = Int_val(Field(rflag, 0));
+   flag = domain_assign_device_rdm_flag_table[ret];
+
+   ret = xc_assign_device(_H(xch), _D(domid), sbdf, flag);
 
if (ret  0)
failwith_xc(_H(xch));
diff --git a/tools/python/xen/lowlevel/xc/xc.c 
b/tools/python/xen/lowlevel/xc/xc.c
index 

Re: [Xen-devel] PV-vNUMA issue: topology is misinterpreted by the guest

2015-07-22 Thread Dario Faggioli
On Wed, 2015-07-22 at 11:32 -0400, Boris Ostrovsky wrote:
 On 07/22/2015 10:50 AM, Dario Faggioli wrote:

  Yep. Exacty. As Boris says, this is a generic scheduling issue, although
  it's tru that it's only (as far as I can tell) with vNUMA that it bite
  us so hard...
 
 I am not sure that it's only vNUMA. It's just that with vNUMA we can see 
 a warning (on your system) that something goes wrong. In other cases 
 (like scheduling, or sizing objects based on discovered cache sizes) we 
 don't see anything in the log but system/programs are making wrong 
 decisions. 

I'm not questioning that the guest scheduler is put in a position where
it may make weird and/or wrong choices... Let's put it like this: on
that very test box, or any other NUMA box I've worked on, I've never
seen performance affected _so_much_ as in the vNUMA case.

In fact, of course there are other issues (like the ones you're
mentioning, caused by this), but it's only with vNUMA that I see 2 out
of 4 vcpus completely lost! :-/

 (And your results above may well be the example of that)
 
Right. BTW, the example was meant at investigating what you suggested,
i.e., to have the guest topology follow the host topology. I tried, but
could not achieve it. Do you think I'm doing something wrong? May it be
that it's not always doable (ISTR Andrew saying that it is also the pcpu
where the vcpus are created that matters)?

I'm (re)asking because, if you think that's feasible, I can work on
that. I fit's not, well... :-/

Regards,
Dario
-- 
This happens because I choose it to happen! (Raistlin Majere)
-
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems RD Ltd., Cambridge (UK)


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v11][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Ian Jackson
Chen, Tiejun writes (Re: [v11][PATCH 11/16] tools/libxl: detect and avoid 
conflicts with RDM):
 We just need to sync two patches:

Thanks.

 Note I just compiled with these changes since right now I can't access 
 any machine to test.

I intend to produce a git branch RSN.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 2/3] libxl: localtime(3) can return NULL

2015-07-22 Thread Wei Liu
On Wed, Jul 22, 2015 at 03:19:08PM +0100, Ian Campbell wrote:
 On Fri, 2015-07-17 at 18:12 +0100, Wei Liu wrote:
  On Fri, Jul 17, 2015 at 06:05:45PM +0100, Ian Jackson wrote:
   Wei Liu writes ([PATCH v4 2/3] libxl: localtime(3) can return 
   NULL):
Signed-off-by: Wei Liu wei.l...@citrix.com
   
 tm = localtime(t);
+if (!tm) {
+LOG(ERROR, Failed to call localtime);
   
   localtime sets errno if it fails.  So you mean LOGE.
   
  
  Heh. Linux manpage doesn't say so. But
  
  http://pubs.opengroup.org/onlinepubs/009695399/functions/localtime.ht
  ml
  
  does say that.
  
  I will rework this patch and send it out next week with other 
  coverity
  scan inspired patches I accumulate.
 
 Should we not be using localtime_t in libxl? Afterall we don't know
 what other threads in the application might be doing.
 

I guess you mean localtime_r.

I agree with you we should use that one.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen/HVM: atomically access pointers in bufioreq handling

2015-07-22 Thread Jan Beulich
 On 22.07.15 at 16:50, stefano.stabell...@eu.citrix.com wrote:
 On Wed, 22 Jul 2015, Jan Beulich wrote:
  --- a/xen-hvm.c
  +++ b/xen-hvm.c
  @@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
   
   static int handle_buffered_iopage(XenIOState *state)
   {
  +buffered_iopage_t *buf_page = state-buffered_io_page;
   buf_ioreq_t *buf_req = NULL;
   ioreq_t req;
   int qw;
   
  -if (!state-buffered_io_page) {
  +if (!buf_page) {
   return 0;
   }
   
   memset(req, 0x00, sizeof(req));
   
  -while (state-buffered_io_page-read_pointer != 
  state-buffered_io_page-write_pointer) {
  -buf_req = state-buffered_io_page-buf_ioreq[
  -state-buffered_io_page-read_pointer % 
  IOREQ_BUFFER_SLOT_NUM];
  +for (;;) {
  +uint32_t rdptr = buf_page-read_pointer, wrptr;
  +
  +xen_rmb();
  
  We don't need this barrier.
 
 How would we not? We need to make sure we read in this order
 read_pointer, write_pointer, and read_pointer again (in the
 comparison).  Only that way we can be certain to hold a matching
 pair in hands at the end.
 
 See below
 
 
  +wrptr = buf_page-write_pointer;
  +xen_rmb();
  +if (rdptr != buf_page-read_pointer) {
  
  I think you have to use atomic_read to be sure that the second read to
  buf_page-read_pointer is up to date and not optimized away.
 
 No, suppressing such an optimization is an intended (side) effect
 of the barriers used.
 
 I understand what you are saying but I am not sure if your assumption
 is correct. Can the compiler optimize the second read anyway?

No, it can't, due to the barrier.

  But if I think that it would be best to simply use atomic_read to read
  both pointers at once using uint64_t as type, so you are sure to get a
  consistent view and there is no need for this check.
 
 But I'm specifically trying to avoid e.g. a locked cmpxchg8b here on
 ix86.
 
 OK, but we don't need cmpxchg8b, just:
 
 #define atomic_read(ptr)   (*(__typeof__(*ptr) volatile*) (ptr))

This only gives the impression of being atomic when the type is wider
than a machine word. There's no ix86 (i.e. 32-bit) instruction other
than LOCK CMPXCHG8B (and possibly MMX/SSE/AVX ones) allowing
to atomically read a 64-bit quantity.

 something like:
 
  for (;;) {
  uint64_t ptrs;
  uint32_t rdptr, wrptr;
  
  ptrs = atomic_read((uint64_t*)state-buffered_io_page-read_pointer);
  rdptr = (uint32_t)ptrs;
  wrptr = *(((uint32_t*)ptrs) + 1);
  
  if (rdptr == wrptr) {
  continue;
  }
  
  [work]
  
  atomic_add(buf_page-read_pointer, qw + 1);
  }
 
 it would work, wouldn't it?

Looks like so, but the amount of casts alone makes me wish for
no-one to consider this (but I agree that the casts could be
taken care of). Still I think (as btw done elsewhere) the lock
free access is preferable.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 08/16] tools/libxc: Expose new hypercall xc_reserved_device_memory_map

2015-07-22 Thread Wei Liu
On Wed, Jul 22, 2015 at 04:44:11PM +0100, Ian Jackson wrote:
 From: Tiejun Chen tiejun.c...@intel.com
 
 We will introduce the hypercall xc_reserved_device_memory_map
 approach to libxc. This helps us get rdm entry info according to
 different parameters. If flag == PCI_DEV_RDM_ALL, all entries
 should be exposed. Or we just expose that rdm entry specific to
 a SBDF.
 
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Wei Liu wei.l...@citrix.com
 Signed-off-by: Tiejun Chen tiejun.c...@intel.com
 Reviewed-by: Kevin Tian kevin.t...@intel.com
 Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com
 

Acked-by: Wei Liu wei.l...@citrix.com

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v13 00/16] Fix RMRR (avoid RDM)

2015-07-22 Thread Ian Jackson
Ian Jackson writes ([PATCH v13 00/16] Fix RMRR (avoid RDM)):
 Wei, can you please give your formal ack for the two changed patches ?
 
 Tiejun, can you please test this series ?  Hopefully if you say it
 still works after this, we can commit it tomorrow.

FAOD, I have not executed it, although I have checked that it builds
(on amd64).

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v11][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Chen, Tiejun
OOPS! Please refer to this version: (One miss changing flag to flags in 
patch #11 although we can compile successfully.)



#1. To patch #8

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 2991333..9c5ef8b 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1316,7 +1316,7 @@ int xc_get_machine_memory_map(xc_interface *xch,
   uint32_t max_entries);

 int xc_reserved_device_memory_map(xc_interface *xch,
-  uint32_t flag,
+  uint32_t flags,
   uint16_t seg,
   uint8_t bus,
   uint8_t devfn,
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 298b3b5..1b074b7 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -686,7 +686,7 @@ int xc_domain_set_memory_map(xc_interface *xch,
 }

 int xc_reserved_device_memory_map(xc_interface *xch,
-  uint32_t flag,
+  uint32_t flags,
   uint16_t seg,
   uint8_t bus,
   uint8_t devfn,
@@ -695,11 +695,11 @@ int xc_reserved_device_memory_map(xc_interface *xch,
 {
 int rc;
 struct xen_reserved_device_memory_map xrdmmap = {
-.flag = flag,
-.seg = seg,
-.bus = bus,
-.devfn = devfn,
-.nr_entries = *max_entries
+.flags = flags,
+.nr_entries = *max_entries,
+.dev.pci.seg = seg,
+.dev.pci.bus = bus,
+.dev.pci.devfn = devfn,
 };
 DECLARE_HYPERCALL_BOUNCE(entries,
  sizeof(struct xen_reserved_device_memory) *

#2. To patch #11
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 29476fc..8d103c3 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -94,7 +94,7 @@ const char *libxl__domain_device_model(libxl__gc *gc,

 static int
 libxl__xc_device_get_rdm(libxl__gc *gc,
- uint32_t flag,
+ uint32_t flags,
  uint16_t seg,
  uint8_t bus,
  uint8_t devfn,
@@ -107,7 +107,7 @@ libxl__xc_device_get_rdm(libxl__gc *gc,
  * We really can't presume how many entries we can get in advance.
  */
 *nr_entries = 0;
-r = xc_reserved_device_memory_map(CTX-xch, flag, seg, bus, devfn,
+r = xc_reserved_device_memory_map(CTX-xch, flags, seg, bus, devfn,
   NULL, nr_entries);
 assert(r = 0);
 /* 0 means we have no any rdm entry. */
@@ -119,7 +119,7 @@ libxl__xc_device_get_rdm(libxl__gc *gc,
 }

 GCNEW_ARRAY(*xrdm, *nr_entries);
-r = xc_reserved_device_memory_map(CTX-xch, flag, seg, bus, devfn,
+r = xc_reserved_device_memory_map(CTX-xch, flags, seg, bus, devfn,
   *xrdm, nr_entries);
 if (r)
 rc = ERROR_FAIL;
@@ -212,7 +212,7 @@ int libxl__domain_device_construct_rdm(libxl__gc *gc,
 unsigned int nr_entries;

 /* Collect all rdm info if exist. */
-rc = libxl__xc_device_get_rdm(gc, PCI_DEV_RDM_ALL,
+rc = libxl__xc_device_get_rdm(gc, XENMEM_RDM_ALL,
   0, 0, 0, nr_entries, xrdm);
 if (rc)
 goto out;
@@ -240,7 +240,7 @@ int libxl__domain_device_construct_rdm(libxl__gc *gc,
 devfn = PCI_DEVFN(d_config-pcidevs[i].dev,
   d_config-pcidevs[i].func);
 nr_entries = 0;
-rc = libxl__xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL,
+rc = libxl__xc_device_get_rdm(gc, 0,
   seg, bus, devfn, nr_entries, 
xrdm);

 if (rc)
 goto out;

Thanks
Tiejun

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling

2015-07-22 Thread Stefano Stabellini
On Wed, 22 Jul 2015, Jan Beulich wrote:
  On 22.07.15 at 16:49, stefano.stabell...@eu.citrix.com wrote:
  On Wed, 22 Jul 2015, Jan Beulich wrote:
   On 21.07.15 at 18:18, stefano.stabell...@eu.citrix.com wrote:
   On Thu, 18 Jun 2015, Jan Beulich wrote:
   --- a/xen/arch/x86/hvm/hvm.c
   +++ b/xen/arch/x86/hvm/hvm.c
   @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str

static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct 
   domain *d,
 domid_t domid, bool_t is_default,
   - bool_t handle_bufioreq, ioservid_t id)
   + int bufioreq_handling, ioservid_t id)
   
   uint8_t?
  
  Why? I'm generally against using fixed width types when you don't
  really need them. And using uint_least8_t or uint_fast8_t is neither
  an opton, nor would it make the code look reasonable. Plain int is
  just fine here.
  
  You are not just changing integer size but also switching from unsigned
  to signed implicitly. I think it is not a good coding practice.
 
 To me bool (and by implication bool_t) is neither a signed nor
 an unsigned type.

I meant that handle_bufioreq, a uint8_t, is actually transparently
passed to hvm_create_ioreq_server, that takes an int as argument.
I think it should be avoided, or casted explicitly to avoid confusion
in the future.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 2/3] libxl: localtime(3) can return NULL

2015-07-22 Thread Ian Campbell
On Wed, 2015-07-22 at 16:09 +0100, Wei Liu wrote:
 On Wed, Jul 22, 2015 at 03:19:08PM +0100, Ian Campbell wrote:
  On Fri, 2015-07-17 at 18:12 +0100, Wei Liu wrote:
   On Fri, Jul 17, 2015 at 06:05:45PM +0100, Ian Jackson wrote:
Wei Liu writes ([PATCH v4 2/3] libxl: localtime(3) can return 
NULL):
 Signed-off-by: Wei Liu wei.l...@citrix.com

  tm = localtime(t);
 +if (!tm) {
 +LOG(ERROR, Failed to call localtime);

localtime sets errno if it fails.  So you mean LOGE.

   
   Heh. Linux manpage doesn't say so. But
   
   http://pubs.opengroup.org/onlinepubs/009695399/functions/localtim
   e.ht
   ml
   
   does say that.
   
   I will rework this patch and send it out next week with other 
   coverity
   scan inspired patches I accumulate.
  
  Should we not be using localtime_t in libxl? Afterall we don't know
  what other threads in the application might be doing.
  
 
 I guess you mean localtime_r.

yes.

 I agree with you we should use that one.
 
 Wei.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2] x86/HVM: avoid pointer wraparound in bufioreq handling

2015-07-22 Thread Jan Beulich
 On 22.07.15 at 16:49, stefano.stabell...@eu.citrix.com wrote:
 On Wed, 22 Jul 2015, Jan Beulich wrote:
  On 21.07.15 at 18:18, stefano.stabell...@eu.citrix.com wrote:
  On Thu, 18 Jun 2015, Jan Beulich wrote:
  --- a/xen/arch/x86/hvm/hvm.c
  +++ b/xen/arch/x86/hvm/hvm.c
  @@ -921,7 +921,7 @@ static void hvm_ioreq_server_disable(str
   
   static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct 
  domain *d,
domid_t domid, bool_t is_default,
  - bool_t handle_bufioreq, ioservid_t id)
  + int bufioreq_handling, ioservid_t id)
  
  uint8_t?
 
 Why? I'm generally against using fixed width types when you don't
 really need them. And using uint_least8_t or uint_fast8_t is neither
 an opton, nor would it make the code look reasonable. Plain int is
 just fine here.
 
 You are not just changing integer size but also switching from unsigned
 to signed implicitly. I think it is not a good coding practice.

To me bool (and by implication bool_t) is neither a signed nor
an unsigned type.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v11][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Chen, Tiejun

I intend to produce a git branch RSN.



On the staging? Tomorrow I can pull to check/test this.

Thanks
Tiejun

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] PV-vNUMA issue: topology is misinterpreted by the guest

2015-07-22 Thread Boris Ostrovsky

On 07/22/2015 10:50 AM, Dario Faggioli wrote:

On Wed, 2015-07-22 at 16:09 +0200, Juergen Gross wrote:

On 07/22/2015 03:58 PM, Boris Ostrovsky wrote:

What if I configure a guest to follow HW topology? I.e. I pin VCPUs to
appropriate cores/threads? With elfnote I am stuck with disabled topology.

Add an option to do exactly that: follow HW topology (pin vcpus,
configure vnuma)?


I thought about configuring things in such a way that they match the
host topology, as Boris is suggesting, too. And in that case, I think
arranging for doing so in toolstack, if PV vNUMA is identified (as I
think Juergen is suggesting) seems a good approach.

However, when I try to do that on my box, manually, but I don't seem to
be able to.

Here's what I tried. Since I have this host topology:
cpu_topology   :
cpu:coresocket node
   0:   010
   1:   010
   2:   110
   3:   110
   4:   910
   5:   910
   6:  1010
   7:  1010
   8:   001
   9:   001
  10:   101
  11:   101
  12:   901
  13:   901
  14:  1001
  15:  1001

I configured the guest like this:
vcpus   = '4'
memory  = '1024'
vnuma = [ [ pnode=0,size=512,vcpus=0-1,vdistances=10,20  ],
   [ pnode=1,size=512,vcpus=2-3,vdistances=20,10  ] ]
cpus=[0,1,8,9]

This means vcpus 0 and 1, which are assigned to vnode 0, are pinned to
pcpu 0 and 1, which are siblings, per the host topology.
Similarly, vcpus 2 and 3, assigned to vnode 1, are assigned to two
siblings pcpus on pnode 1.

This seems to be honoured:
# xl vcpu-list 4
NameID  VCPU   CPU State   Time(s) Affinity 
(Hard / Soft)
test 4 00   -b-  10.9  0 / 0-7
test 4 11   -b-   7.6  1 / 0-7
test 4 28   -b-   0.1  8 / 8-15
test 4 39   -b-   0.1  9 / 8-15

And yet, no joy:
# ssh root@192.168.1.101 yes  /dev/null 21 
# ssh root@192.168.1.101 yes  /dev/null 21 
# ssh root@192.168.1.101 yes  /dev/null 21 
# ssh root@192.168.1.101 yes  /dev/null 21 
# xl vcpu-list 4
NameID  VCPU   CPU State   Time(s) Affinity 
(Hard / Soft)
test 4 00   r--  16.4  0 / 0-7
test 4 11   r--  12.5  1 / 0-7
test 4 28   -b-   0.2  8 / 8-15
test 4 39   -b-   0.1  9 / 8-15

So, what am I doing wrong at following the hw topology?


Besides, this is not necessarily a NUMA-only issue, it's a scheduling
one (inside the guest) as well.

Sure. That's what Jan said regarding SUSE's xen-kernel. No toplogy info
(or a trivial one) might be better than the wrong one...


Yep. Exacty. As Boris says, this is a generic scheduling issue, although
it's tru that it's only (as far as I can tell) with vNUMA that it bite
us so hard...


I am not sure that it's only vNUMA. It's just that with vNUMA we can see 
a warning (on your system) that something goes wrong. In other cases 
(like scheduling, or sizing objects based on discovered cache sizes) we 
don't see anything in the log but system/programs are making wrong 
decisions. (And your results above may well be the example of that)


-boris



I mean, performance are always going to be inconsistent,
but it's only in that case that you basically _loose_ some of the
vcpus! :-O

Dario



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 08/16] tools/libxc: Expose new hypercall xc_reserved_device_memory_map

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

We will introduce the hypercall xc_reserved_device_memory_map
approach to libxc. This helps us get rdm entry info according to
different parameters. If flag == PCI_DEV_RDM_ALL, all entries
should be exposed. Or we just expose that rdm entry specific to
a SBDF.

CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Reviewed-by: Kevin Tian kevin.t...@intel.com
Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com

---
v13: Mechanical changes to deal with changes to patch 01/
 XENMEM_reserved_device_memory_map.
---
 tools/libxc/include/xenctrl.h |8 
 tools/libxc/xc_domain.c   |   36 
 2 files changed, 44 insertions(+)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index ce9029c..9c5ef8b 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1314,6 +1314,14 @@ int xc_domain_set_memory_map(xc_interface *xch,
 int xc_get_machine_memory_map(xc_interface *xch,
   struct e820entry entries[],
   uint32_t max_entries);
+
+int xc_reserved_device_memory_map(xc_interface *xch,
+  uint32_t flags,
+  uint16_t seg,
+  uint8_t bus,
+  uint8_t devfn,
+  struct xen_reserved_device_memory entries[],
+  uint32_t *max_entries);
 #endif
 int xc_domain_set_time_offset(xc_interface *xch,
   uint32_t domid,
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 6db8d13..3151103 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -684,6 +684,42 @@ int xc_domain_set_memory_map(xc_interface *xch,
 
 return rc;
 }
+
+int xc_reserved_device_memory_map(xc_interface *xch,
+  uint32_t flags,
+  uint16_t seg,
+  uint8_t bus,
+  uint8_t devfn,
+  struct xen_reserved_device_memory entries[],
+  uint32_t *max_entries)
+{
+int rc;
+struct xen_reserved_device_memory_map xrdmmap = {
+.flags = flags,
+.dev.pci.seg = seg,
+.dev.pci.bus = bus,
+.dev.pci.devfn = devfn,
+.nr_entries = *max_entries
+};
+DECLARE_HYPERCALL_BOUNCE(entries,
+ sizeof(struct xen_reserved_device_memory) *
+ *max_entries, XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+
+if ( xc_hypercall_bounce_pre(xch, entries) )
+return -1;
+
+set_xen_guest_handle(xrdmmap.buffer, entries);
+
+rc = do_memory_op(xch, XENMEM_reserved_device_memory_map,
+  xrdmmap, sizeof(xrdmmap));
+
+xc_hypercall_bounce_post(xch, entries);
+
+*max_entries = xrdmmap.nr_entries;
+
+return rc;
+}
+
 int xc_get_machine_memory_map(xc_interface *xch,
   struct e820entry entries[],
   uint32_t max_entries)
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 06/16] hvmloader/pci: try to avoid placing BARs in RMRRs

2015-07-22 Thread Ian Jackson
From: George Dunlap george.dun...@eu.citrix.com

Try to avoid placing PCI BARs over RMRRs:

- If mmio_hole_size is not specified, and the existing MMIO range has
  RMRRs in it, and there is space to expand the hole in lowmem without
  moving more memory, then make the MMIO hole as large as possible.

- When placing RMRRs, find the next RMRR higher than the current base
  in the lowmem mmio hole.  If it overlaps, skip ahead of it and find
  the next one.

This certainly won't work in all cases, but it should work in a
significant number of cases.  Additionally, users should be able to
work around problems by setting mmio_hole_size larger in the guest
config.

Signed-off-by: George Dunlap george.dun...@eu.citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Reviewed-by: Jan Beulich jbeul...@suse.com
Release-acked-by: Wei Liu wei.l...@citrix.com
---
 tools/firmware/hvmloader/pci.c |   64 
 1 file changed, 64 insertions(+)

diff --git a/tools/firmware/hvmloader/pci.c b/tools/firmware/hvmloader/pci.c
index 5ff87a7..ff81fba 100644
--- a/tools/firmware/hvmloader/pci.c
+++ b/tools/firmware/hvmloader/pci.c
@@ -38,6 +38,45 @@ uint64_t pci_hi_mem_start = 0, pci_hi_mem_end = 0;
 enum virtual_vga virtual_vga = VGA_none;
 unsigned long igd_opregion_pgbase = 0;
 
+/* Check if the specified range conflicts with any reserved device memory. */
+static bool check_overlap_all(uint64_t start, uint64_t size)
+{
+unsigned int i;
+
+for ( i = 0; i  memory_map.nr_map; i++ )
+{
+if ( memory_map.map[i].type == E820_RESERVED 
+ check_overlap(start, size,
+   memory_map.map[i].addr,
+   memory_map.map[i].size) )
+return true;
+}
+
+return false;
+}
+
+/* Find the lowest RMRR ending above base but below 4G. */
+static int find_next_rmrr(uint32_t base)
+{
+unsigned int i;
+int next_rmrr = -1;
+uint64_t end, min_end = 1ULL  32;
+
+for ( i = 0; i  memory_map.nr_map ; i++ )
+{
+end = memory_map.map[i].addr + memory_map.map[i].size;
+
+if ( memory_map.map[i].type == E820_RESERVED 
+ end  base  end = min_end )
+{
+next_rmrr = i;
+min_end = end;
+}
+}
+
+return next_rmrr;
+}
+
 void pci_setup(void)
 {
 uint8_t is_64bar, using_64bar, bar64_relocate = 0;
@@ -46,6 +85,7 @@ void pci_setup(void)
 uint32_t vga_devfn = 256;
 uint16_t class, vendor_id, device_id;
 unsigned int bar, pin, link, isa_irq;
+int next_rmrr;
 
 /* Resources assignable to PCI devices via BARs. */
 struct resource {
@@ -299,6 +339,15 @@ void pci_setup(void)
 || (((pci_mem_start  1)  PAGE_SHIFT)
 = hvm_info-low_mem_pgend)) )
 pci_mem_start = 1;
+
+/*
+ * Try to accommodate RMRRs in our MMIO region on a best-effort basis.
+ * If we have RMRRs in the range, then make pci_mem_start just after
+ * hvm_info-low_mem_pgend.
+ */
+if ( pci_mem_start  (hvm_info-low_mem_pgend  PAGE_SHIFT) 
+ check_overlap_all(pci_mem_start, pci_mem_end-pci_mem_start) )
+pci_mem_start = hvm_info-low_mem_pgend  PAGE_SHIFT;
 }
 
 if ( mmio_total  (pci_mem_end - pci_mem_start) )
@@ -352,6 +401,8 @@ void pci_setup(void)
 io_resource.base = 0xc000;
 io_resource.max = 0x1;
 
+next_rmrr = find_next_rmrr(pci_mem_start);
+
 /* Assign iomem and ioport resources in descending order of size. */
 for ( i = 0; i  nr_bars; i++ )
 {
@@ -407,6 +458,19 @@ void pci_setup(void)
 }
 
 base = (resource-base  + bar_sz - 1)  ~(uint64_t)(bar_sz - 1);
+
+/* If we're using mem_resource, check for RMRR conflicts. */
+while ( resource == mem_resource 
+next_rmrr = 0 
+check_overlap(base, bar_sz,
+  memory_map.map[next_rmrr].addr,
+  memory_map.map[next_rmrr].size) )
+{
+base = memory_map.map[next_rmrr].addr + 
memory_map.map[next_rmrr].size;
+base = (base + bar_sz - 1)  ~(bar_sz - 1);
+next_rmrr = find_next_rmrr(base);
+}
+
 bar_data |= (uint32_t)base;
 bar_data_upper = (uint32_t)(base  32);
 base += bar_sz;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 05/16] hvmloader: get guest memory map into memory_map[]

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

Now we get this map layout by call XENMEM_memory_map then
save them into one global variable memory_map[]. It should
include lowmem range, rdm range and highmem range. Note
rdm range and highmem range may not exist in some cases.

And here we need to check if any reserved memory conflicts with
[RESERVED_MEMORY_DYNAMIC_START, RESERVED_MEMORY_DYNAMIC_END).
This range is used to allocate memory in hvmloder level, and
we would lead hvmloader failed in case of conflict since its
another rare possibility in real world.

CC: Keir Fraser k...@xen.org
CC: Jan Beulich jbeul...@suse.com
CC: Andrew Cooper andrew.coop...@citrix.com
CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Reviewed-by: Kevin Tian kevin.t...@intel.com
Reviewed-by: George Dunlap george.dun...@eu.citrix.com
Acked-by: Jan Beulich jbeul...@suse.com
---
 tools/firmware/hvmloader/e820.c  |   32 
 tools/firmware/hvmloader/e820.h  |7 +++
 tools/firmware/hvmloader/hvmloader.c |2 ++
 tools/firmware/hvmloader/util.c  |   26 ++
 tools/firmware/hvmloader/util.h  |   12 
 5 files changed, 79 insertions(+)

diff --git a/tools/firmware/hvmloader/e820.c b/tools/firmware/hvmloader/e820.c
index 2e05e93..7a414ab 100644
--- a/tools/firmware/hvmloader/e820.c
+++ b/tools/firmware/hvmloader/e820.c
@@ -23,6 +23,38 @@
 #include config.h
 #include util.h
 
+struct e820map memory_map;
+
+void memory_map_setup(void)
+{
+unsigned int nr_entries = E820MAX, i;
+int rc;
+uint64_t alloc_addr = RESERVED_MEMORY_DYNAMIC_START;
+uint64_t alloc_size = RESERVED_MEMORY_DYNAMIC_END - alloc_addr;
+
+rc = get_mem_mapping_layout(memory_map.map, nr_entries);
+
+if ( rc || !nr_entries )
+{
+printf(Get guest memory maps[%d] failed. (%d)\n, nr_entries, rc);
+BUG();
+}
+
+memory_map.nr_map = nr_entries;
+
+for ( i = 0; i  nr_entries; i++ )
+{
+if ( memory_map.map[i].type == E820_RESERVED 
+ check_overlap(alloc_addr, alloc_size,
+   memory_map.map[i].addr, memory_map.map[i].size) )
+{
+printf(Fail to setup memory map due to conflict);
+printf( on dynamic reserved memory range.\n);
+BUG();
+}
+}
+}
+
 void dump_e820_table(struct e820entry *e820, unsigned int nr)
 {
 uint64_t last_end = 0, start, end;
diff --git a/tools/firmware/hvmloader/e820.h b/tools/firmware/hvmloader/e820.h
index b2ead7f..8b5a9e0 100644
--- a/tools/firmware/hvmloader/e820.h
+++ b/tools/firmware/hvmloader/e820.h
@@ -15,6 +15,13 @@ struct e820entry {
 uint32_t type;
 } __attribute__((packed));
 
+#define E820MAX128
+
+struct e820map {
+unsigned int nr_map;
+struct e820entry map[E820MAX];
+};
+
 #endif /* __HVMLOADER_E820_H__ */
 
 /*
diff --git a/tools/firmware/hvmloader/hvmloader.c 
b/tools/firmware/hvmloader/hvmloader.c
index 25b7f08..84c588c 100644
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -262,6 +262,8 @@ int main(void)
 
 init_hypercalls();
 
+memory_map_setup();
+
 xenbus_setup();
 
 bios = detect_bios();
diff --git a/tools/firmware/hvmloader/util.c b/tools/firmware/hvmloader/util.c
index 80d822f..122e3fa 100644
--- a/tools/firmware/hvmloader/util.c
+++ b/tools/firmware/hvmloader/util.c
@@ -27,6 +27,17 @@
 #include xen/memory.h
 #include xen/sched.h
 
+/*
+ * Check whether there exists overlap in the specified memory range.
+ * Returns true if exists, else returns false.
+ */
+bool check_overlap(uint64_t start, uint64_t size,
+   uint64_t reserved_start, uint64_t reserved_size)
+{
+return (start + size  reserved_start) 
+(start  reserved_start + reserved_size);
+}
+
 void wrmsr(uint32_t idx, uint64_t v)
 {
 asm volatile (
@@ -368,6 +379,21 @@ uuid_to_string(char *dest, uint8_t *uuid)
 *p = '\0';
 }
 
+int get_mem_mapping_layout(struct e820entry entries[], uint32_t *max_entries)
+{
+int rc;
+struct xen_memory_map memmap = {
+.nr_entries = *max_entries
+};
+
+set_xen_guest_handle(memmap.buffer, entries);
+
+rc = hypercall_memory_op(XENMEM_memory_map, memmap);
+*max_entries = memmap.nr_entries;
+
+return rc;
+}
+
 void mem_hole_populate_ram(xen_pfn_t mfn, uint32_t nr_mfns)
 {
 static int over_allocated;
diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
index f99c0f19..1100a3b 100644
--- a/tools/firmware/hvmloader/util.h
+++ b/tools/firmware/hvmloader/util.h
@@ -4,8 +4,10 @@
 #include stdarg.h
 #include stdint.h
 #include stddef.h
+#include stdbool.h
 #include xen/xen.h
 #include xen/hvm/hvm_info_table.h
+#include e820.h
 
 #define __STR(...) #__VA_ARGS__
 #define STR(...) 

[Xen-devel] [PATCH 15/16] xen/vtd: prevent from assign the device with shared rmrr

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

Currently we're intending to cover this kind of devices
with shared RMRR simply since the case of shared RMRR is
a rare case according to our previous experiences. But
late we can group these devices which shared rmrr, and
then allow all devices within a group to be assigned to
same domain.

CC: Yang Zhang yang.z.zh...@intel.com
CC: Kevin Tian kevin.t...@intel.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Acked-by: Kevin Tian kevin.t...@intel.com
---
 xen/drivers/passthrough/vtd/iommu.c |   30 +++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/xen/drivers/passthrough/vtd/iommu.c 
b/xen/drivers/passthrough/vtd/iommu.c
index 8a8d763..ce5c295 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -2293,13 +2293,37 @@ static int intel_iommu_assign_device(
 if ( list_empty(acpi_drhd_units) )
 return -ENODEV;
 
+seg = pdev-seg;
+bus = pdev-bus;
+/*
+ * In rare cases one given rmrr is shared by multiple devices but
+ * obviously this would put the security of a system at risk. So
+ * we should prevent from this sort of device assignment.
+ *
+ * TODO: in the future we can introduce group device assignment
+ * interface to make sure devices sharing RMRR are assigned to the
+ * same domain together.
+ */
+for_each_rmrr_device( rmrr, bdf, i )
+{
+if ( rmrr-segment == seg 
+ PCI_BUS(bdf) == bus 
+ PCI_DEVFN2(bdf) == devfn 
+ rmrr-scope.devices_cnt  1 )
+{
+printk(XENLOG_G_ERR VTDPREFIX
+cannot assign %04x:%02x:%02x.%u
+with shared RMRR at %PRIx64 for Dom%d.\n,
+   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+   rmrr-base_address, d-domain_id);
+return -EPERM;
+}
+}
+
 ret = reassign_device_ownership(hardware_domain, d, devfn, pdev);
 if ( ret )
 return ret;
 
-seg = pdev-seg;
-bus = pdev-bus;
-
 /* Setup rmrr identity mapping */
 for_each_rmrr_device( rmrr, bdf, i )
 {
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

While building a VM, HVM domain builder provides struct hvm_info_table{}
to help hvmloader. Currently it includes two fields to construct guest
e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should
check them to fix any conflict with RDM.

RMRR can reside in address space beyond 4G theoretically, but we never
see this in real world. So in order to avoid breaking highmem layout
we don't solve highmem conflict. Note this means highmem rmrr could still
be supported if no conflict.

But in the case of lowmem, RMRR probably scatter the whole RAM space.
Especially multiple RMRR entries would worsen this to lead a complicated
memory layout. And then its hard to extend hvm_info_table{} to work
hvmloader out. So here we're trying to figure out a simple solution to
avoid breaking existing layout. So when a conflict occurs,

#1. Above a predefined boundary (2G)
- move lowmem_end below reserved region to solve conflict;

#2. Below a predefined boundary (2G)
- Check strict/relaxed policy.
strict policy leads to fail libxl. Note when both policies
are specified on a given region, 'strict' is always preferred.
relaxed policy issue a warning message and also mask this entry 
INVALID
to indicate we shouldn't expose this entry to hvmloader.

Note later we need to provide a parameter to set that predefined boundary
dynamically.

CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Reviewed-by: Kevin Tian kevin.t...@intel.com
---
v13: Mechanical changes to deal with changes to patch 01/
 XENMEM_reserved_device_memory_map.
---
 tools/libxl/libxl_create.c   |2 +-
 tools/libxl/libxl_dm.c   |  274 ++
 tools/libxl/libxl_dom.c  |   17 ++-
 tools/libxl/libxl_internal.h |   14 ++-
 tools/libxl/libxl_types.idl  |7 ++
 5 files changed, 311 insertions(+), 3 deletions(-)

diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 7c884c4..5b57062 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -407,7 +407,7 @@ int libxl__domain_build(libxl__gc *gc,
 
 switch (info-type) {
 case LIBXL_DOMAIN_TYPE_HVM:
-ret = libxl__build_hvm(gc, domid, info, state);
+ret = libxl__build_hvm(gc, domid, d_config, state);
 if (ret)
 goto out;
 
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 634b8d2..40b2bba 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -92,6 +92,280 @@ const char *libxl__domain_device_model(libxl__gc *gc,
 return dm;
 }
 
+static int
+libxl__xc_device_get_rdm(libxl__gc *gc,
+ uint32_t flag,
+ uint16_t seg,
+ uint8_t bus,
+ uint8_t devfn,
+ unsigned int *nr_entries,
+ struct xen_reserved_device_memory **xrdm)
+{
+int rc = 0, r;
+
+/*
+ * We really can't presume how many entries we can get in advance.
+ */
+*nr_entries = 0;
+r = xc_reserved_device_memory_map(CTX-xch, flag, seg, bus, devfn,
+  NULL, nr_entries);
+assert(r = 0);
+/* 0 means we have no any rdm entry. */
+if (!r) goto out;
+
+if (errno != ENOBUFS) {
+rc = ERROR_FAIL;
+goto out;
+}
+
+GCNEW_ARRAY(*xrdm, *nr_entries);
+r = xc_reserved_device_memory_map(CTX-xch, flag, seg, bus, devfn,
+  *xrdm, nr_entries);
+if (r)
+rc = ERROR_FAIL;
+
+ out:
+if (rc) {
+*nr_entries = 0;
+*xrdm = NULL;
+LOG(ERROR, Could not get reserved device memory maps.\n);
+}
+return rc;
+}
+
+/*
+ * Check whether there exists rdm hole in the specified memory range.
+ * Returns true if exists, else returns false.
+ */
+static bool overlaps_rdm(uint64_t start, uint64_t memsize,
+ uint64_t rdm_start, uint64_t rdm_size)
+{
+return (start + memsize  rdm_start)  (start  rdm_start + rdm_size);
+}
+
+static void
+add_rdm_entry(libxl__gc *gc, libxl_domain_config *d_config,
+  uint64_t rdm_start, uint64_t rdm_size, int rdm_policy)
+{
+d_config-rdms = libxl__realloc(NOGC, d_config-rdms,
+(d_config-num_rdms+1) * sizeof(libxl_device_rdm));
+
+d_config-rdms[d_config-num_rdms].start = rdm_start;
+d_config-rdms[d_config-num_rdms].size = rdm_size;
+d_config-rdms[d_config-num_rdms].policy = rdm_policy;
+d_config-num_rdms++;
+}
+
+/*
+ * Check reported RDM regions and handle potential gfn conflicts according
+ * to user preferred policy.
+ *
+ * RDM can reside in address space beyond 4G theoretically, but we never
+ * see this in real world. So in order to 

[Xen-devel] [PATCH 10/16] tools: introduce some new parameters to set rdm policy

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

This patch introduces user configurable parameters to specify RDM
resource and according policies,

Global RDM parameter:
rdm = strategy=host,policy=strict/relaxed
Per-device RDM parameter:
pci = [ 'sbdf, rdm_policy=strict/relaxed' ]

Global RDM parameter, strategy, allows user to specify reserved regions
explicitly, Currently, using 'host' to include all reserved regions reported
on this platform which is good to handle hotplug scenario. In the future
this parameter may be further extended to allow specifying random regions,
e.g. even those belonging to another platform as a preparation for live
migration with passthrough devices. By default this isn't set so we don't
check all rdms. Instead, we just check rdm specific to a given device if
you're assigning this kind of device. Note this option is not recommended
unless you can make sure any conflict does exist.

'strict/relaxed' policy decides how to handle conflict when reserving RDM
regions in pfn space. If conflict exists, 'strict' means an immediate error
so VM can't keep running, while 'relaxed' allows moving forward with a
warning message thrown out.

Default per-device RDM policy is same as default global RDM policy as being
'relaxed'. And the per-device policy would override the global policy like
others.

CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Acked-by: Wei Liu wei.l...@citrix.com
Acked-by: Ian Jackson ian.jack...@eu.citrix.com
Checked-by: Ian Jackson ian.jack...@eu.citrix.com
---
 docs/man/xl.cfg.pod.5|   81 ++
 docs/misc/vtd.txt|   24 +
 tools/libxl/libxl_create.c   |7 
 tools/libxl/libxl_internal.h |2 ++
 tools/libxl/libxl_pci.c  |9 +
 tools/libxl/libxl_types.idl  |   18 ++
 6 files changed, 141 insertions(+)

diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 382f30b..e6e0f70 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -633,6 +633,79 @@ assigned slave device.
 
 =back
 
+=item Brdm=RDM_RESERVATION_STRING
+
+(HVM/x86 only) Specifies information about Reserved Device Memory (RDM),
+which is necessary to enable robust device passthrough. One example of RDM
+is reported through ACPI Reserved Memory Region Reporting (RMRR) structure
+on x86 platform.
+
+BRDM_RESERVE_STRING has the form C[KEY=VALUE,KEY=VALUE,... where:
+
+=over 4
+
+=item BKEY=VALUE
+
+Possible BKEYs are:
+
+=over 4
+
+=item Bstrategy=STRING
+
+Currently there is only one valid type:
+
+host means all reserved device memory on this platform should be checked to
+reserve regions in this VM's guest address space. This global rdm parameter
+allows user to specify reserved regions explicitly, and using host includes
+all reserved regions reported on this platform, which is useful when doing
+hotplug.
+
+By default this isn't set so we don't check all rdms. Instead, we just check
+rdm specific to a given device if you're assigning this kind of device. Note
+this option is not recommended unless you can make sure any conflict does 
exist.
+
+For example, you're trying to set memory = 2800 to allocate memory to one
+given VM but the platform owns two RDM regions like,
+
+Device A [sbdf_A]: RMRR region_A: base_addr ac6d3000 end_address ac6e6fff
+Device B [sbdf_B]: RMRR region_B: base_addr ad80 end_address afff
+
+In this conflict case,
+
+#1. If Bstrategy is set to host, for example,
+
+rdm = strategy=host,policy=strict or rdm = strategy=host,policy=relaxed
+
+It means all conflicts will be handled according to the policy
+introduced by Bpolicy as described below.
+
+#2. If Bstrategy is not set at all, but
+
+pci = [ 'sbdf_A, rdm_policy=x' ]
+
+It means only one conflict of region_A will be handled according to the policy
+introduced by Brdm_policy=STRING as described inside pci options.
+
+=item Bpolicy=STRING
+
+Specifies how to deal with conflicts when reserving reserved device
+memory in guest address space.
+
+When that conflict is unsolved,
+
+strict means VM can't be created, or the associated device can't be
+attached in the case of hotplug.
+
+relaxed allows VM to be created but may cause VM to crash if
+pass-through device accesses RDM. For exampl,e Windows IGD GFX driver
+always accessed RDM regions so it leads to VM crash.
+
+Note this may be overridden by rdm_policy option in PCI device configuration.
+
+=back
+
+=back
+
 =item Bpci=[ PCI_SPEC_STRING, PCI_SPEC_STRING, ... ]
 
 Specifies the host PCI devices to passthrough to this guest. Each 
BPCI_SPEC_STRING
@@ -695,6 +768,14 @@ dom0 without confirmation.  Please use with care.
 D0-D3hot power management states for the PCI device. False (0) by
 default.
 
+=item Brdm_policy=STRING
+
+(HVM/x86 only) This is same as policy option inside the rdm option 

[Xen-devel] [PATCH v13 00/16] Fix RMRR (avoid RDM)

2015-07-22 Thread Ian Jackson
This is about v13 of the RDM series.  Compared to v11, I have:
 - imported it into git (dropping change notices for all previous version)
 - brought in new versions of 01 (aka v12a) and 06 from Jan
 - made adjustments as needed by the new 01 from Jan
 - double-checked the acks etc., dropping unjustified acks

git branch here:
  http://xenbits.xen.org/gitweb/?p=people/iwj/xen.git;a=summary
  git://xenbits.xen.org/people/iwj/xen.git
  base.rdm-v13..wip.rdm-v13

*a   01   introduce XENMEM_reserved_device_memory_map
 a   02   xen/vtd: create RMRR mapping
 a   03   xen/passthrough: extend hypercall to support rdm reservation policy
 a   04   xen: enable XENMEM_memory_map in hvm
 a   05   hvmloader: get guest memory map into memory_map[]
*a   06   hvmloader/pci: try to avoid placing BARs in RMRRs
 a   07   hvmloader/e820: construct guest e820 table
* T  08   tools/libxc: Expose new hypercall xc_reserved_device_memory_map
 a   09   tools: extend xc_assign_device() to support rdm reservation policy
 a   10   tools: introduce some new parameters to set rdm policy
* T  11   tools/libxl: detect and avoid conflicts with RDM
 a   12   tools: introduce a new parameter to set a predefined rdm boundary
 a   13   libxl: construct e820 map with RDM information for HVM guest
 a   14   xen/vtd: enable USB device assignment
 a   15   xen/vtd: prevent from assign the device with shared rmrr
 a   16   tools: parse to enable new rdm policy parameters

* = changed in v13, since v11
a = acked enough to commit
T = tools maintainer ack required

Wei, can you please give your formal ack for the two changed patches ?

Tiejun, can you please test this series ?  Hopefully if you say it
still works after this, we can commit it tomorrow.

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 07/16] hvmloader/e820: construct guest e820 table

2015-07-22 Thread Ian Jackson
From: Tiejun Chen tiejun.c...@intel.com

Now use the hypervisor-supplied memory map to build our final e820 table:
* Add regions for BIOS ranges and other special mappings not in the
  hypervisor map
* Add in the hypervisor supplied regions
* Adjust the lowmem and highmem regions if we've had to relocate
  memory (adding a highmem region if necessary)
* Sort all the ranges so that they appear in memory order.

CC: Keir Fraser k...@xen.org
CC: Jan Beulich jbeul...@suse.com
CC: Andrew Cooper andrew.coop...@citrix.com
CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Wei Liu wei.l...@citrix.com
Reviewed-by: George Dunlap george.dun...@eu.citrix.com
Reviewed-by: Jan Beulich jbeul...@suse.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
---
 tools/firmware/hvmloader/e820.c |  109 ++-
 1 file changed, 96 insertions(+), 13 deletions(-)

diff --git a/tools/firmware/hvmloader/e820.c b/tools/firmware/hvmloader/e820.c
index 7a414ab..a6cacdf 100644
--- a/tools/firmware/hvmloader/e820.c
+++ b/tools/firmware/hvmloader/e820.c
@@ -105,7 +105,11 @@ int build_e820_table(struct e820entry *e820,
  unsigned int lowmem_reserved_base,
  unsigned int bios_image_base)
 {
-unsigned int nr = 0;
+unsigned int nr = 0, i, j;
+uint32_t low_mem_end = hvm_info-low_mem_pgend  PAGE_SHIFT;
+uint32_t add_high_mem = 0;
+uint64_t high_mem_end = (uint64_t)hvm_info-high_mem_pgend  PAGE_SHIFT;
+uint64_t map_start, map_size, map_end;
 
 if ( !lowmem_reserved_base )
 lowmem_reserved_base = 0xA;
@@ -149,13 +153,6 @@ int build_e820_table(struct e820entry *e820,
 e820[nr].type = E820_RESERVED;
 nr++;
 
-/* Low RAM goes here. Reserve space for special pages. */
-BUG_ON((hvm_info-low_mem_pgend  PAGE_SHIFT)  (2u  20));
-e820[nr].addr = 0x10;
-e820[nr].size = (hvm_info-low_mem_pgend  PAGE_SHIFT) - e820[nr].addr;
-e820[nr].type = E820_RAM;
-nr++;
-
 /*
  * Explicitly reserve space for special pages.
  * This space starts at RESERVED_MEMBASE an extends to cover various
@@ -191,16 +188,102 @@ int build_e820_table(struct e820entry *e820,
 nr++;
 }
 
+/* Low RAM goes here. Reserve space for special pages. */
+BUG_ON(low_mem_end  (2u  20));
 
-if ( hvm_info-high_mem_pgend )
+/*
+ * Construct E820 table according to recorded memory map.
+ *
+ * The memory map created by toolstack may include,
+ *
+ * #1. Low memory region
+ *
+ * Low RAM starts at least from 1M to make sure all standard regions
+ * of the PC memory map, like BIOS, VGA memory-mapped I/O and vgabios,
+ * have enough space.
+ *
+ * #2. Reserved regions if they exist
+ *
+ * #3. High memory region if it exists
+ *
+ * Note we just have one low memory entry and one high mmeory entry if
+ * exists.
+ *
+ * But we may have relocated RAM to allocate sufficient MMIO previously
+ * so low_mem_pgend would be changed over there. And here memory_map[]
+ * records the original low/high memory, so if low_mem_end is less than
+ * the original we need to revise low/high memory range firstly.
+ */
+for ( i = 0; i  memory_map.nr_map; i++ )
 {
-e820[nr].addr = ((uint64_t)1  32);
-e820[nr].size =
-((uint64_t)hvm_info-high_mem_pgend  PAGE_SHIFT) - e820[nr].addr;
-e820[nr].type = E820_RAM;
+map_start = memory_map.map[i].addr;
+map_size = memory_map.map[i].size;
+map_end = map_start + map_size;
+
+/* If we need to adjust lowmem. */
+if ( memory_map.map[i].type == E820_RAM 
+ low_mem_end  map_start  low_mem_end  map_end )
+{
+add_high_mem = map_end - low_mem_end;
+memory_map.map[i].size = low_mem_end - map_start;
+break;
+}
+}
+
+/* If we need to adjust highmem. */
+if ( add_high_mem )
+{
+/* Modify the existing highmem region if it exists. */
+for ( i = 0; i  memory_map.nr_map; i++ )
+{
+map_start = memory_map.map[i].addr;
+map_size = memory_map.map[i].size;
+map_end = map_start + map_size;
+
+if ( memory_map.map[i].type == E820_RAM 
+ map_start == ((uint64_t)1  32))
+{
+memory_map.map[i].size += add_high_mem;
+break;
+}
+}
+
+/* If there was no highmem region, just create one. */
+if ( i == memory_map.nr_map )
+{
+memory_map.map[i].addr = ((uint64_t)1  32);
+memory_map.map[i].size = add_high_mem;
+memory_map.map[i].type = E820_RAM;
+memory_map.nr_map++;
+}
+
+/* A sanity check if high memory is broken. */
+BUG_ON( high_mem_end !=
+  

[Xen-devel] [PATCH 01/16] introduce XENMEM_reserved_device_memory_map

2015-07-22 Thread Ian Jackson
From: Jan Beulich jbeul...@suse.com

This is a prerequisite for punching holes into HVM and PVH guests' P2M
to allow passing through devices that are associated with (on VT-d)
RMRRs.

Signed-off-by: Jan Beulich jbeul...@suse.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Acked-by: Kevin Tian kevin.t...@intel.com
Acked-by: Ian Campbell ian.campb...@citrix.com
---
v12a: Move interface structure union member to the end, while moving
 the whole public header block into a __XEN__ / __XEN_TOOLS__
 conditional block.
v12: Restore changes as much as possible to my original version, fixing
 a few issues that got introduced after handing it over. Unionize
 new public memop interface structure to allow for non-PCI to be
 supported later on. Check flags to have all currently undefined
 flags clear. Refine adjustments to xen/pci.h.
---
 xen/common/compat/memory.c   |   65 ++
 xen/common/memory.c  |   62 
 xen/drivers/passthrough/iommu.c  |   10 ++
 xen/drivers/passthrough/vtd/dmar.c   |   27 ++
 xen/drivers/passthrough/vtd/extern.h |1 +
 xen/drivers/passthrough/vtd/iommu.c  |1 +
 xen/include/public/memory.h  |   37 ++-
 xen/include/xen/iommu.h  |   10 ++
 xen/include/xen/pci.h|4 +++
 xen/include/xlat.lst |3 +-
 10 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c
index b258138..002948b 100644
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -17,6 +17,42 @@ CHECK_TYPE(domid);
 CHECK_mem_access_op;
 CHECK_vmemrange;
 
+#ifdef HAS_PASSTHROUGH
+struct get_reserved_device_memory {
+struct compat_reserved_device_memory_map map;
+unsigned int used_entries;
+};
+
+static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
+  u32 id, void *ctxt)
+{
+struct get_reserved_device_memory *grdm = ctxt;
+u32 sbdf = PCI_SBDF3(grdm-map.dev.pci.seg, grdm-map.dev.pci.bus,
+ grdm-map.dev.pci.devfn);
+
+if ( !(grdm-map.flags  XENMEM_RDM_ALL)  (sbdf != id) )
+return 0;
+
+if ( grdm-used_entries  grdm-map.nr_entries )
+{
+struct compat_reserved_device_memory rdm = {
+.start_pfn = start, .nr_pages = nr
+};
+
+if ( rdm.start_pfn != start || rdm.nr_pages != nr )
+return -ERANGE;
+
+if ( __copy_to_compat_offset(grdm-map.buffer, grdm-used_entries,
+ rdm, 1) )
+return -EFAULT;
+}
+
+++grdm-used_entries;
+
+return 1;
+}
+#endif
+
 int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
 {
 int split, op = cmd  MEMOP_CMD_MASK;
@@ -303,6 +339,35 @@ int compat_memory_op(unsigned int cmd, 
XEN_GUEST_HANDLE_PARAM(void) compat)
 break;
 }
 
+#ifdef HAS_PASSTHROUGH
+case XENMEM_reserved_device_memory_map:
+{
+struct get_reserved_device_memory grdm;
+
+if ( unlikely(start_extent) )
+return -ENOSYS;
+
+if ( copy_from_guest(grdm.map, compat, 1) ||
+ !compat_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
+return -EFAULT;
+
+if ( grdm.map.flags  ~XENMEM_RDM_ALL )
+return -EINVAL;
+
+grdm.used_entries = 0;
+rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+  grdm);
+
+if ( !rc  grdm.map.nr_entries  grdm.used_entries )
+rc = -ENOBUFS;
+grdm.map.nr_entries = grdm.used_entries;
+if ( __copy_to_guest(compat, grdm.map, 1) )
+rc = -EFAULT;
+
+return rc;
+}
+#endif
+
 default:
 return compat_arch_memory_op(cmd, compat);
 }
diff --git a/xen/common/memory.c b/xen/common/memory.c
index e5d49d8..61bb94c 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -748,6 +748,39 @@ static int construct_memop_from_reservation(
 return 0;
 }
 
+#ifdef HAS_PASSTHROUGH
+struct get_reserved_device_memory {
+struct xen_reserved_device_memory_map map;
+unsigned int used_entries;
+};
+
+static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
+  u32 id, void *ctxt)
+{
+struct get_reserved_device_memory *grdm = ctxt;
+u32 sbdf = PCI_SBDF3(grdm-map.dev.pci.seg, grdm-map.dev.pci.bus,
+ grdm-map.dev.pci.devfn);
+
+if ( !(grdm-map.flags  XENMEM_RDM_ALL)  (sbdf != id) )
+return 0;
+
+if ( grdm-used_entries  grdm-map.nr_entries )
+{
+struct xen_reserved_device_memory rdm = {
+.start_pfn = start, .nr_pages = nr
+};
+
+if ( 

Re: [Xen-devel] [v11][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Jan Beulich
 On 22.07.15 at 12:04, ian.jack...@eu.citrix.com wrote:
 Tiejun Chen writes ([v11][PATCH 11/16] tools/libxl: detect and avoid 
 conflicts with RDM):
 While building a VM, HVM domain builder provides struct hvm_info_table{}
 to help hvmloader. Currently it includes two fields to construct guest
 e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should
 check them to fix any conflict with RDM.
 
 Acked-by: Ian Jackson ian.jack...@eu.citrix.com

Did you intentionally not move this forward to the v13 you just sent?

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Wei Liu
On Wed, Jul 22, 2015 at 04:44:14PM +0100, Ian Jackson wrote:
 From: Tiejun Chen tiejun.c...@intel.com
 
 While building a VM, HVM domain builder provides struct hvm_info_table{}
 to help hvmloader. Currently it includes two fields to construct guest
 e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should
 check them to fix any conflict with RDM.
 
 RMRR can reside in address space beyond 4G theoretically, but we never
 see this in real world. So in order to avoid breaking highmem layout
 we don't solve highmem conflict. Note this means highmem rmrr could still
 be supported if no conflict.
 
 But in the case of lowmem, RMRR probably scatter the whole RAM space.
 Especially multiple RMRR entries would worsen this to lead a complicated
 memory layout. And then its hard to extend hvm_info_table{} to work
 hvmloader out. So here we're trying to figure out a simple solution to
 avoid breaking existing layout. So when a conflict occurs,
 
 #1. Above a predefined boundary (2G)
 - move lowmem_end below reserved region to solve conflict;
 
 #2. Below a predefined boundary (2G)
 - Check strict/relaxed policy.
 strict policy leads to fail libxl. Note when both policies
 are specified on a given region, 'strict' is always preferred.
 relaxed policy issue a warning message and also mask this entry 
 INVALID
 to indicate we shouldn't expose this entry to hvmloader.
 
 Note later we need to provide a parameter to set that predefined boundary
 dynamically.
 
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Wei Liu wei.l...@citrix.com
 Signed-off-by: Tiejun Chen tiejun.c...@intel.com
 Reviewed-by: Kevin Tian kevin.t...@intel.com

Ian, you forgot your S-o-B.

Acked-by: Wei Liu wei.l...@citrix.com

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [MINUTES] Monthly Xen.org Technical Call (2015-07-22)

2015-07-22 Thread Ian Campbell
This was the rescheduled 8 July call, on the topic of PVH and related work

= Attendees =

 * Ian Campbell
 * David Vrabel
 * Stefano Stabellini
 * Boris Ostrovsky
 * Elena Ufimtseva
 * Wei Liu
 * George Dunlap
 * Konrad Rzeszutek Wilk
 * Jan Beulich
 * Daniel Kiper
 * Andrew Cooper 
 * Roger Pau Monne

Apologies:

 * Ian Jackson

= Current Status =

Note on naming: Eventually we will end up with a thing called PVH. At
the moment there are two options for who we get there. For the
purposes of this discussion we need to distinguish the two
approaches. So we call them PVH classic (the approach following the
path start by Mukesh) and no-dm (the approach which Roger has
proposed patches for).

== PVH (classic) ==

Boris: Most important lack is 32-bit, AMD and migration support. To
get the code to support the first two features doesn't require major
additional code but doesn't get us where we wanted to go.

George: Does it work with shadow? Answer: No.

Andy: Also doesn't work with AMD.

Andy: What is wrong with migration?
Boris: It may be a trivial fix but it doesn't work in practice
Andy: Thought Roger had fixed
Roger: Fixed migration using PVH dom0, but not of a PVH guest.

Stefano: Also missing PCI passthrough

Jan: MMIO handling for dom0 is a problem with the current code,
e.g. MSI related issues.

Boris: 32-bit patches only support domU
Roger: Not sure of the point of 32-bit dom0 PVH

Someone/several: Should be no restriction on the mode of a PVH guest.

== PVH (no-dm/HVMlite) ==

Roger:

Picked up libxc and noticed that the PV vs HVM builders are very
different and the HVM one is not very good. Currently patches move the
builder for HVM to use the same code as the PV builder code. Not too
complicated, lots of hooks in place already. Adds another type of
guest with e.g. different hooks for populating memory, special pages.

Has been able to unify the way we build the two guest types, to the
point of one function in libxl being the decision point.

Can now choose a container type and then load whatever kernel,
hvmload, PV kernel, pv grub etc etc.

Remaining work is to disable emulated devices in Xen.

Start a no-dm guest as an HVM guest, 32-bit flat, same as with HVM
today with the entry point.

= Ongoing work =

(we sort of drifted into this)

Needs a kernel with a suitable 32-bit flat entry point, which may be
Xen specific or may be possible to share with native (depends on the
guest). Other than entry point doesn't need much other than PVH.

Konrad: Another entry point without page tables might be an issue in
terms of upstreaming to Linux. Going from the Xen entry point to the
Linux generic entry point is similar to the x86/PV.

Roger: The physical entry point should use a different ELF note, could
also check for 32 vs 64 but would prefer not to, i.e. would prefer to
have the guest take care of the mode starting from a common mode.

Konrad: The issue is that it expects a bootloader stub (internal to
Linux).

David: Can we use EFI firmware (replacing hvmloader) which provides a
known entry protocol etc and boots using the UEFI interfaces.

George: Need to think about microkernels too

David: EFI just for Linux, but microkernels do not have to use it.

Roger: dom0 is a problem for UEFI

David: Just use it as another multiboot module

Jan: Using OVMF/EFI as the basis, is: Is OVMF Xen aware.

Andy: it is aware of PV drivers

Jan: But not using all of the PV infra is the bigger part.

David: PV entry point is a non-starter. If the entry point for no-dm
is not the native entry point then it is not viable and won't be
accepted. Maybe not the full UEFI firmware, but some sort of firmware.

Ian: Do we mean the 32- or 16-bit real entry point?

David: Needs to see a more concrete proposal. It is fine to have a
stub to build a set of 64-bit page tables and enter at the 64-bit
entry point.

Konrad: Would need per OS stubs.

David: Doesn't see a problem with that.

Konrad: This path wouldn't enable the pvops hooks

David: Those hooks already exists.

Roger: But where can you set those hooks?

Konrad: Much later. What happens between early boot up stage and when
we can set those hooks.

David: Something which needs to get looked at.

Andy: Could move the native entry points per-hypervisor checks
earlier.

Konrad: This means working with the x86 maintainers.

David: Doesn't see any issue with that.

Andy: Also benefits other hypervisors not just us.

Summary:

  - Want boot up path to be as no-dm (flat 32-bit, no paging), with
different per-OS stubs depending on the OS, which setup whatever
is needed for the given OS.

Roger: Where does the stub live, in Xen or in guest OS?

David: Stub could live in the guest OS. Self contained, independent of
other x86 code. Just needs to go from Xen entry point to the native
entry point.

Roger: Needs to be bundled with Linux

Stefano: Similar model to the EFI stub.

Ian: Need some way to find the Xen entry point.

Konrad: Who is going to write that code.

Boris: Was 

Re: [Xen-devel] [PATCH v5 7/7] x86/PCI: intercept all PV Dom0 MMCFG writes

2015-07-22 Thread Andrew Cooper
On 20/07/15 15:23, Jan Beulich wrote:
 ... to hook up pci_conf_write_intercept() even for Dom0 not using
 method 1 accesses for the base part of PCI device config space.

 Signed-off-by: Jan Beulich jbeul...@suse.com
 ---
 Not entirely sure whether the complicated logging logic in x86/mm.c is
 actually worth it.

There is a behavioural change insofar that 64bit access will no longer
function, but I am not sure this matters too much, as such an access to
config space is almost certainly buggy.

The rest however looks sane, so

Reviewed-by: Andrew Cooper andrew.coop...@citrix.com

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] xen/HVM: atomically access pointers in bufioreq handling

2015-07-22 Thread Stefano Stabellini
On Wed, 22 Jul 2015, Stefano Stabellini wrote:
 On Wed, 22 Jul 2015, Jan Beulich wrote:
   On 22.07.15 at 16:50, stefano.stabell...@eu.citrix.com wrote:
   On Wed, 22 Jul 2015, Jan Beulich wrote:
--- a/xen-hvm.c
+++ b/xen-hvm.c
@@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta
 
 static int handle_buffered_iopage(XenIOState *state)
 {
+buffered_iopage_t *buf_page = state-buffered_io_page;
 buf_ioreq_t *buf_req = NULL;
 ioreq_t req;
 int qw;
 
-if (!state-buffered_io_page) {
+if (!buf_page) {
 return 0;
 }
 
 memset(req, 0x00, sizeof(req));
 
-while (state-buffered_io_page-read_pointer != 
state-buffered_io_page-write_pointer) {
-buf_req = state-buffered_io_page-buf_ioreq[
-state-buffered_io_page-read_pointer % 
IOREQ_BUFFER_SLOT_NUM];
+for (;;) {
+uint32_t rdptr = buf_page-read_pointer, wrptr;
+
+xen_rmb();

We don't need this barrier.
   
   How would we not? We need to make sure we read in this order
   read_pointer, write_pointer, and read_pointer again (in the
   comparison).  Only that way we can be certain to hold a matching
   pair in hands at the end.
   
   See below
   
   
+wrptr = buf_page-write_pointer;
+xen_rmb();
+if (rdptr != buf_page-read_pointer) {

I think you have to use atomic_read to be sure that the second read to
buf_page-read_pointer is up to date and not optimized away.
   
   No, suppressing such an optimization is an intended (side) effect
   of the barriers used.
   
   I understand what you are saying but I am not sure if your assumption
   is correct. Can the compiler optimize the second read anyway?
  
  No, it can't, due to the barrier.
 
 OK
 
 
But if I think that it would be best to simply use atomic_read to read
both pointers at once using uint64_t as type, so you are sure to get a
consistent view and there is no need for this check.
   
   But I'm specifically trying to avoid e.g. a locked cmpxchg8b here on
   ix86.
   
   OK, but we don't need cmpxchg8b, just:
   
   #define atomic_read(ptr)   (*(__typeof__(*ptr) volatile*) (ptr))
  
  This only gives the impression of being atomic when the type is wider
  than a machine word. There's no ix86 (i.e. 32-bit) instruction other
  than LOCK CMPXCHG8B (and possibly MMX/SSE/AVX ones) allowing
  to atomically read a 64-bit quantity.
 
 Damn!
 
 
   something like:
   
for (;;) {
uint64_t ptrs;
uint32_t rdptr, wrptr;

ptrs = 
   atomic_read((uint64_t*)state-buffered_io_page-read_pointer);
rdptr = (uint32_t)ptrs;
wrptr = *(((uint32_t*)ptrs) + 1);

if (rdptr == wrptr) {
continue;
}

[work]

atomic_add(buf_page-read_pointer, qw + 1);
}
   
   it would work, wouldn't it?
  
  Looks like so, but the amount of casts alone makes me wish for
  no-one to consider this (but I agree that the casts could be
  taken care of). Still I think (as btw done elsewhere) the lock
  free access is preferable.
 
 Actually I think it is conceptually easier to understand, but the
 current implementation of atomic_read not working with uint64_t on
 x86_32 is a real bummer. In that case I am OK with the lock free loop
 too. Thanks for the explanation.
 
 I'll queue this change up for the next QEMU release cycle.

I forgot about the commit message change. Please resend, then, provided
that everything is OK, I'll queue it up.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [linux-3.18 test] 59807: regressions - FAIL

2015-07-22 Thread osstest service owner
flight 59807 linux-3.18 real [real]
http://logs.test-lab.xenproject.org/osstest/logs/59807/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-pvh-intel 11 guest-start  fail REGR. vs. 58581

Tests which are failing intermittently (not blocking):
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm 18 guest-start/debianhvm.repeat 
fail in 59785 pass in 59807
 test-amd64-amd64-rumpuserxen-amd64 15 
rumpuserxen-demo-xenstorels/xenstorels.repeat fail pass in 59785
 test-amd64-i386-qemut-rhel6hvm-amd 12 guest-start/redhat.repeat fail pass in 
59785

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-libvirt  6 xen-boot  fail REGR. vs. 58581
 test-amd64-i386-xl-qemut-stubdom-debianhvm-amd64-xsm 9 debian-hvm-install fail 
baseline untested
 test-amd64-amd64-xl-qemut-stubdom-debianhvm-amd64-xsm 12 guest-localmigrate 
fail baseline untested
 test-amd64-i386-xl-qemut-stubdom-debianhvm-amd64-xsm 11 guest-saverestore fail 
in 59785 baseline untested
 test-armhf-armhf-xl-rtds 11 guest-startfail in 59785 baseline untested
 test-armhf-armhf-xl-credit2   6 xen-boot fail   like 58581
 test-armhf-armhf-xl   6 xen-boot fail   like 58581
 test-armhf-armhf-xl-multivcpu  6 xen-boot fail  like 58581
 test-armhf-armhf-xl-xsm   6 xen-boot fail   like 58581
 test-armhf-armhf-libvirt-xsm  6 xen-boot fail   like 58581
 test-amd64-amd64-xl-qemut-win7-amd64 16 guest-stop fail like 58581
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop  fail like 58581
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stop fail like 58581

Tests which did not succeed, but are not blocking:
 test-amd64-i386-freebsd10-i386  9 freebsd-install  fail never pass
 test-amd64-i386-freebsd10-amd64  9 freebsd-install fail never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-amd64-i386-xl-qemut-win7-amd64 16 guest-stop  fail never pass
 test-armhf-armhf-xl-cubietruck  6 xen-boot fail never pass
 test-armhf-armhf-xl-rtds 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 15 guest-start/debian.repeatfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass

version targeted for testing:
 linux866cebe251f4fb2b435f4ecfe6d3bb4025938533
baseline version:
 linuxd048c068d00da7d4cfa5ea7651933b99026958cf

Last test of basis58581  2015-06-15 09:42:22 Z   37 days
Failing since 58976  2015-06-29 19:43:23 Z   22 days   32 attempts
Testing same since59412  2015-07-11 00:18:42 Z   11 days   20 attempts


308 people touched revisions under test,
not listing them all

jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 build-amd64-rumpuserxen  pass
 build-i386-rumpuserxen   pass
 test-amd64-amd64-xl  pass
 test-armhf-armhf-xl  fail
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-qemut-debianhvm-amd64-xsmpass
 test-amd64-i386-xl-qemut-debianhvm-amd64-xsm pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsmpass
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm pass
 test-amd64-amd64-xl-qemut-stubdom-debianhvm-amd64-xsmfail
 test-amd64-i386-xl-qemut-stubdom-debianhvm-amd64-xsm fail
 test-amd64-amd64-libvirt-xsm 

Re: [Xen-devel] [PATCH] xen/HVM: atomically access pointers in bufioreq handling

2015-07-22 Thread Stefano Stabellini
On Wed, 22 Jul 2015, Jan Beulich wrote:
  On 22.07.15 at 16:50, stefano.stabell...@eu.citrix.com wrote:
  On Wed, 22 Jul 2015, Jan Beulich wrote:
   --- a/xen-hvm.c
   +++ b/xen-hvm.c
   @@ -981,19 +981,30 @@ static void handle_ioreq(XenIOState *sta

static int handle_buffered_iopage(XenIOState *state)
{
   +buffered_iopage_t *buf_page = state-buffered_io_page;
buf_ioreq_t *buf_req = NULL;
ioreq_t req;
int qw;

   -if (!state-buffered_io_page) {
   +if (!buf_page) {
return 0;
}

memset(req, 0x00, sizeof(req));

   -while (state-buffered_io_page-read_pointer != 
   state-buffered_io_page-write_pointer) {
   -buf_req = state-buffered_io_page-buf_ioreq[
   -state-buffered_io_page-read_pointer % 
   IOREQ_BUFFER_SLOT_NUM];
   +for (;;) {
   +uint32_t rdptr = buf_page-read_pointer, wrptr;
   +
   +xen_rmb();
   
   We don't need this barrier.
  
  How would we not? We need to make sure we read in this order
  read_pointer, write_pointer, and read_pointer again (in the
  comparison).  Only that way we can be certain to hold a matching
  pair in hands at the end.
  
  See below
  
  
   +wrptr = buf_page-write_pointer;
   +xen_rmb();
   +if (rdptr != buf_page-read_pointer) {
   
   I think you have to use atomic_read to be sure that the second read to
   buf_page-read_pointer is up to date and not optimized away.
  
  No, suppressing such an optimization is an intended (side) effect
  of the barriers used.
  
  I understand what you are saying but I am not sure if your assumption
  is correct. Can the compiler optimize the second read anyway?
 
 No, it can't, due to the barrier.

OK


   But if I think that it would be best to simply use atomic_read to read
   both pointers at once using uint64_t as type, so you are sure to get a
   consistent view and there is no need for this check.
  
  But I'm specifically trying to avoid e.g. a locked cmpxchg8b here on
  ix86.
  
  OK, but we don't need cmpxchg8b, just:
  
  #define atomic_read(ptr)   (*(__typeof__(*ptr) volatile*) (ptr))
 
 This only gives the impression of being atomic when the type is wider
 than a machine word. There's no ix86 (i.e. 32-bit) instruction other
 than LOCK CMPXCHG8B (and possibly MMX/SSE/AVX ones) allowing
 to atomically read a 64-bit quantity.

Damn!


  something like:
  
   for (;;) {
   uint64_t ptrs;
   uint32_t rdptr, wrptr;
   
   ptrs = atomic_read((uint64_t*)state-buffered_io_page-read_pointer);
   rdptr = (uint32_t)ptrs;
   wrptr = *(((uint32_t*)ptrs) + 1);
   
   if (rdptr == wrptr) {
   continue;
   }
   
   [work]
   
   atomic_add(buf_page-read_pointer, qw + 1);
   }
  
  it would work, wouldn't it?
 
 Looks like so, but the amount of casts alone makes me wish for
 no-one to consider this (but I agree that the casts could be
 taken care of). Still I think (as btw done elsewhere) the lock
 free access is preferable.

Actually I think it is conceptually easier to understand, but the
current implementation of atomic_read not working with uint64_t on
x86_32 is a real bummer. In that case I am OK with the lock free loop
too. Thanks for the explanation.

I'll queue this change up for the next QEMU release cycle.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v5 6/7] x86/MSI: properly track guest masking requests

2015-07-22 Thread Andrew Cooper
On 20/07/15 15:22, Jan Beulich wrote:
 ... by monitoring writes to the mask register.

 This allows reverting the main effect of the XSA-129 patches in qemu.

 Signed-off-by: Jan Beulich jbeul...@suse.com

Reviewed-by: Andrew Cooper andrew.coop...@citrix.com

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] PV-vNUMA issue: topology is misinterpreted by the guest

2015-07-22 Thread Boris Ostrovsky

On 07/22/2015 11:49 AM, Dario Faggioli wrote:

On Wed, 2015-07-22 at 11:32 -0400, Boris Ostrovsky wrote:

On 07/22/2015 10:50 AM, Dario Faggioli wrote:

Yep. Exacty. As Boris says, this is a generic scheduling issue, although
it's tru that it's only (as far as I can tell) with vNUMA that it bite
us so hard...

I am not sure that it's only vNUMA. It's just that with vNUMA we can see
a warning (on your system) that something goes wrong. In other cases
(like scheduling, or sizing objects based on discovered cache sizes) we
don't see anything in the log but system/programs are making wrong
decisions.


I'm not questioning that the guest scheduler is put in a position where
it may make weird and/or wrong choices... Let's put it like this: on
that very test box, or any other NUMA box I've worked on, I've never
seen performance affected _so_much_ as in the vNUMA case.

In fact, of course there are other issues (like the ones you're
mentioning, caused by this), but it's only with vNUMA that I see 2 out
of 4 vcpus completely lost! :-/


My guess would be that scheduling domains are not set up properly. And 
perhaps because your test has lots of IO the load doesn't cross 
thresholds that are needed to switch to another domain?


Does it work the same way if you run a compiled busy loop (i.e. 
'main(){while(1);}')?





(And your results above may well be the example of that)


Right. BTW, the example was meant at investigating what you suggested,
i.e., to have the guest topology follow the host topology. I tried, but
could not achieve it. Do you think I'm doing something wrong? May it be
that it's not always doable (ISTR Andrew saying that it is also the pcpu
where the vcpus are created that matters)?


I don't think this is currently doable with what we have for CPUID 
support in xl syntax. I am pretty sure we need to at least be able to 
specify all leaf 4's indexes. And we can't.


BTW, irrespective of this particular problem, adding support for indexed 
CPUID leaves would be a good idea.


-boris



I'm (re)asking because, if you think that's feasible, I can work on
that. I fit's not, well... :-/



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [linux-4.1 test] 59811: regressions - FAIL

2015-07-22 Thread osstest service owner
flight 59811 linux-4.1 real [real]
http://logs.test-lab.xenproject.org/osstest/logs/59811/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-xl-qemut-stubdom-debianhvm-amd64-xsm 11 guest-saverestore fail 
REGR. vs. 59393
 test-amd64-i386-xl-xsm   13 guest-saverestore fail REGR. vs. 59393
 test-amd64-i386-pair   21 guest-migrate/src_host/dst_host fail REGR. vs. 59393
 test-amd64-i386-xl   13 guest-saverestore fail REGR. vs. 59393

Regressions which are regarded as allowable (not blocking):
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stopfail REGR. vs. 59393
 test-amd64-amd64-xl-qemut-win7-amd64 16 guest-stop fail like 59393

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-xl-pvh-intel 13 guest-saverestorefail  never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 11 guest-start  fail   never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop  fail never pass
 test-amd64-i386-xl-qemut-win7-amd64 16 guest-stop  fail never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass

version targeted for testing:
 linuxc8bde72f9af412de57f0ceae218d648640118b0b
baseline version:
 linux5cf9896dc5c72a6c68c36140568b755f697f7760

Last test of basis59393  2015-07-10 17:14:15 Z   12 days
Testing same since59811  2015-07-22 00:43:01 Z0 days1 attempts


People who touched revisions under test:
  Eric W. Biederman ebied...@xmission.com
  Aaron Lu aaron...@intel.com
  Alex Deucher alexander.deuc...@amd.com
  Alex Williamson alex.william...@redhat.com
  Alexander Sverdlin alexander.sverd...@nokia.com
  Alexander Usyskin alexander.usys...@intel.com
  Andrew Morton a...@linux-foundation.org
  Antonio Ospite a...@ao2.it
  Arnaldo Carvalho de Melo a...@kernel.org
  Arnaldo Carvalho de Melo a...@redhat.com
  Arun Chandran achand...@mvista.com
  Axel Lin axel@ingics.com
  Barak Yoresh barak.yor...@intel.com
  Bart Van Assche bart.vanass...@sandisk.com
  Bjorn Helgaas bhelg...@google.com
  Brian King brk...@linux.vnet.ibm.com
  Brian Norris computersforpe...@gmail.com
  Catalin Marinas catalin.mari...@arm.com
  Dave Martin dave.mar...@arm.com
  Dave P Martin dave.mar...@arm.com
  David Ahern david.ah...@oracle.com
  David Henningsson david.hennings...@canonical.com
  David Rientjes rient...@google.com
  David S. Miller da...@davemloft.net
  Dmitry Torokhov dmitry.torok...@gmail.com
  Dominik Brodowski li...@dominikbrodowski.net
  Doug Ledford dledf...@redhat.com
  Dr. H. Nikolaus Schaller h...@goldelico.com
  Eric W. Biederman ebied...@xmission.com
  Ezequiel Garcia ezequ...@vanguardiasur.com.ar
  Frodo Lai frodo@gmail.com
  Frodo Lai frodo_...@bcmcom.com
  Gabriele Mazzotta gabriele@gmail.com
  Geert Uytterhoeven geert+rene...@glider.be
  Giuseppe Cantavenera giuseppe.cantavenera@nokia.com
  Greg Kroah-Hartman gre...@linuxfoundation.org
  Gregory CLEMENT gregory.clem...@free-electrons.com
  Grygorii Strashko grygorii.stras...@linaro.org
  Hui Wang hui.w...@canonical.com
  Jacek Anaszewski j.anaszew...@samsung.com
  James Bottomley jbottom...@odin.com
  Jiri Kosina jkos...@suse.cz
  Joe Perches j...@perches.com
  Jonathan Cameron ji...@kernel.org
  Josh Poimboeuf jpoim...@redhat.com
  Krzysztof Kozlowski k.kozlow...@samsung.com
  Larry Finger larry.fin...@lwfinger.net
  Linus Torvalds torva...@linux-foundation.org
  Linus Walleij linus.wall...@linaro.org
  Liu Ying ying@freescale.com
  Liviu Dudau liviu.du...@arm.com
  Mark Brown broo...@kernel.org
  Mark Rutland mark.rutl...@arm.com
  Martin KaFai Lau ka...@fb.com
  Martin Sperl ker...@martin.sperl.org
  Max Filippov jcmvb...@gmail.com
  Maxime Coquelin maxime.coque...@st.com
  Miroslav Benes mbe...@suse.cz
  Noralf Trønnes nor...@tronnes.org
  Paul E. McKenney paul...@linux.vnet.ibm.com
  Peter Chen 

[Xen-devel] [qemu-mainline test] 59810: regressions - FAIL

2015-07-22 Thread osstest service owner
flight 59810 qemu-mainline real [real]
http://logs.test-lab.xenproject.org/osstest/logs/59810/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-freebsd10-i386 12 guest-saverestore   fail REGR. vs. 59059
 test-amd64-i386-xl-qemuu-ovmf-amd64 11 guest-saverestore  fail REGR. vs. 59059
 test-amd64-i386-xl-qemuu-debianhvm-amd64 11 guest-saverestore fail REGR. vs. 
59059
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm 11 guest-saverestore fail REGR. 
vs. 59059
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1 11 guest-saverestore fail REGR. vs. 
59059
 test-amd64-amd64-xl-qemuu-debianhvm-amd64 11 guest-saverestore fail REGR. vs. 
59059
 test-amd64-amd64-xl-qemuu-ovmf-amd64 11 guest-saverestore fail REGR. vs. 59059
 test-amd64-amd64-xl-qemuu-winxpsp3 11 guest-saverestore   fail REGR. vs. 59059
 test-amd64-i386-freebsd10-amd64 12 guest-saverestore  fail REGR. vs. 59059
 test-amd64-i386-xl-qemuu-winxpsp3 11 guest-saverestorefail REGR. vs. 59059
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsm 11 guest-saverestore fail REGR. 
vs. 59059
 test-amd64-amd64-xl-qemuu-win7-amd64 11 guest-saverestore fail REGR. vs. 59059
 test-amd64-i386-xl-qemuu-win7-amd64 11 guest-saverestore  fail REGR. vs. 59059

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-xl-rtds 11 guest-start   fail REGR. vs. 59059

Tests which did not succeed, but are not blocking:
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-intel 11 guest-start  fail  never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass

version targeted for testing:
 qemuu774ee4772b6838b78741ea52d4bf26b8922244c5
baseline version:
 qemuu35360642d043c2a5366e8a04a10e5545e7353bd5

Last test of basis59059  2015-07-05 10:39:20 Z   17 days
Failing since 59109  2015-07-06 14:58:21 Z   16 days   23 attempts
Testing same since59810  2015-07-22 00:12:57 Z0 days1 attempts


People who touched revisions under test:
  Alberto Garcia be...@igalia.com
  Alex Williamson alex.william...@redhat.com
  Alexander Graf ag...@suse.de
  Alexey Kardashevskiy a...@ozlabs.ru
  Alvise Rigo a.r...@virtualopensystems.com
  Amit Shah amit.s...@redhat.com
  Andreas Färber afaer...@suse.de
  Andrew Bennett andrew.benn...@imgtec.com
  Andrew Jones drjo...@redhat.com
  Artyom Tarasenko atar4q...@gmail.com
  Aurelien Jarno aurel...@aurel32.net
  Benjamin Herrenschmidt b...@kernel.crashing.org
  Bharata B Rao bhar...@linux.vnet.com
  Bharata B Rao bhar...@linux.vnet.ibm.com
  Brian Kress kre...@moose.net
  Chen Hanxiao chenhanx...@cn.fujitsu.com
  Christian Borntraeger borntrae...@de.ibm.com
  Christoffer Dall christoffer.d...@linaro.org
  Christoph Hellwig h...@lst.de
  Claudio Fontana claudio.font...@huawei.com
  Cormac O'Brien i.am.cormac.obr...@gmail.com
  Cornelia Huck cornelia.h...@de.ibm.com
  Dana Rubin dana.ru...@ravellosystems.com
  Daniel P. Berrange berra...@redhat.com
  David Gibson da...@gibson.dropbear.id.au
  Denis V. Lunev d...@openvz.org
  Dmitry Osipenko dig...@gmail.com
  Dr. David Alan Gilbert dgilb...@redhat.com
  Eduardo Habkost ehabk...@redhat.com
  Eric Auger eric.au...@linaro.org
  Fam Zheng f...@redhat.com
  Frediano Ziglio fzig...@redhat.com
  Gabriel Laupre glau...@chelsio.com
  Gavin Shan gws...@linux.vnet.ibm.com
  Gerd Hoffmann kra...@redhat.com
  Gonglei arei.gong...@huawei.com
  Greg Kurz gk...@linux.vnet.ibm.com
  Hannes Reinecke h...@suse.de
  Hervé Poussineau hpous...@reactos.org
  Igor Mammedov imamm...@redhat.com
  James Hogan james.ho...@imgtec.com
  Jan Kiszka jan.kis...@siemens.com
  Jason Wang jasow...@redhat.com
  Jeff Cody jc...@redhat.com
  Johannes Schlatow schla...@ida.ing.tu-bs.de
  John Snow js...@redhat.com
  Josh Durgin jdur...@redhat.com
  Juan Quintela quint...@redhat.com
  Justin Ossevoort jus...@quarantainenet.nl
  Keith Busch keith.bu...@intel.com
  Kevin Wolf 

[Xen-devel] [PATCH v3 1/3] x86/ldt: Make modify_ldt synchronous

2015-07-22 Thread Andy Lutomirski
modify_ldt has questionable locking and does not synchronize
threads.  Improve it: redesign the locking and synchronize all
threads' LDTs using an IPI on all modifications.

This will dramatically slow down modify_ldt in multithreaded
programs, but there shouldn't be any multithreaded programs that
care about modify_ldt's performance in the first place.

Cc: sta...@vger.kernel.org
Signed-off-by: Andy Lutomirski l...@kernel.org
---
 arch/x86/include/asm/desc.h|  15 ---
 arch/x86/include/asm/mmu.h |   3 +-
 arch/x86/include/asm/mmu_context.h |  53 +++-
 arch/x86/kernel/cpu/common.c   |   4 +-
 arch/x86/kernel/cpu/perf_event.c   |  12 +-
 arch/x86/kernel/ldt.c  | 258 -
 arch/x86/kernel/process_64.c   |   4 +-
 arch/x86/kernel/step.c |   6 +-
 arch/x86/power/cpu.c   |   3 +-
 9 files changed, 209 insertions(+), 149 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a0bf89fd2647..4e10d73cf018 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
set_ldt(NULL, 0);
 }
 
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc)
-{
-   set_ldt(pc-ldt, pc-size);
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
-   preempt_disable();
-   load_LDT_nolock(pc);
-   preempt_enable();
-}
-
 static inline unsigned long get_desc_base(const struct desc_struct *desc)
 {
return (unsigned)(desc-base0 | ((desc-base1)  16) | ((desc-base2) 
 24));
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 09b9620a73b4..364d27481a52 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,8 +9,7 @@
  * we put the segment information here.
  */
 typedef struct {
-   void *ldt;
-   int size;
+   struct ldt_struct *ldt;
 
 #ifdef CONFIG_X86_64
/* True if mm supports a task running in 32 bit compatibility mode. */
diff --git a/arch/x86/include/asm/mmu_context.h 
b/arch/x86/include/asm/mmu_context.h
index 804a3a6030ca..3fcff70c398e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -34,6 +34,49 @@ static inline void load_mm_cr4(struct mm_struct *mm) {}
 #endif
 
 /*
+ * ldt_structs can be allocated, used, and freed, but they are never
+ * modified while live.
+ */
+struct ldt_struct {
+   /*
+* Xen requires page-aligned LDTs with special permissions.  This is
+* needed to prevent us from installing evil descriptors such as
+* call gates.  On native, we could merge the ldt_struct and LDT
+* allocations, but it's not worth trying to optimize.
+*/
+   struct desc_struct *entries;
+   int size;
+};
+
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
+   struct ldt_struct *ldt;
+   DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+   /* lockless_dereference synchronizes with smp_store_release */
+   ldt = lockless_dereference(mm-context.ldt);
+
+   /*
+* Any change to mm-context.ldt is followed by an IPI to all
+* CPUs with the mm active.  The LDT will not be freed until
+* after the IPI is handled by all such CPUs.  This means that,
+* if the ldt_struct changes before we return, the values we see
+* will be safe, and the new values will be loaded before we run
+* any user code.
+*
+* NB: don't try to convert this to use RCU without extreme care.
+* We would still need IRQs off, because we don't want to change
+* the local LDT after an IPI loaded a newer value than the one
+* that we can see.
+*/
+
+   if (unlikely(ldt))
+   set_ldt(ldt-entries, ldt-size);
+   else
+   clear_LDT();
+}
+
+/*
  * Used for LDT copy/destruction.
  */
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
@@ -78,12 +121,12 @@ static inline void switch_mm(struct mm_struct *prev, 
struct mm_struct *next,
 * was called and then modify_ldt changed
 * prev-context.ldt but suppressed an IPI to this CPU.
 * In this case, prev-context.ldt != NULL, because we
-* never free an LDT while the mm still exists.  That
-* means that next-context.ldt != prev-context.ldt,
-* because mms never share an LDT.
+* never set context.ldt to NULL while the mm still
+* exists.  That means that next-context.ldt !=
+* prev-context.ldt, because mms never share an LDT.
 */
if (unlikely(prev-context.ldt != next-context.ldt))
-   load_LDT_nolock(next-context);
+   load_mm_ldt(next);
}
 #ifdef CONFIG_SMP
  else {
@@ -106,7 +149,7 @@ static inline void 

[Xen-devel] [PATCH v3 3/3] selftests/x86, x86/ldt: Add a selftest for modify_ldt

2015-07-22 Thread Andy Lutomirski
This tests general modify_ldt behavior (only writes, so far) as
well as synchronous updates via IPI.  It fails on old kernels.

I called this ldt_gdt because I'll add set_thread_area tests to
it at some point.

Signed-off-by: Andy Lutomirski l...@kernel.org
---
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/ldt_gdt.c | 492 ++
 2 files changed, 493 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/ldt_gdt.c

diff --git a/tools/testing/selftests/x86/Makefile 
b/tools/testing/selftests/x86/Makefile
index caa60d56d7d1..4138387b892c 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs
+TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs ldt_gdt
 TARGETS_C_32BIT_ONLY := entry_from_vm86
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
diff --git a/tools/testing/selftests/x86/ldt_gdt.c 
b/tools/testing/selftests/x86/ldt_gdt.c
new file mode 100644
index ..7723a12d42e1
--- /dev/null
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -0,0 +1,492 @@
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include err.h
+#include stdio.h
+#include stdint.h
+#include signal.h
+#include setjmp.h
+#include stdlib.h
+#include string.h
+#include errno.h
+#include unistd.h
+#include sys/syscall.h
+#include asm/ldt.h
+#include sys/types.h
+#include sys/wait.h
+#include stdbool.h
+#include pthread.h
+#include sched.h
+#include linux/futex.h
+
+#define AR_ACCESSED(18)
+
+#define AR_TYPE_RODATA (0 * (19))
+#define AR_TYPE_RWDATA (1 * (19))
+#define AR_TYPE_RODATA_EXPDOWN (2 * (19))
+#define AR_TYPE_RWDATA_EXPDOWN (3 * (19))
+#define AR_TYPE_XOCODE (4 * (19))
+#define AR_TYPE_XRCODE (5 * (19))
+#define AR_TYPE_XOCODE_CONF(6 * (19))
+#define AR_TYPE_XRCODE_CONF(7 * (19))
+
+#define AR_DPL3(3 * (113))
+
+#define AR_S   (1  12)
+#define AR_P   (1  15)
+#define AR_AVL (1  20)
+#define AR_L   (1  21)
+#define AR_DB  (1  22)
+#define AR_G   (1  23)
+
+static int nerrs;
+
+static void check_invalid_segment(uint16_t index, int ldt)
+{
+   uint32_t has_limit = 0, has_ar = 0, limit, ar;
+   uint32_t selector = (index  3) | (ldt  2) | 3;
+
+   asm (lsl %[selector], %[limit]\n\t
+jnz 1f\n\t
+movl $1, %[has_limit]\n\t
+1:
+: [limit] =r (limit), [has_limit] +rm (has_limit)
+: [selector] r (selector));
+   asm (larl %[selector], %[ar]\n\t
+jnz 1f\n\t
+movl $1, %[has_ar]\n\t
+1:
+: [ar] =r (ar), [has_ar] +rm (has_ar)
+: [selector] r (selector));
+
+   if (has_limit || has_ar) {
+   printf([FAIL]\t%s entry %hu is valid but should be invalid\n,
+  (ldt ? LDT : GDT), index);
+   nerrs++;
+   } else {
+   printf([OK]\t%s entry %hu is invalid\n,
+  (ldt ? LDT : GDT), index);
+   }
+}
+
+static void check_valid_segment(uint16_t index, int ldt,
+   uint32_t expected_ar, uint32_t expected_limit)
+{
+   uint32_t has_limit = 0, has_ar = 0, limit, ar;
+   uint32_t selector = (index  3) | (ldt  2) | 3;
+
+   asm (lsl %[selector], %[limit]\n\t
+jnz 1f\n\t
+movl $1, %[has_limit]\n\t
+1:
+: [limit] =r (limit), [has_limit] +rm (has_limit)
+: [selector] r (selector));
+   asm (larl %[selector], %[ar]\n\t
+jnz 1f\n\t
+movl $1, %[has_ar]\n\t
+1:
+: [ar] =r (ar), [has_ar] +rm (has_ar)
+: [selector] r (selector));
+
+   if (!has_limit || !has_ar) {
+   printf([FAIL]\t%s entry %hu is invalid but should be valid\n,
+  (ldt ? LDT : GDT), index);
+   nerrs++;
+   return;
+   }
+
+   if (ar != expected_ar) {
+   printf([FAIL]\t%s entry %hu has AR 0x%08X but expected 
0x%08X\n,
+  (ldt ? LDT : GDT), index, ar, expected_ar);
+   nerrs++;
+   } else if (limit != expected_limit) {
+   printf([FAIL]\t%s entry %hu has limit 0x%08X but expected 
0x%08X\n,
+  (ldt ? LDT : GDT), index, limit, expected_limit);
+   nerrs++;
+   } else {
+   printf([OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n,
+  (ldt ? LDT : GDT), index, ar, limit);
+   }
+}
+
+static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
+

[Xen-devel] [PATCH v3 0/3] x86: modify_ldt improvement, test, and config option

2015-07-22 Thread Andy Lutomirski
Here's v3.  It fixes the dazed and confused issue, I hope.  It's also
probably a good general attack surface reduction, and it replaces some
scary code with IMO less scary code.

Also, servers and embedded systems should probably turn off modify_ldt.
This makes that possible.

Xen people, can you take a look at this?

Changes from v2:
 - Allocate ldt_struct and the LDT entries separately.  This should fix Xen.
 - Stop using write_ldt_entry, since I'm pretty sure it's unnecessary now
   that we no longer mutate an in-use LDT.  (Xen people, can you check?)

Changes from v1:
 - The config option is new.
 - The test case is new.
 - Fixed a missing allocation failure check.
 - Fixed a use-after-free on fork().

Andy Lutomirski (3):
  x86/ldt: Make modify_ldt synchronous
  x86/ldt: Make modify_ldt optional
  selftests/x86, x86/ldt: Add a selftest for modify_ldt

 arch/x86/Kconfig  |  17 ++
 arch/x86/include/asm/desc.h   |  15 --
 arch/x86/include/asm/mmu.h|   5 +-
 arch/x86/include/asm/mmu_context.h|  68 -
 arch/x86/kernel/Makefile  |   3 +-
 arch/x86/kernel/cpu/common.c  |   4 +-
 arch/x86/kernel/cpu/perf_event.c  |  16 +-
 arch/x86/kernel/ldt.c | 258 ++
 arch/x86/kernel/process_64.c  |   6 +-
 arch/x86/kernel/step.c|   8 +-
 arch/x86/power/cpu.c  |   3 +-
 kernel/sys_ni.c   |   1 +
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/ldt_gdt.c | 492 ++
 14 files changed, 747 insertions(+), 151 deletions(-)
 create mode 100644 tools/testing/selftests/x86/ldt_gdt.c

-- 
2.4.3


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v5 10/15] x86/altp2m: add remaining support routines.

2015-07-22 Thread Jan Beulich
 On 21.07.15 at 20:33, ravi.sah...@intel.com wrote:
 We thought the code was easier to understand with min and max set to 
 INVALID.

But as you see it caused a lot of discussion instead.

 we can take your approach to avoid this inefficiency if you want.

Yes please.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 02/23] x86/boot: copy only text section from *.lnk file to *.bin file

2015-07-22 Thread Jan Beulich
 On 21.07.15 at 19:23, daniel.ki...@oracle.com wrote:
 First of all ld generates .got.plt section and objcopy copy it to binary 
 file.
 It is not needed because we do not link our stuff here with shared 
 libraries.
 So, we can use -R objcopy option to remove it (if you do not like -j .text).
 This way we could save 15 bytes (at least on my machines).

So I checked and did find no .got.plt at all on a machine using
binutils 2.25 and an empty one on a machine using an older
version. So I'm curious what you're seeing present in that
table (without any of your patches applied).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] PCI Pass-through in Xen ARM - Draft 2.

2015-07-22 Thread Julien Grall

Hi Manish,

On 22/07/2015 06:41, Manish Jaggi wrote:

Can we keep this open and for now till there is agreement make
requesterid = bdf.
If you are ok, I will update and send Draft 3.


I think we can make an agreement on we are able to find a requesterID 
based on the BDF but not that requesterID = BDF. There is different 
reason for that: ARI, hardware quirks...


It would be nice if you can summarize how to find the requesterID, 
StreamIDs (SMMU), deviceID (IOMMU) and use the name accordingly in the spec.


Regards,

--
Julien Grall

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 1/3] xen-blkfront: introduce blkfront_gather_backend_features()

2015-07-22 Thread Bob Liu
There is a bug when migrate from !feature-persistent host to feature-persistent
host, because domU still thinks new host/backend doesn't support persistent.
Dmesg like:
backed has not unmapped grant: 839
backed has not unmapped grant: 773
backed has not unmapped grant: 773
backed has not unmapped grant: 773
backed has not unmapped grant: 839

The fix is to recheck feature-persistent of new backend in blkif_recover().
See: https://lkml.org/lkml/2015/5/25/469

As Roger suggested, we can split the part of blkfront_connect that checks for
optional features, like persistent grants, indirect descriptors and
flush/barrier features to a separate function and call it from both
blkfront_connect and blkif_recover

Signed-off-by: Bob Liu bob@oracle.com
---
Changes in v2:
 * Also put blkfront_setup_indirect() inside
---
 drivers/block/xen-blkfront.c |  122 +++---
 1 file changed, 68 insertions(+), 54 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 5b45ee5..3b193cf 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -181,6 +181,7 @@ static DEFINE_SPINLOCK(minor_lock);
((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 
 static int blkfront_setup_indirect(struct blkfront_info *info);
+static int blkfront_gather_backend_features(struct blkfront_info *info);
 
 static int get_id_from_freelist(struct blkfront_info *info)
 {
@@ -1514,7 +1515,7 @@ static int blkif_recover(struct blkfront_info *info)
info-shadow_free = info-ring.req_prod_pvt;
info-shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fff;
 
-   rc = blkfront_setup_indirect(info);
+   rc = blkfront_gather_backend_features(info);
if (rc) {
kfree(copy);
return rc;
@@ -1694,20 +1695,13 @@ static void blkfront_setup_discard(struct blkfront_info 
*info)
 
 static int blkfront_setup_indirect(struct blkfront_info *info)
 {
-   unsigned int indirect_segments, segs;
+   unsigned int segs;
int err, i;
 
-   err = xenbus_gather(XBT_NIL, info-xbdev-otherend,
-   feature-max-indirect-segments, %u, 
indirect_segments,
-   NULL);
-   if (err) {
-   info-max_indirect_segments = 0;
+   if (info-max_indirect_segments == 0)
segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
-   } else {
-   info-max_indirect_segments = min(indirect_segments,
- xen_blkif_max_segments);
+   else
segs = info-max_indirect_segments;
-   }
 
err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * 
BLK_RING_SIZE(info));
if (err)
@@ -1771,6 +1765,68 @@ out_of_memory:
 }
 
 /*
+ * Gather all backend feature-*
+ */
+static int blkfront_gather_backend_features(struct blkfront_info *info)
+{
+   int err;
+   int barrier, flush, discard, persistent;
+   unsigned int indirect_segments;
+
+   info-feature_flush = 0;
+
+   err = xenbus_gather(XBT_NIL, info-xbdev-otherend,
+   feature-barrier, %d, barrier,
+   NULL);
+
+   /*
+* If there's no feature-barrier defined, then it means
+* we're dealing with a very old backend which writes
+* synchronously; nothing to do.
+*
+* If there are barriers, then we use flush.
+*/
+   if (!err  barrier)
+   info-feature_flush = REQ_FLUSH | REQ_FUA;
+   /*
+* And if there is feature-flush-cache use that above
+* barriers.
+*/
+   err = xenbus_gather(XBT_NIL, info-xbdev-otherend,
+   feature-flush-cache, %d, flush,
+   NULL);
+
+   if (!err  flush)
+   info-feature_flush = REQ_FLUSH;
+
+   err = xenbus_gather(XBT_NIL, info-xbdev-otherend,
+   feature-discard, %d, discard,
+   NULL);
+
+   if (!err  discard)
+   blkfront_setup_discard(info);
+
+   err = xenbus_gather(XBT_NIL, info-xbdev-otherend,
+   feature-persistent, %u, persistent,
+   NULL);
+   if (err)
+   info-feature_persistent = 0;
+   else
+   info-feature_persistent = persistent;
+
+   err = xenbus_gather(XBT_NIL, info-xbdev-otherend,
+   feature-max-indirect-segments, %u, 
indirect_segments,
+   NULL);
+   if (err)
+   info-max_indirect_segments = 0;
+   else
+   info-max_indirect_segments = min(indirect_segments,
+ xen_blkif_max_segments);
+
+   return blkfront_setup_indirect(info);
+}
+
+/*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
  */
@@ -1781,7 

[Xen-devel] [PATCH v2] xen-blkback: replace work_pending with work_busy in purge_persistent_gnt()

2015-07-22 Thread Bob Liu
The BUG_ON() in purge_persistent_gnt() will be triggered when previous purge
work haven't finished.
There is a work_pending() before this BUG_ON, but it doesn't account if the work
is still currently running.

Signed-off-by: Bob Liu bob@oracle.com
---
Change in v2:
 * Replace with work_busy()
---
 drivers/block/xen-blkback/blkback.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c 
b/drivers/block/xen-blkback/blkback.c
index ced9677..954c002 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -369,8 +369,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
return;
}
 
-   if (work_pending(blkif-persistent_purge_work)) {
-   pr_alert_ratelimited(Scheduled work from previous purge is 
still pending, cannot purge list\n);
+   if (work_busy(blkif-persistent_purge_work)) {
+   pr_alert_ratelimited(Scheduled work from previous purge is 
still busy, cannot purge list\n);
return;
}
 
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 2/3] xen-blkfront: don't add indirect pages to list when !feature_persistent

2015-07-22 Thread Bob Liu
We should consider info-feature_persistent when adding indriect page to list
info-indirect_pages, else the BUG_ON() in blkif_free() would be triggered.

Signed-off-by: Bob Liu bob@oracle.com
---
 drivers/block/xen-blkfront.c |6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3b193cf..5dd591d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1125,8 +1125,10 @@ static void blkif_completion(struct blk_shadow *s, 
struct blkfront_info *info,
 * Add the used indirect page back to the list 
of
 * available pages for indirect grefs.
 */
-   indirect_page = 
pfn_to_page(s-indirect_grants[i]-pfn);
-   list_add(indirect_page-lru, 
info-indirect_pages);
+   if (!info-feature_persistent) {
+   indirect_page = 
pfn_to_page(s-indirect_grants[i]-pfn);
+   list_add(indirect_page-lru, 
info-indirect_pages);
+   }
s-indirect_grants[i]-gref = GRANT_INVALID_REF;
list_add_tail(s-indirect_grants[i]-node, 
info-grants);
}
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v11][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Jan Beulich
 On 22.07.15 at 03:30, tiejun.c...@intel.com wrote:
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Wei Liu wei.l...@citrix.com
 Acked-by: Wei Liu wei.l...@citrix.com
 Signed-off-by: Tiejun Chen tiejun.c...@intel.com
 Reviewed-by: Kevin Tian kevin.t...@intel.com
 ---
 v11:
 
 * Use GCNEW_ARRAY to replace libxl__malloc()
 
 * #define pfn_to_paddrk is missing safety () around x, and
   move this into libxl_internal.h
 
 * Rename set_rdm_entries() to add_rdm_entry() and put the
   increment at the end so that the assignments are
   to -rdms[d_config-num_rdms].
 
 * Simply make it so that if there are any rdms specified
   in the domain config, they are used instead of the
   automatically gathered information (from strategy and
   devices). So just return if d_config-rmds is valid.
 
 * Shorten some code comments.

I think it is not the first time that we're pointing out to you that
when you make not just cosmetic changes, review and ack tags
should be dropped.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v9 0/8] pci: add pci_iomap_wc() and pci_ioremap_wc_bar()

2015-07-22 Thread Ingo Molnar

* Bjorn Helgaas bhelg...@google.com wrote:

Let me know if these are OK or if there are any questions.

[0] http://lkml.kernel.org/r/20150625204703.gc4...@pd.tnic
[1] http://lkml.kernel.org/r/20150707095012.gq7...@wotan.suse.de
   
   Ingo,
   
   Just a friendly reminder. Let me know if there are any issues or 
   questions.
  
  It would be nice to get an Acked-by from Bjorn for the PCI API bits.
 
 I think the actual code of pci_ioremap_wc() and pci_ioremap_wc_bar() is fine 
 (although I might have named it pci_ioremap_bar_wc() for consistency).
 
 I declined to merge or ack them myself because they're obvious extensions of 
 pci_ioremap() and pci_ioremap_bar(), and I would prefer that they be exported 
 the same way, i.e., with EXPORT_SYMBOL(), not EXPORT_SYMBOL_GPL().

Huh? AFAICS pci_ioremap_bar() has been a _GPL export for a long time:

1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   126)#ifdef 
CONFIG_HAS_IOMEM
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   127)void 
__iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   128){
1f7bf3bfb5d60   (Bjorn Helgaas  2015-03-12 12:30:11 -0500   129)struct 
resource *res = pdev-resource[bar];
1f7bf3bfb5d60   (Bjorn Helgaas  2015-03-12 12:30:11 -0500   130)
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   131)/*
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   132) * Make 
sure the BAR is actually a memory resource, not an IO resource
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   133) */
646c0282df042   (Bjorn Helgaas  2015-03-12 12:30:15 -0500   134)if 
(res-flags  IORESOURCE_UNSET || !(res-flags  IORESOURCE_MEM)) {
1f7bf3bfb5d60   (Bjorn Helgaas  2015-03-12 12:30:11 -0500   135)
dev_warn(pdev-dev, can't ioremap BAR %d: %pR\n, bar, res);
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   136)
return NULL;
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   137)}
1f7bf3bfb5d60   (Bjorn Helgaas  2015-03-12 12:30:11 -0500   138)return 
ioremap_nocache(res-start, resource_size(res));
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   139)}
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   
140)EXPORT_SYMBOL_GPL(pci_ioremap_bar);
1684f5ddd4c0c   (Andrew Morton  2008-12-01 14:30:30 -0800   141)#endif

commit 1684f5ddd4c0c happened in 2008, well before you became PCI maintainer in 
2012.

and I'd prefer keeping the same EXPORT_SYMBOL_GPL() pattern for the new APIs.

(ioremap_wc() is EXPORT_SYMBOL() mostly by accident, it's the odd one out.)

Also, FWIIW: I personally got essentially zero feedback and help from 
proprietary 
binary kernel module vendors in the past couple of years as x86 maintainer, 
despite a fair chunk of kernel crashes reported on distro kernels occuring in 
them...

Based on that very negative experience, when we introduce something as complex 
and 
as critical as new caching APIs, the last thing I want is to have obscure bugs 
in 
binary modules I cannot fix in any reasonable fashion. So even if the parent 
APIs 
of new APIs weren't already _GPL exports (as in this case), I'd export them as 
_GPL in this case.

 I think using EXPORT_SYMBOL_GPL to express individual political aims rather 
 than 
 as a hint about what might be derived work makes us look like zealots, and 
 that's not my style.

As far as I'm concerned it's a pure technological choice: I don't want to 
export 
certain types of hard to fix and critical functionality to drivers that I 
cannot 
then fix.

But I also applied EXPORT_SYMBOL() patches in the past, so I'm not one-sided 
about 
it.

Thanks,

Ingo

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v6 05/15] x86/altp2m: basic data structures and support routines.

2015-07-22 Thread Jan Beulich
 On 22.07.15 at 07:36, ravi.sah...@intel.com wrote:
 Sorry we did the ABI and some minor ones first in v6, the next rev should 
 have these and patch 10 sorted out.

Which means that it's a waste of time to look at v6. Before dropping
it from my inbox I did look at the two you specifically asked to be
looked at in another mail though.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v6 11/15] x86/altp2m: define and implement alternate p2m HVMOP types.

2015-07-22 Thread Jan Beulich
 On 21.07.15 at 01:58, edmund.h.wh...@intel.com wrote:
 --- a/xen/arch/x86/hvm/hvm.c
 +++ b/xen/arch/x86/hvm/hvm.c
 @@ -6135,6 +6135,141 @@ static int hvmop_get_param(
  return rc;
  }
  
 +static int do_altp2m_op(
 +XEN_GUEST_HANDLE_PARAM(void) arg)
 +{
 +struct xen_hvm_altp2m_op a;
 +struct domain *d = NULL;
 +int rc = 0;
 +
 +if ( !hvm_altp2m_supported() )
 +return -EOPNOTSUPP;
 +
 +if ( copy_from_guest(a, arg, 1) )
 +return -EFAULT;
 +
 +if ( a.pad1 || a.pad2 ||
 + (a.version != HVMOP_ALTP2M_INTERFACE_VERSION) ||
 + (a.cmd  HVMOP_altp2m_change_gfn) )
 +return -EINVAL;
 +
 +if ( a.cmd != HVMOP_altp2m_vcpu_enable_notify )
 +{
 +d = rcu_lock_domain_by_any_id(a.domain);
 +if ( d == NULL )
 +return -ESRCH;
 +}

else
d = rcu_lock_current_domain();

eliminating ...

 +if ( !is_hvm_domain(d ? d : current-domain) )

... a couple of conditional expressions like this one and the
conditional around the unlock at the end. With this adjusted
Acked-by: Jan Beulich jbeul...@suse.com

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH for-4.6 1/3] xen/x86/libxl: replace non-POSIX error codes used by PSR code

2015-07-22 Thread Roger Pau Monné
El 21/07/15 a les 18.11, Jan Beulich ha escrit:
 On 21.07.15 at 17:56, roger@citrix.com wrote:
 PSR was using EBADSLT and EUSERS which are not POSIX error codes, replace
 them with EINVAL and EOVERFLOW respectively.
 
 Considering that we use EINVAL for almost everything (well beyond
 parameter checking I'm afraid), I don't think using this value for
 something intended to yield a specific user mode error message is
 really a good choice. Looking at the two respective hypervisor side
 changes - how about e.g. using EDOM, EBADF, or ENXIO instead?

As suggested in a later email, replacing EBADSLT with ENOTSOCK seems
better here.

Roger.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 6/6] tools/libx{l, c}: Fix trivial Coverity defects in migration v2 code

2015-07-22 Thread Ian Campbell
On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 All of these are UNUSED_VALUE defects where a default vaule is

value.

 unconditionally overwritten.  They are not particularly interesting,
 bug wise, but keeping these defects at bay helps prevent real bugs
 going unnoticed in the volume.

The flip side is that if the code uses the init everything and goto
out on error idiom then the init might need to be put back if the code
changes, hopefully even a regular compiler would spot this though.

 No functional change.
 
 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com

Acked-by: Ian Campbell ian.campb...@citrix.com


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v10][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Ian Campbell
On Wed, 2015-07-22 at 17:18 +0800, Chen, Tiejun wrote:
 Thanks for your clarification to me.
 
  The solution to this is to be systematic in how you handle the 
  email
  based review of a series, not to add a further to the reviewer by 
  using
  IRC as well.
  
  For example, here is how I handle review of a series I am working 
  on:
  
  I keep all the replies to a series I'm working on marked unread. 
  When I
  come to rework the series I take the first reply to the first patch 
  and
  start a draft reply to it quoting the entire review.
  
  I then go through the comments one by one and either:
  
 * make the _complete_ code change, including adding the 
  Changes
   in vN bit to the commit log and delete that comment from 
  the
   reply
 
 Are you saying this case of resending this particular patch online?

No, I am talking about making the change in my local git tree. Only
once I have addressed _all_ of the review of the whole series would I
resent a complete new version of the series for further review.

 
  or
 * write a reply in my draft to that particular comment which 
  does
   one or more of:
  
 * Explain that I don't understand the suggestion,
   probably asking questions to try and clarify what 
  was
   being asked for.
 
 Yes, in this case we're arguing, I was really trying to send a sample 
 of 
 this code fragment to ask this before I sent out the complete change.

I was not talking about any specific case here.

  At the end of this process _every_ bit of review feedback is addressed
  either by making the requested change or by responding explaining the
  reason why the change hasn't been made. This is absolutely crucial. You
  should never silently ignore a piece of review, even if you don't
 
 I should double check each line but sometimes this doesn't mean I can 
 
 understand everything correctly to do right thing as you expect. But 
 this is really not what I intend to do.

I have outlined my strategy for dealing with review which helps
avoid/minimise the error rate (i.e. failing to address comments) when
dealing with reviews. I hope it will be helpful for you in forming your
own strategy.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH for 4.6 0/6] Prune legacy migration and move migration v2 out of daft status

2015-07-22 Thread Andrew Cooper
On 22/07/15 12:22, Wei Liu wrote:
 On Wed, Jul 22, 2015 at 11:33:06AM +0100, Ian Campbell wrote:
 On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 This series is some cleanup following the integration of migration v2
 into the codebase.  It removes the legacy migration implementation
 (compatability is provided with the python conversion script), and
 adjusts the migration v2 docs/implementation to no longer be
 experimental.
 IMHO we should take this for 4.6, there is no point in shipping
 obsolete/unused code and leaving it there with the xc_domain_save name
 and the supported thing with a 2 suffix will only tempt people to
 continue to use or build on it.

 The only real risk here is breaking the build, since if it builds it
 will surely work.

 Wei, final call is yours.

 Yes. I agree with you. But please do check this doesn't conflict with
 COLOPre before applying.

 Wei.

There are no conflicts I am aware of, having rebased this series myself.

I will however put together a v2 series, covering the two comments from
review here.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH for-4.6 2/3] xen: replace non-POSIX error codes

2015-07-22 Thread George Dunlap
On Wed, Jul 22, 2015 at 10:43 AM, George Dunlap
george.dun...@eu.citrix.com wrote:
 On Tue, Jul 21, 2015 at 5:15 PM, Jan Beulich jbeul...@suse.com wrote:
 On 21.07.15 at 17:56, roger@citrix.com wrote:
 Some DOMCTLs returned non-POSIX error codes, replace them with POSIX
 compilant values instead. EBADRQC and EBADSLT are replaced by EINVAL, while
 EUSERS is replaced with EOVERFLOW.

 Same here basically - I'd appreciate if we could use EINVAL only as
 a last resort error value, to make errors distinguishable.

 What would be more helpful is if you suggested an alternate error code
 for the two EINVAL changes in this patch.

Sorry, I see you did suggest alternates in the other mail.  I retract
my complaint...

 In the case of the first one, it looks like this is an internal
 hypercall used only to do continuation of paging domctls.  Guests
 should never call this directly; -EINVAL seems perfectly suitable
 here.

But still think EINVAL is probably fine for this case.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH OSSTEST v2 03/13] osstest migrate support check catch - variables

2015-07-22 Thread Ian Campbell
On Tue, 2015-07-21 at 18:36 +0100, Ian Jackson wrote:
 Wei Liu writes (Re: [PATCH OSSTEST v2 03/13] osstest migrate support 
 check catch - variables):
  Do I need to change anything in this patch? I guess not? It's not 
  very
  clear to me.
 
 Ian C was asking whether the patch (which I wrote) was right, in a
 particular respect.  I answered that it is correct.  So no, you don't
 need to do anything to this patch.

Right, that question was really address to Ian to confirm my
understanding of how Tcl works...

He's explained and I understood correctly, s:
Acked-by: Ian Campbell ian.campb...@citrix.com

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [qemu-upstream-unstable test] 59800: regressions - trouble: broken/fail/pass

2015-07-22 Thread osstest service owner
flight 59800 qemu-upstream-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/59800/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm 11 guest-saverestore fail REGR. 
vs. 58880
 test-armhf-armhf-xl-credit2  11 guest-start   fail REGR. vs. 58880

Tests which are failing intermittently (not blocking):
 test-armhf-armhf-xl-credit2   3 host-install(3)  broken in 59777 pass in 59800
 test-armhf-armhf-xl   3 host-install(3)   broken pass in 59777
 test-armhf-armhf-libvirt 17 leak-check/check   fail in 59777 pass in 59800
 test-armhf-armhf-xl-multivcpu 11 guest-start   fail in 59777 pass in 59800
 test-amd64-i386-xl-qemuu-ovmf-amd64 11 guest-saverestorefail pass in 59777
 test-amd64-i386-pair 15 debian-install/dst_host fail pass in 59777

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-xl  12 migrate-support-check fail in 59777 never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-intel 11 guest-start  fail  never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-armhf-armhf-xl-rtds 11 guest-start  fail   never pass
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stop fail never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop  fail never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass

version targeted for testing:
 qemuuc3eb5b77be3c731c2ecd6eddab403bb8dabc135a
baseline version:
 qemuuc4a962ec0c61aa9b860a3635c8424472e6c2cc2c

Last test of basis58880  2015-06-24 13:45:58 Z   27 days
Testing same since59777  2015-07-20 12:49:32 Z1 days2 attempts


People who touched revisions under test:
  Gerd Hoffmann kra...@redhat.com
  Marc-André Lureau marcandre.lur...@gmail.com
  Stefano Stabellini stefano.stabell...@eu.citrix.com

jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl  pass
 test-armhf-armhf-xl  broken
 test-amd64-i386-xl   pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsmpass
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm fail
 test-amd64-amd64-libvirt-xsm pass
 test-armhf-armhf-libvirt-xsm pass
 test-amd64-i386-libvirt-xsm  pass
 test-amd64-amd64-xl-xsm  pass
 test-armhf-armhf-xl-xsm  pass
 test-amd64-i386-xl-xsm   pass
 test-amd64-amd64-xl-pvh-amd  fail
 test-amd64-i386-qemuu-rhel6hvm-amd   pass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64pass
 test-amd64-i386-xl-qemuu-debianhvm-amd64 pass
 test-amd64-i386-freebsd10-amd64  pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  fail
 test-amd64-amd64-xl-qemuu-win7-amd64 fail
 test-amd64-i386-xl-qemuu-win7-amd64  fail
 test-armhf-armhf-xl-arndale  

Re: [Xen-devel] [PATCH for 4.6 0/6] Prune legacy migration and move migration v2 out of daft status

2015-07-22 Thread Ian Campbell
On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 This series is some cleanup following the integration of migration v2
 into the codebase.  It removes the legacy migration implementation
 (compatability is provided with the python conversion script), and
 adjusts the migration v2 docs/implementation to no longer be
 experimental.

IMHO we should take this for 4.6, there is no point in shipping
obsolete/unused code and leaving it there with the xc_domain_save name
and the supported thing with a 2 suffix will only tempt people to
continue to use or build on it.

The only real risk here is breaking the build, since if it builds it
will surely work.

Wei, final call is yours.

Ian.

 
 There are no major changes in this series, but are important changes
 for the status of migration v2 in Xen 4.6
 
 Andrew Cooper (6):
   tools/libxc: Remove legacy migration implementation
   tools/libx{l,c}: Remove the toolstack_{save,restore} callbacks
   tools/libx{l,c}: Remove XC_DEVICE_MODEL_RESTORE_FILE
   tools/libx{l,c}: Drop '2' suffixes from xc_domain_{save,restore}2() 
 functions
   docs: Migration v2 is now no longer draft
   tools/libx{l,c}: Fix trivial Coverity defects in migration v2 code
 
  docs/specs/libxc-migration-stream.pandoc |   19 +-
  docs/specs/libxl-migration-stream.pandoc |   19 +-
  tools/libxc/Makefile |1 -
  tools/libxc/include/xenguest.h   |   34 -
  tools/libxc/xc_domain_restore.c  | 2411 
 --
  tools/libxc/xc_domain_save.c | 2198 
 ---
  tools/libxc/xc_nomigrate.c   |   20 -
  tools/libxc/xc_offline_page.c|   59 +
  tools/libxc/xc_sr_restore.c  |   15 +-
  tools/libxc/xc_sr_save.c |   17 +-
  tools/libxc/xg_save_restore.h|  247 ---
  tools/libxl/libxl.c  |2 +-
  tools/libxl/libxl_create.c   |2 +-
  tools/libxl/libxl_internal.h |2 +-
  tools/libxl/libxl_save_callout.c |2 -
  tools/libxl/libxl_save_helper.c  |4 +-
  tools/libxl/libxl_stream_read.c  |4 +-
  17 files changed, 107 insertions(+), 4949 deletions(-)
  delete mode 100644 tools/libxc/xc_domain_restore.c
  delete mode 100644 tools/libxc/xc_domain_save.c
 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/6] tools/libx{l, c}: Remove the toolstack_{save, restore} callbacks

2015-07-22 Thread Ian Campbell
On Wed, 2015-07-22 at 12:07 +0100, Andrew Cooper wrote:
 On 22/07/15 11:27, Ian Campbell wrote:
  On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
   +TOOLSTACK (deprecated)
   +--
   +
   + *This record was only present for transitionary purposes 
   during
  transitionary isn't really a word... transitional is (even if it
  sounds a bit odd to me)?
 
 It very much is.
 
 http://www.oxforddictionaries.com/definition/english/transition
 
 It is an appropriate derivative in this case.

Fair enough, my spell checker mislead me on this one it seems.

 +  development.  It is should not be used.*
  Does it needs to be accepted on restore for N-N+1 purposes (or N+M 
  for
  M1 since you care about that)? Or does the conversion routine turn 
  it
  into something else?
 
 The libxc toolstack record was used in the #LIBXL_HVM_COMPAT phase, 
 but
 nothing (not even the conversion script) will generate one any more.

Perfect.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH for 4.6 0/6] Prune legacy migration and move migration v2 out of daft status

2015-07-22 Thread Wei Liu
On Wed, Jul 22, 2015 at 12:26:19PM +0100, Andrew Cooper wrote:
 On 22/07/15 12:22, Wei Liu wrote:
  On Wed, Jul 22, 2015 at 11:33:06AM +0100, Ian Campbell wrote:
  On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
  This series is some cleanup following the integration of migration v2
  into the codebase.  It removes the legacy migration implementation
  (compatability is provided with the python conversion script), and
  adjusts the migration v2 docs/implementation to no longer be
  experimental.
  IMHO we should take this for 4.6, there is no point in shipping
  obsolete/unused code and leaving it there with the xc_domain_save name
  and the supported thing with a 2 suffix will only tempt people to
  continue to use or build on it.
 
  The only real risk here is breaking the build, since if it builds it
  will surely work.
 
  Wei, final call is yours.
 
  Yes. I agree with you. But please do check this doesn't conflict with
  COLOPre before applying.
 
  Wei.
 
 There are no conflicts I am aware of, having rebased this series myself.
 

Do you mean you rebase on top of full series of  COLOPre (including
those patches that have not been applied)? That's what I'm looking at.
Hongyang could still post remaining part of COLOPre within this week.

Wei.

 I will however put together a v2 series, covering the two comments from
 review here.
 
 ~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH for-4.6 2/3] xen: replace non-POSIX error codes

2015-07-22 Thread George Dunlap
On Tue, Jul 21, 2015 at 5:15 PM, Jan Beulich jbeul...@suse.com wrote:
 On 21.07.15 at 17:56, roger@citrix.com wrote:
 Some DOMCTLs returned non-POSIX error codes, replace them with POSIX
 compilant values instead. EBADRQC and EBADSLT are replaced by EINVAL, while
 EUSERS is replaced with EOVERFLOW.

 Same here basically - I'd appreciate if we could use EINVAL only as
 a last resort error value, to make errors distinguishable.

What would be more helpful is if you suggested an alternate error code
for the two EINVAL changes in this patch.

In the case of the first one, it looks like this is an internal
hypercall used only to do continuation of paging domctls.  Guests
should never call this directly; -EINVAL seems perfectly suitable
here.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v11][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Ian Jackson
Tiejun Chen writes ([v11][PATCH 11/16] tools/libxl: detect and avoid conflicts 
with RDM):
 While building a VM, HVM domain builder provides struct hvm_info_table{}
 to help hvmloader. Currently it includes two fields to construct guest
 e820 table by hvmloader, low_mem_pgend and high_mem_pgend. So we should
 check them to fix any conflict with RDM.

Acked-by: Ian Jackson ian.jack...@eu.citrix.com

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 03/23] x86: zero BSS using stosl instead of stosb

2015-07-22 Thread Jan Beulich
 On 22.07.15 at 10:42, andrew.coop...@citrix.com wrote:
 In the case of having aligned source and destination on a 16-byte
 boundary (which we can trivially arrange), then ERMSB (to give it its
 Intel name) and rep stosl differ only in the setup cost; they still
 scale at the same rate for changes in length.
 
 Therefore, assuming we arrange for 16-byte alignment, using rep stosl
 would appear to be a single 60ish cycle hit over using ERMSB, but would
 be substantially more efficient than using rep stosb on a non-ERMSB system.
 
 Overall, I think 16 byte alignment and rep stosl is the best compromise.

Or leaving such code alone, with the assumption that over time the
setup cost (on a growing number of systems) outweighs the benefits
(on a shrinking set).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v11][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Jan Beulich
 On 22.07.15 at 10:52, tiejun.c...@intel.com wrote:
 On 2015/7/22 16:28, Jan Beulich wrote:
 On 22.07.15 at 03:30, tiejun.c...@intel.com wrote:
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Stefano Stabellini stefano.stabell...@eu.citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Wei Liu wei.l...@citrix.com
 Acked-by: Wei Liu wei.l...@citrix.com
 Signed-off-by: Tiejun Chen tiejun.c...@intel.com
 Reviewed-by: Kevin Tian kevin.t...@intel.com
 ---
 v11:

 * Use GCNEW_ARRAY to replace libxl__malloc()

 * #define pfn_to_paddrk is missing safety () around x, and
move this into libxl_internal.h

 * Rename set_rdm_entries() to add_rdm_entry() and put the
increment at the end so that the assignments are
to -rdms[d_config-num_rdms].

 * Simply make it so that if there are any rdms specified
in the domain config, they are used instead of the
automatically gathered information (from strategy and
devices). So just return if d_config-rmds is valid.

 * Shorten some code comments.

 I think it is not the first time that we're pointing out to you that
 when you make not just cosmetic changes, review and ack tags
 should be dropped.
 
 I don't recall this sort of requirement was mentioned. Instead, this is 
 new to me. So where can I found this warning you said previously?

Even if I misremember and didn't send such a request your way, if
you read xen-devel you'd have seen this quite a number of times.

 Furthermore, you ask me to drop Reviewed-by/Acked-by in this revision, 
 what's next? Just to this example,
 
 No.1 revision:
 
 Acked-by: Wei Liu wei.l...@citrix.com
 Reviewed-by: Kevin Tian kevin.t...@intel.com
 
 No.2 revision:
 
 I addressed some comments raised by Jackson. But you mean 
 Reviewed-by/Acked-by should be dropped.

Not unilaterally - it depends on the nature of the changes and
the area of code the tag was offered for (I'd tend to say that
reviews normally cover the whole patch, but acks may have
limited meaning and hence may be retained in a wider set of
cases).

 No.3 revision:
 
 I assume Jackson Ack or Review to this so I should leave one line like this,
 
 Reviewed-by: Ian Jackson ian.jack...@eu.citrix.com
 
 without two previous Acked-by/Reviewed-by, right? So looks like the 
 latter always override the former, right?
 
 And I also can't understand why we should drop Reviewed-by/Acked-by from 
 other guys. And, all new comments I addressed don't conflict with our 
 previous revision so why?

In the case at hand you even had to adjust the algorithm (whether
and how to honor incoming data). I.e. the question isn't whether
there's a conflict, but whether there's any adjustment that may,
from an abstract pov, result in the person having offered such a
tag to now possibly have a different opinion. You have to face it -
(s)he may be of the opinion that the change was wrong (reviewers
make mistakes just like contributors do) or the suggestion wrongly
carried out.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 00/13] Libvirt migration and HVM tests

2015-07-22 Thread Wei Liu
--- /dev/fd/63  2015-07-22 11:10:58.179793965 +0100
+++ /dev/fd/62  2015-07-22 11:10:58.183793806 +0100
+xen-unstable   test-amd64-amd64-libvirt-pair   
  all_hostflags   
arch-amd64,arch-xen-amd64,suite-wheezy,purpose-test,equiv-1
+xen-unstable   test-amd64-amd64-libvirt-pair   
  archamd64
+xen-unstable   test-amd64-amd64-libvirt-pair   
  buildjobbuild-amd64
+xen-unstable   test-amd64-amd64-libvirt-pair   
  debian_arch amd64
+xen-unstable   test-amd64-amd64-libvirt-pair   
  debian_kernkind pvops
+xen-unstable   test-amd64-amd64-libvirt-pair   
  kernbuildjobbuild-amd64-pvops
+xen-unstable   test-amd64-amd64-libvirt-pair   
  kernkindpvops
+xen-unstable   test-amd64-amd64-libvirt-pair   
  libvirtbuildjob build-amd64-libvirt
+xen-unstable   test-amd64-amd64-libvirt-pair   
  toolstack   libvirt
+xen-unstable   test-amd64-amd64-libvirt-pair   
  xenbuildjob build-amd64
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  all_hostflags   
arch-amd64,arch-xen-amd64,suite-wheezy,purpose-test,hvm
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  archamd64
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  biosseabios
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  buildjobbuild-amd64-xsm
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  debianhvm_image debian-7.2.0-amd64-CD-1.iso
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  device_model_versionqemu-xen
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  enable_xsm  true
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  kernbuildjobbuild-amd64-pvops
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  kernkindpvops
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  libvirtbuildjob build-amd64-libvirt
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  toolstack   libvirt
+xen-unstable   test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm  
  xenbuildjob build-amd64-xsm
+xen-unstable   test-amd64-i386-libvirt-pair
  all_hostflags   
arch-i386,arch-xen-amd64,suite-wheezy,purpose-test,equiv-1
+xen-unstable   test-amd64-i386-libvirt-pair
  archi386
+xen-unstable   test-amd64-i386-libvirt-pair
  buildjobbuild-i386
+xen-unstable   test-amd64-i386-libvirt-pair
  debian_arch i386
+xen-unstable   test-amd64-i386-libvirt-pair
  debian_kernkind pvops
+xen-unstable   test-amd64-i386-libvirt-pair
  kernbuildjobbuild-i386-pvops
+xen-unstable   test-amd64-i386-libvirt-pair
  kernkindpvops
+xen-unstable   test-amd64-i386-libvirt-pair
  libvirtbuildjob build-i386-libvirt
+xen-unstable   test-amd64-i386-libvirt-pair
  toolstack   libvirt
+xen-unstable   test-amd64-i386-libvirt-pair
  xenbuildjob build-amd64
+xen-unstable   test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm   
  all_hostflags   
arch-i386,arch-xen-amd64,suite-wheezy,purpose-test,hvm
+xen-unstable   test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm   
  archi386
+xen-unstable   test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm   
  biosseabios
+xen-unstable   test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm   
  buildjobbuild-i386-xsm
+xen-unstable   test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm   
  debianhvm_image debian-7.2.0-amd64-CD-1.iso
+xen-unstable   test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm   
  device_model_versionqemu-xen
+xen-unstable   

[Xen-devel] [PATCH OSSTEST v3 04/13] toolstack/libvirt: guest migrate, save and restore support

2015-07-22 Thread Wei Liu
Signed-off-by: Wei Liu wei.l...@citrix.com
Acked-by: Ian Campbell ian.campb...@citrix.com
---
v3: move this patch earlier in series
---
 Osstest/Toolstack/libvirt.pm | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/Osstest/Toolstack/libvirt.pm b/Osstest/Toolstack/libvirt.pm
index 26a2fa3..5c6ca90 100644
--- a/Osstest/Toolstack/libvirt.pm
+++ b/Osstest/Toolstack/libvirt.pm
@@ -93,17 +93,22 @@ sub saverestore_check ($) {
 
 sub migrate ($) {
 my ($self,$gho,$dst,$timeout) = @_;
-die Migration is not yet supported on libvirt.;
+my $ho = $self-{Host};
+my $gn = $gho-{Name};
+target_cmd_root($ho, virsh migrate $gn $dst, $timeout);
 }
 
 sub save () {
 my ($self,$gho,$f,$timeout) = @_;
-die Save is not yet supported on libvirt.;
+my $ho = $self-{Host};
+my $gn = $gho-{Name};
+target_cmd_root($ho, virsh save $gn $f, $timeout);
 }
 
 sub restore () {
 my ($self,$gho,$f,$timeout) = @_;
-die Restore is not yet supported on libvirt.;
+my $ho = $self-{Host};
+target_cmd_root($ho, virsh restore $f, $timeout);
 }
 
 1;
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 05/13] toolstack: distinguish local and remote migration support

2015-07-22 Thread Wei Liu
Libvirt supports migrating a guest to remote host but not local host.
Distinguish the concept of local migration support and remote migration
support.

Toolstack's migrate_check now takes an extra argument to indicate which
mode we're interested in.

In sg-run-job we still check for local migration support because that's
the implicit target in test-guest-migr. Libvirt will still be blocked.

Signed-off-by: Wei Liu wei.l...@citrix.com
---
v3: use tuple to get all three arguments at once
v2: use bool instead of local remote string
---
 Osstest/Toolstack/libvirt.pm | 18 +++---
 Osstest/Toolstack/xend.pm|  2 +-
 Osstest/Toolstack/xl.pm  |  4 ++--
 sg-run-job   |  2 +-
 ts-migrate-support-check |  7 +--
 5 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/Osstest/Toolstack/libvirt.pm b/Osstest/Toolstack/libvirt.pm
index 5c6ca90..32dca84 100644
--- a/Osstest/Toolstack/libvirt.pm
+++ b/Osstest/Toolstack/libvirt.pm
@@ -72,9 +72,21 @@ sub shutdown_wait ($$$) {
 guest_await_destroy($gho,$timeout);
 }
 
-sub migrate_check ($) {
-my ($self) = @_;
-die Migration check is not yet supported on libvirt.;
+sub migrate_check ($$) {
+my ($self, $local) = @_;
+my $rc;
+
+if ($local) {
+# local migration is not supported
+$rc = 1;
+} else {
+   my $ho = $self-{Host};
+   my $caps = target_cmd_output_root($ho, virsh capabilities);
+   $rc = ($caps =~ m/migration_features/) ? 0 : 1
+}
+
+logm(rc=$rc);
+return $rc;
 }
 
 sub check_for_command($$) {
diff --git a/Osstest/Toolstack/xend.pm b/Osstest/Toolstack/xend.pm
index fd54ae1..b435938 100644
--- a/Osstest/Toolstack/xend.pm
+++ b/Osstest/Toolstack/xend.pm
@@ -36,7 +36,7 @@ sub new {
 }
 
 # xend always supported migration
-sub migrate_check ($) { return 0; }
+sub migrate_check ($$) { return 0; }
 
 # xend always supported save / restore
 sub saverestore_check ($) { return 0; }
diff --git a/Osstest/Toolstack/xl.pm b/Osstest/Toolstack/xl.pm
index 1d014ca..67564b3 100644
--- a/Osstest/Toolstack/xl.pm
+++ b/Osstest/Toolstack/xl.pm
@@ -70,8 +70,8 @@ sub check_for_command($$) {
 return $rc;
 }
 
-sub migrate_check ($) {
-my ($self) = @_;
+sub migrate_check ($$) {
+my ($self,$local) = @_;
 return check_for_command($self, migrate);
 }
 
diff --git a/sg-run-job b/sg-run-job
index 61f88fb..16fcfc1 100755
--- a/sg-run-job
+++ b/sg-run-job
@@ -300,7 +300,7 @@ proc run-job/test-pair {} {
 }
 
 proc test-guest-migr {g} {
-set to_reap [spawn-ts . = ts-migrate-support-check + host $g]
+set to_reap [spawn-ts . = ts-migrate-support-check + host $g 1]
 set can_migrate [reap-ts $to_reap]
 if {!$can_migrate} return
 
diff --git a/ts-migrate-support-check b/ts-migrate-support-check
index cd41f68..a1440a2 100755
--- a/ts-migrate-support-check
+++ b/ts-migrate-support-check
@@ -22,6 +22,9 @@ use Osstest::TestSupport;
 
 tsreadconfig();
 
-our $ho = selecthost($ARGV[0]);
 
-exit(toolstack($ho)-migrate_check());
+# Mode should be either 1 (local) or 0 (remote)
+our ($whhost, $gn, $mode) = @ARGV;
+our $ho = selecthost($whhost);
+
+exit(toolstack($ho)-migrate_check($mode));
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 09/13] ts-debian-hvm-install: stub out libvirt + ovmf / rombios

2015-07-22 Thread Wei Liu
Libvirt's configuration converter doesn't know how to deal with BIOS
selection. The end result is it always use the default one (seabios).
Stub out ovmf and rombios to avoid false positive results.

This restriction will be removed once libvirt's converter knows how to
deal with BIOS selection.

Note that we don't expect to see such configurations any time soon.
These configurations will be filtered in make-flight. The changes here
are more of an extra level of safety check.

Signed-off-by: Wei Liu wei.l...@citrix.com
---
v3: add more information to commit message
---
 ts-debian-hvm-install | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/ts-debian-hvm-install b/ts-debian-hvm-install
index f05b1a7..bd16506 100755
--- a/ts-debian-hvm-install
+++ b/ts-debian-hvm-install
@@ -28,6 +28,13 @@ if (@ARGV  $ARGV[0] =~ m/^--stage(\d+)$/) { $stage=$1; 
shift @ARGV; }
 
 defined($r{bios}) or die Need to define which bios to use;
 
+# Libvirt doesn't know anything about bios. It will always use the
+# default one (seabios). Stub out rombios and ovmf to avoid false
+# positive results.
+if ($r{bios} =~ m/ovmf|rombios/  $r{toolstack} eq 'libvirt') {
+die libvirt + $r{bios} is not supported yet.;
+}
+
 our ($whhost,$gn) = @ARGV;
 $whhost ||= 'host';
 $gn ||= 'debianhvm';
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 01/13] toolstack: save / restore check

2015-07-22 Thread Wei Liu
Introduce check_for_command function and use it to check save / restore
functionality.

Signed-off-by: Wei Liu wei.l...@citrix.com
---
v3: remove $TOOLSTACK prefix.
v2: introduce $TOOLSTACK_check_for_command function.
---
 Osstest/Toolstack/libvirt.pm | 14 ++
 Osstest/Toolstack/xend.pm|  3 +++
 Osstest/Toolstack/xl.pm  | 16 +---
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/Osstest/Toolstack/libvirt.pm b/Osstest/Toolstack/libvirt.pm
index 51a10de..26a2fa3 100644
--- a/Osstest/Toolstack/libvirt.pm
+++ b/Osstest/Toolstack/libvirt.pm
@@ -77,6 +77,20 @@ sub migrate_check ($) {
 die Migration check is not yet supported on libvirt.;
 }
 
+sub check_for_command($$) {
+my ($self,$cmd) = @_;
+my $ho = $self-{Host};
+my $help = target_cmd_output_root($ho, virsh help);
+my $rc = ($help =~ m/^\s*$cmd/m) ? 0 : 1;
+logm(rc=$rc);
+return $rc;
+}
+
+sub saverestore_check ($) {
+my ($self) = @_;
+return check_for_command($self, save);
+}
+
 sub migrate ($) {
 my ($self,$gho,$dst,$timeout) = @_;
 die Migration is not yet supported on libvirt.;
diff --git a/Osstest/Toolstack/xend.pm b/Osstest/Toolstack/xend.pm
index 972b3b1..fd54ae1 100644
--- a/Osstest/Toolstack/xend.pm
+++ b/Osstest/Toolstack/xend.pm
@@ -38,4 +38,7 @@ sub new {
 # xend always supported migration
 sub migrate_check ($) { return 0; }
 
+# xend always supported save / restore
+sub saverestore_check ($) { return 0; }
+
 1;
diff --git a/Osstest/Toolstack/xl.pm b/Osstest/Toolstack/xl.pm
index 3c3d348..1d014ca 100644
--- a/Osstest/Toolstack/xl.pm
+++ b/Osstest/Toolstack/xl.pm
@@ -61,15 +61,25 @@ sub shutdown_wait ($$$) {
 target_cmd_root($ho,$self-{_Command} shutdown -w${acpi_fallback} $gn, 
$timeout);
 }
 
-sub migrate_check ($) {
-my ($self) = @_;
+sub check_for_command($$) {
+my ($self,$cmd) = @_;
 my $ho = $self-{Host};
 my $help = target_cmd_output_root($ho, $self-{_Command}. help);
-my $rc = ($help =~ m/^\s*migrate/m) ? 0 : 1;
+my $rc = ($help =~ m/^\s*$cmd/m) ? 0 : 1;
 logm(rc=$rc);
 return $rc;
 }
 
+sub migrate_check ($) {
+my ($self) = @_;
+return check_for_command($self, migrate);
+}
+
+sub saverestore_check ($) {
+my ($self) = @_;
+return check_for_command($self, save);
+}
+
 sub migrate () {
 my ($self,$gho,$dho,$timeout) = @_;
 my $sho = $self-{Host};
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 08/13] ts-libvirt-build: run libvirt test suite

2015-07-22 Thread Wei Liu
We're interested in xlconfigtest.

Signed-off-by: Wei Liu wei.l...@citrix.com
Acked-by: Ian Campbell ian.campb...@citrix.com
---
 ts-libvirt-build | 35 ++-
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/ts-libvirt-build b/ts-libvirt-build
index f764b53..713bea0 100755
--- a/ts-libvirt-build
+++ b/ts-libvirt-build
@@ -27,6 +27,7 @@ builddirsprops();
 
 our %submodmap = qw(gnulib gnulib);
 our $submodules;
+our $xenprefix;
 
 sub libvirtd_init ();
 
@@ -39,15 +40,6 @@ sub checkout () {
 }
 
 sub config() {
-my $xenprefix;
-foreach (qw(/usr/local /usr)) {
-   if (target_file_exists($ho, $xendist$_/lib/libxenctrl.so)) {
-   $xenprefix=$xendist$_;
-   last;
-   }
-}
-die no xen prefix unless $xenprefix;
-
 # Uses --no-git because otherwise autogen.sh will undo
 # submodulefixup's attempts to honour
 # revision_libvirt_gnulib. This in turn requires that we specify
@@ -75,6 +67,19 @@ sub build() {
 END
 }
 
+sub runtest() {
+ target_cmd_build($ho, 3600, $builddir, END);
+cd libvirt
+rm -f ../libvirt-test-suite-ok-stamp
+(LD_LIBRARY_PATH=$xenprefix/lib/ \\
+ make check VIR_TEST_EXPENSIVE=1 21  \\
+ touch ../libvirt-test-suite-ok-stamp) \\
+ | tee ../libvirt-test-suite-log
+test -f ../libvirt-test-suite-ok-stamp #/
+echo ok.
+END
+}
+
 sub install() {
 target_cmd_build($ho, 300, $builddir, END);
 mkdir -p dist
@@ -90,9 +95,21 @@ END
 END
 }
 
+sub setxenprefix() {
+foreach (qw(/usr/local /usr)) {
+if (target_file_exists($ho, $xendist$_/lib/libxenctrl.so)) {
+$xenprefix=$xendist$_;
+last;
+}
+}
+die no xen prefix unless $xenprefix;
+}
+
 checkout();
+setxenprefix();
 config();
 build();
+runtest();
 install();
 built_stash($ho, $builddir, 'dist', 'libvirtdist');
 
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 02/13] Introduce ts-saverestore-support-check

2015-07-22 Thread Wei Liu
We need this script because we're going to separate the concept of save
/ restore and migration later.

Signed-off-by: Wei Liu wei.l...@citrix.com
---
v3: takes $gn
---
 ts-saverestore-support-check | 28 
 1 file changed, 28 insertions(+)
 create mode 100755 ts-saverestore-support-check

diff --git a/ts-saverestore-support-check b/ts-saverestore-support-check
new file mode 100755
index 000..fcbcb1c
--- /dev/null
+++ b/ts-saverestore-support-check
@@ -0,0 +1,28 @@
+#!/usr/bin/perl -w
+# This is part of osstest, an automated testing framework for Xen.
+# Copyright (C) 2015 Citrix Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see http://www.gnu.org/licenses/.
+
+use strict qw(vars);
+use DBI;
+use Osstest;
+use Osstest::TestSupport;
+
+tsreadconfig();
+
+our ($whhost, $gn) = @ARGV;
+our $ho = selecthost($whhost);
+
+exit(toolstack($ho)-saverestore_check());
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 06/13] sg-run-job: remove save/restore dependency on local migration support

2015-07-22 Thread Wei Liu
Since we've introduced different checks for save / restore and local
migration, it's possible to run save / restore tests without running
local migration tests.

Signed-off-by: Wei Liu wei.l...@citrix.com
---
v3: ts-saverestore-support-check takes $g
---
 sg-run-job | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/sg-run-job b/sg-run-job
index 16fcfc1..6fad114 100755
--- a/sg-run-job
+++ b/sg-run-job
@@ -302,13 +302,20 @@ proc run-job/test-pair {} {
 proc test-guest-migr {g} {
 set to_reap [spawn-ts . = ts-migrate-support-check + host $g 1]
 set can_migrate [reap-ts $to_reap]
-if {!$can_migrate} return
+set to_reap [spawn-ts . = ts-saverestore-support-check + host $g]
+set can_saverestore [reap-ts $to_reap]
 
 foreach iteration {{} .2} {
-run-ts . =$iteration ts-guest-saverestore + host $g
-run-ts . =$iteration ts-guest-localmigrate + host $g
+if {$can_saverestore} {
+run-ts . =$iteration ts-guest-saverestore + host $g
+}
+if {$can_migrate} {
+run-ts . =$iteration ts-guest-localmigrate + host $g
+}
+}
+if {$can_migrate} {
+run-ts . = ts-guest-localmigrate x10 + host $g
 }
-run-ts . = ts-guest-localmigrate x10 + host $g
 }
 
 proc test-guest {g} {
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 03/13] osstest migrate support check catch - variables

2015-07-22 Thread Wei Liu
From: Ian Jackson ian.jack...@eu.citrix.com

The goal here is to skip the following test steps if the check fails.

Instead of using catch to turn an exception into value, we can just
use spawn-ts and reap-ts to do that. This pattern is useful when we add
in extra check for save / restore check later.

Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com
[ wei: write commit message ]
Signed-off-by: Wei Liu wei.l...@citrix.com
Acked-by: Ian Campbell ian.campb...@citrix.com
---
 sg-run-job | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sg-run-job b/sg-run-job
index d53fd83..61f88fb 100755
--- a/sg-run-job
+++ b/sg-run-job
@@ -300,7 +300,9 @@ proc run-job/test-pair {} {
 }
 
 proc test-guest-migr {g} {
-if {[catch { run-ts . = ts-migrate-support-check + host $g }]} return
+set to_reap [spawn-ts . = ts-migrate-support-check + host $g]
+set can_migrate [reap-ts $to_reap]
+if {!$can_migrate} return
 
 foreach iteration {{} .2} {
 run-ts . =$iteration ts-guest-saverestore + host $g
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 03/23] x86: zero BSS using stosl instead of stosb

2015-07-22 Thread Andrew Cooper
On 22/07/15 11:04, Jan Beulich wrote:
 On 22.07.15 at 10:42, andrew.coop...@citrix.com wrote:
 In the case of having aligned source and destination on a 16-byte
 boundary (which we can trivially arrange), then ERMSB (to give it its
 Intel name) and rep stosl differ only in the setup cost; they still
 scale at the same rate for changes in length.

 Therefore, assuming we arrange for 16-byte alignment, using rep stosl
 would appear to be a single 60ish cycle hit over using ERMSB, but would
 be substantially more efficient than using rep stosb on a non-ERMSB system.

 Overall, I think 16 byte alignment and rep stosl is the best compromise.
 Or leaving such code alone, with the assumption that over time the
 setup cost (on a growing number of systems) outweighs the benefits
 (on a shrinking set).

The BSS is large - 295k on the last compile I have from staging.  The
setup cost is lost in the nose compared to the elapsed time to write
that many zeroes to memory.

Therefore, on an ERMBS-capable system, the two options will complete in
the same amount of time.

However, on all AMD hardware and Intel hardware older than IvyBridge,
rep stosl is 4 times faster than rep stosb.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH for 4.6 0/6] Prune legacy migration and move migration v2 out of daft status

2015-07-22 Thread Wei Liu
On Wed, Jul 22, 2015 at 11:33:06AM +0100, Ian Campbell wrote:
 On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
  This series is some cleanup following the integration of migration v2
  into the codebase.  It removes the legacy migration implementation
  (compatability is provided with the python conversion script), and
  adjusts the migration v2 docs/implementation to no longer be
  experimental.
 
 IMHO we should take this for 4.6, there is no point in shipping
 obsolete/unused code and leaving it there with the xc_domain_save name
 and the supported thing with a 2 suffix will only tempt people to
 continue to use or build on it.
 
 The only real risk here is breaking the build, since if it builds it
 will surely work.
 
 Wei, final call is yours.
 

Yes. I agree with you. But please do check this doesn't conflict with
COLOPre before applying.

Wei.

 Ian.
 
  
  There are no major changes in this series, but are important changes
  for the status of migration v2 in Xen 4.6
  
  Andrew Cooper (6):
tools/libxc: Remove legacy migration implementation
tools/libx{l,c}: Remove the toolstack_{save,restore} callbacks
tools/libx{l,c}: Remove XC_DEVICE_MODEL_RESTORE_FILE
tools/libx{l,c}: Drop '2' suffixes from xc_domain_{save,restore}2() 
  functions
docs: Migration v2 is now no longer draft
tools/libx{l,c}: Fix trivial Coverity defects in migration v2 code
  
   docs/specs/libxc-migration-stream.pandoc |   19 +-
   docs/specs/libxl-migration-stream.pandoc |   19 +-
   tools/libxc/Makefile |1 -
   tools/libxc/include/xenguest.h   |   34 -
   tools/libxc/xc_domain_restore.c  | 2411 
  --
   tools/libxc/xc_domain_save.c | 2198 
  ---
   tools/libxc/xc_nomigrate.c   |   20 -
   tools/libxc/xc_offline_page.c|   59 +
   tools/libxc/xc_sr_restore.c  |   15 +-
   tools/libxc/xc_sr_save.c |   17 +-
   tools/libxc/xg_save_restore.h|  247 ---
   tools/libxl/libxl.c  |2 +-
   tools/libxl/libxl_create.c   |2 +-
   tools/libxl/libxl_internal.h |2 +-
   tools/libxl/libxl_save_callout.c |2 -
   tools/libxl/libxl_save_helper.c  |4 +-
   tools/libxl/libxl_stream_read.c  |4 +-
   17 files changed, 107 insertions(+), 4949 deletions(-)
   delete mode 100644 tools/libxc/xc_domain_restore.c
   delete mode 100644 tools/libxc/xc_domain_save.c
  

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v10][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Chen, Tiejun

Thanks for your clarification to me.


The solution to this is to be systematic in how you handle the email
based review of a series, not to add a further to the reviewer by using
IRC as well.

For example, here is how I handle review of a series I am working on:

I keep all the replies to a series I'm working on marked unread. When I
come to rework the series I take the first reply to the first patch and
start a draft reply to it quoting the entire review.

I then go through the comments one by one and either:

   * make the _complete_ code change, including adding the Changes
 in vN bit to the commit log and delete that comment from the
 reply


Are you saying this case of resending this particular patch online?


or
   * write a reply in my draft to that particular comment which does
 one or more of:

   * Explain that I don't understand the suggestion,
 probably asking questions to try and clarify what was
 being asked for.


Yes, in this case we're arguing, I was really trying to send a sample of 
this code fragment to ask this before I sent out the complete change.



   * Explain, with reasons, why I disagree with the
 suggestion
   * Explain, with reasons, why I only implemented part of
 the suggestion.

Only then do I move on to the next comment in that mail and repeat. At
the end I've either deleted all the comments from my draft (because
I've fully implemented everything) so the draft can be discarded or I
have an email to send explaining what I've not done and why. Only now
do I mark the original review email as read.

Then I move on to the next reply to that thread in my mail box and
repeat until I have been through every mail in the thread and addressed
_all_ of the comments.

At the end of this process _every_ bit of review feedback is addressed
either by making the requested change or by responding explaining the
reason why the change hasn't been made. This is absolutely crucial. You
should never silently ignore a piece of review, even if you don't


I should double check each line but sometimes this doesn't mean I can 
understand everything correctly to do right thing as you expect. But 
this is really not what I intend to do.


Thanks
Tiejun


understand it or disagree with it, always reply and explain why you
haven't.

If the review was particularly long or complex I will then do a second
pass through the review comments and check that every comment is either
mentioned in a Changes in vN changelog comment or I have replied to
it.

I do all of that before posting the next version. IMHO until a
contributor has shown they are diligent about addressing review
comments they should _never_ send out a series which only has review
partially addressed.

The presence of an IRC channel in no way changes the requirement to be
systematic and thorough when dealing with email review.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 11/13] make-flight: debian hvm tests with libvirt

2015-07-22 Thread Wei Liu
Since upstream QEMU is the default, that's what libvirt is using. We
generate test case to test libvirt with upstream QEMU.

Signed-off-by: Wei Liu wei.l...@citrix.com
---
See #0 for runvars diff

v3: make toolstack second argument, only generate -xsm test
---
 make-flight | 37 -
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/make-flight b/make-flight
index 3b31939..49f205e 100755
--- a/make-flight
+++ b/make-flight
@@ -100,7 +100,9 @@ job_create_test_filter_callback () {
 *)
   case $job in
 *-qemuu-*)
-  if [ x$toolstack != xxl ]; then return 1; fi
+  if [ x$toolstack != xxl -a x$toolstack != xlibvirt ];then
+  return 1;
+  fi
   ;;
   esac
   ;;
@@ -216,19 +218,20 @@ do_hvm_win7_x64_tests () {
 
 do_hvm_debian_test_one () {
   testname=$1
-  bios=$2
-  xsm=$3
-  stubdom=$4
+  toolstack=$2
+  bios=$3
+  xsm=$4
+  stubdom=$5
 
   stubdom_suffix=
   stubdom_runvar=
-  if [ x$stubdom != x ]; then
+  if [ x$stubdom = xtrue ]; then
   stubdom_suffix=-stubdom
   stubdom_runvar=debianhvm_stubdom=$stubdom
   fi
 
-  job_create_test 
test-$xenarch$kern-$dom0arch-xl$qemuu_suffix$stubdom_suffix-$testname-amd64\
-test-debianhvm xl $xenarch $dom0arch $qemuu_runvar \
+  job_create_test 
test-$xenarch$kern-$dom0arch-$toolstack$qemuu_suffix$stubdom_suffix-$testname-amd64\
+test-debianhvm $toolstack $xenarch $dom0arch $qemuu_runvar \
 enable_xsm=$xsm \
 $stubdom_runvar \
 debianhvm_image=debian-7.2.0-amd64-CD-1.iso \
@@ -243,20 +246,28 @@ do_hvm_debian_tests() {
 return
   fi
 
-  # QEMU upstream supports ovmf and seabios
+  # QEMU upstream supports
+  #   1. ovmf + xl
+  #   2. seabios + xl
+  #   3. seabios + libvirt
+  # For libvirt we only generate -xsm test case.
   if [ x$qemuu_suffix == x-qemuu ]; then
-do_hvm_debian_test_one ovmf ovmf false
+do_hvm_debian_test_one ovmf xl ovmf false
 for xsm in $xsms ; do
-  do_hvm_debian_test_one debianhvm seabios $xsm
+  do_hvm_debian_test_one debianhvm xl seabios $xsm
+  if [ x$xsm = xtrue ]; then
+  do_hvm_debian_test_one debianhvm libvirt seabios $xsm
+  fi
 done
   fi
 
   # QEMU traditional supports rombios and stubdom
+  # Only test xl with QEMU traditional
   if [ x$qemuu_suffix == x-qemut ]; then
 for xsm in $xsms ; do
-  do_hvm_debian_test_one debianhvm rombios $xsm
-  if [ x$xsm = xtrue ]; then
-  do_hvm_debian_test_one debianhvm rombios $xsm true
+  do_hvm_debian_test_one debianhvm xl rombios $xsm
+  if [ x$xsm = xtrue -a x$toolstack = xxl ]; then
+  do_hvm_debian_test_one debianhvm xl rombios $xsm true
   fi
 done
   fi
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 12/13] make-flight, mfi-common: rename onetoolstack to pairtoolstack

2015-07-22 Thread Wei Liu
The name onetoolstack in confusing. Currently it's in fact referring
to the toolstack used to test pair migration, so rename it to
pairtoolstack.

No functional changes introduced.

Signed-off-by: Wei Liu wei.l...@citrix.com
Acked-by: Ian Campbell ian.campb...@citrix.com
---
 make-flight | 2 +-
 mfi-common  | 8 
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/make-flight b/make-flight
index 49f205e..dde85c9 100755
--- a/make-flight
+++ b/make-flight
@@ -466,7 +466,7 @@ test_matrix_do_one () {
 
   # Test live migration
   job_create_test test-$xenarch$kern-$dom0arch-pair test-pair \
-$onetoolstack $xenarch $dom0arch \
+$pairtoolstack $xenarch $dom0arch \
 !host !host_hostflags \
 $debian_runvars \
 all_hostflags=$most_hostflags,equiv-1
diff --git a/mfi-common b/mfi-common
index a9e966f..e517019 100644
--- a/mfi-common
+++ b/mfi-common
@@ -313,10 +313,10 @@ job_create_test () {
 test_matrix_iterate () {
 
   case $xenbranch in
-  xen-3.*-testing)  onetoolstack=xend ;;
-  xen-4.0-testing)  onetoolstack=xend ;;
-  xen-4.1-testing)  onetoolstack=xend ;;
-  *)onetoolstack=xl ;;
+  xen-3.*-testing)  pairtoolstack=xend ;;
+  xen-4.0-testing)  pairtoolstack=xend ;;
+  xen-4.1-testing)  pairtoolstack=xend ;;
+  *)pairtoolstack=xl ;;
   esac
 
   for xenarch in ${TEST_ARCHES- i386 amd64 armhf } ; do
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 13/13] make-flight, mfi-common: create live migration test for libvirt

2015-07-22 Thread Wei Liu
Note that we start testing libvirt migration for 4.4 and above.

Signed-off-by: Wei Liu wei.l...@citrix.com
Acked-by: Ian Campbell ian.campb...@citrix.com
---
See #0 for runvars diff
---
 make-flight | 13 +++--
 mfi-common  |  4 +++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/make-flight b/make-flight
index dde85c9..3de9151 100755
--- a/make-flight
+++ b/make-flight
@@ -465,11 +465,20 @@ test_matrix_do_one () {
   done # qemuu_suffix
 
   # Test live migration
-  job_create_test test-$xenarch$kern-$dom0arch-pair test-pair \
-$pairtoolstack $xenarch $dom0arch \
+  for toolstack in $pairtoolstack; do
+# Don't change test case name for old test cases with xl and xend
+if [ x$toolstack = xxl -o x$toolstack = xxend ]; then
+  toolstack_suffix=
+else
+  toolstack_suffix=-$toolstack
+fi
+job_create_test test-$xenarch$kern-$dom0arch$toolstack_suffix-pair \
+test-pair \
+$toolstack $xenarch $dom0arch \
 !host !host_hostflags \
 $debian_runvars \
 all_hostflags=$most_hostflags,equiv-1
+  done
 
   if [ x$test_pvh = xy -a $xenarch = amd64 -a $dom0arch = amd64 ]; then
 
diff --git a/mfi-common b/mfi-common
index e517019..0e2e084 100644
--- a/mfi-common
+++ b/mfi-common
@@ -316,7 +316,9 @@ test_matrix_iterate () {
   xen-3.*-testing)  pairtoolstack=xend ;;
   xen-4.0-testing)  pairtoolstack=xend ;;
   xen-4.1-testing)  pairtoolstack=xend ;;
-  *)pairtoolstack=xl ;;
+  xen-4.2-testing)  pairtoolstack=xl ;;
+  xen-4.3-testing)  pairtoolstack=xl ;;
+  *)pairtoolstack=xl libvirt ;;
   esac
 
   for xenarch in ${TEST_ARCHES- i386 amd64 armhf } ; do
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH OSSTEST v3 10/13] TestSupport: don't put kernel= in HVM config when using xl and libvirt

2015-07-22 Thread Wei Liu
Setting kernel to hvmloader is ignored in xl but not in libvirt. Libvirt
config converter will translate that then pass it to QEMU. QEMU
complains there is no kernel called hvmloader and exits.

Remove this option for xl and libvirt.  Xl is not affected and libvirt
will be able to create HVM guest. Xend might still need it.

Signed-off-by: Wei Liu wei.l...@citrix.com
---
v3: xend still needs that option
---
 Osstest/TestSupport.pm | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Osstest/TestSupport.pm b/Osstest/TestSupport.pm
index 66dc218..37dbe55 100644
--- a/Osstest/TestSupport.pm
+++ b/Osstest/TestSupport.pm
@@ -1624,8 +1624,11 @@ sub more_prepareguest_hvm (;@) {
 }
 my $disks = join ,\t\t\n, map { '$_' } @disks;
 
+my $kernel = $ho-{Toolstack}-{Name} =~ m/xend/ ?
+   kernel  = 'hvmloader' : '';
+
 my $cfg = END;
-kernel  = 'hvmloader'
+$kernel
 builder = 'hvm'
 #
 disk= [
-- 
1.9.1


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v10][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread Ian Jackson
Chen, Tiejun writes (Re: [Xen-devel] [v10][PATCH 11/16] tools/libxl: detect 
and avoid conflicts with RDM):
  I then go through the comments one by one and either:
 
 * make the _complete_ code change, including adding the Changes
   in vN bit to the commit log and delete that comment from the
   reply
 
 Are you saying this case of resending this particular patch online?

Here is an example of what Ian C is talking about: You failed to
address all my comments before posting v10.  That caused me to post
this:

  This is now the third time I have posted that text.  It is the fifth
  request or clarification I have had to make about this very small
  area.  I have to say that I'm finding this rather frustrating.

You should have been systematic.  Like Ian C suggests.  I do it that
way too.  Instead, you repeatedly left things undone.



 Yes, in this case we're arguing, I was really trying to send a sample of 
 this code fragment to ask this before I sent out the complete change.

No, your v10 series was not a sample.


It is of course OK to post a sample.  But a reviewer will not read
such a sample thoroughly; they will not look for all problems.  The
reviewer will look at the sample, so that the reviewer can understand
your words.  They will consider the aspects of the sample that are
being discussed.  They will not consider other aspects of the sample.

You did post such a sample in 55ae2bb1.9030...@intel.com.  I read
the sample, to understand what your words meant.

Then later, you complained that:

  But indeed, before I post this whole patch online I also picked up
  this chunk of code to ask you to take a look that.

Ie you complained that I did not thoroughly review your sample.


You cannot have this both ways.  When you are posting a sample, it
is purely to illuminate the particular discussion.  When you are
posting a full patch for review, you must have addressed _every_
previous comment.


Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 3/6] tools/libx{l, c}: Remove XC_DEVICE_MODEL_RESTORE_FILE

2015-07-22 Thread Ian Campbell
On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 All handling of device model files is now at the libxl level.  Remove
 XC_DEVICE_MODEL_RESTORE_FILE and introduce 
 LIBXL_DEVICE_MODEL_RESTORE_FILE in
 its place.
 
 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Wei Liu wei.l...@citrix.com

Acked-by: Ian Campbell ian.campb...@citrix.com


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/6] tools/libx{l, c}: Remove the toolstack_{save, restore} callbacks

2015-07-22 Thread Ian Campbell
On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 
 +TOOLSTACK (deprecated)
 +--
 +
 + *This record was only present for transitionary purposes during

transitionary isn't really a word... transitional is (even if it
sounds a bit odd to me)?

 +  development.  It is should not be used.*

Does it needs to be accepted on restore for N-N+1 purposes (or N+M for
M1 since you care about that)? Or does the conversion routine turn it
into something else?




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v12] introduce XENMEM_reserved_device_memory_map

2015-07-22 Thread Jan Beulich
This is a prerequisite for punching holes into HVM and PVH guests' P2M
to allow passing through devices that are associated with (on VT-d)
RMRRs.

Signed-off-by: Jan Beulich jbeul...@suse.com
Signed-off-by: Tiejun Chen tiejun.c...@intel.com
Acked-by: Kevin Tian kevin.t...@intel.com
---
v12: Restore changes as much as possible to my original version, fixing
 a few issues that got introduced after handing it over. Unionize
 new public memop interface structure to allow for non-PCI to be
 supported later on. Check flags to have all currently undefined
 flags clear. Refine adjustments to xen/pci.h.

--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -17,6 +17,42 @@ CHECK_TYPE(domid);
 CHECK_mem_access_op;
 CHECK_vmemrange;
 
+#ifdef HAS_PASSTHROUGH
+struct get_reserved_device_memory {
+struct compat_reserved_device_memory_map map;
+unsigned int used_entries;
+};
+
+static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
+  u32 id, void *ctxt)
+{
+struct get_reserved_device_memory *grdm = ctxt;
+u32 sbdf = PCI_SBDF3(grdm-map.dev.pci.seg, grdm-map.dev.pci.bus,
+ grdm-map.dev.pci.devfn);
+
+if ( !(grdm-map.flags  XENMEM_RDM_ALL)  (sbdf != id) )
+return 0;
+
+if ( grdm-used_entries  grdm-map.nr_entries )
+{
+struct compat_reserved_device_memory rdm = {
+.start_pfn = start, .nr_pages = nr
+};
+
+if ( rdm.start_pfn != start || rdm.nr_pages != nr )
+return -ERANGE;
+
+if ( __copy_to_compat_offset(grdm-map.buffer, grdm-used_entries,
+ rdm, 1) )
+return -EFAULT;
+}
+
+++grdm-used_entries;
+
+return 1;
+}
+#endif
+
 int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
 {
 int split, op = cmd  MEMOP_CMD_MASK;
@@ -303,6 +339,35 @@ int compat_memory_op(unsigned int cmd, X
 break;
 }
 
+#ifdef HAS_PASSTHROUGH
+case XENMEM_reserved_device_memory_map:
+{
+struct get_reserved_device_memory grdm;
+
+if ( unlikely(start_extent) )
+return -ENOSYS;
+
+if ( copy_from_guest(grdm.map, compat, 1) ||
+ !compat_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
+return -EFAULT;
+
+if ( grdm.map.flags  ~XENMEM_RDM_ALL )
+return -EINVAL;
+
+grdm.used_entries = 0;
+rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+  grdm);
+
+if ( !rc  grdm.map.nr_entries  grdm.used_entries )
+rc = -ENOBUFS;
+grdm.map.nr_entries = grdm.used_entries;
+if ( __copy_to_guest(compat, grdm.map, 1) )
+rc = -EFAULT;
+
+return rc;
+}
+#endif
+
 default:
 return compat_arch_memory_op(cmd, compat);
 }
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -748,6 +748,39 @@ static int construct_memop_from_reservat
 return 0;
 }
 
+#ifdef HAS_PASSTHROUGH
+struct get_reserved_device_memory {
+struct xen_reserved_device_memory_map map;
+unsigned int used_entries;
+};
+
+static int get_reserved_device_memory(xen_pfn_t start, xen_ulong_t nr,
+  u32 id, void *ctxt)
+{
+struct get_reserved_device_memory *grdm = ctxt;
+u32 sbdf = PCI_SBDF3(grdm-map.dev.pci.seg, grdm-map.dev.pci.bus,
+ grdm-map.dev.pci.devfn);
+
+if ( !(grdm-map.flags  XENMEM_RDM_ALL)  (sbdf != id) )
+return 0;
+
+if ( grdm-used_entries  grdm-map.nr_entries )
+{
+struct xen_reserved_device_memory rdm = {
+.start_pfn = start, .nr_pages = nr
+};
+
+if ( __copy_to_guest_offset(grdm-map.buffer, grdm-used_entries,
+rdm, 1) )
+return -EFAULT;
+}
+
+++grdm-used_entries;
+
+return 1;
+}
+#endif
+
 long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 {
 struct domain *d;
@@ -1162,6 +1195,35 @@ long do_memory_op(unsigned long cmd, XEN
 break;
 }
 
+#ifdef HAS_PASSTHROUGH
+case XENMEM_reserved_device_memory_map:
+{
+struct get_reserved_device_memory grdm;
+
+if ( unlikely(start_extent) )
+return -ENOSYS;
+
+if ( copy_from_guest(grdm.map, arg, 1) ||
+ !guest_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
+return -EFAULT;
+
+if ( grdm.map.flags  ~XENMEM_RDM_ALL )
+return -EINVAL;
+
+grdm.used_entries = 0;
+rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
+  grdm);
+
+if ( !rc  grdm.map.nr_entries  grdm.used_entries )
+rc = -ENOBUFS;
+grdm.map.nr_entries = 

[Xen-devel] [PATCH v2 for-4.6 2/3] xen: replace non-POSIX error codes

2015-07-22 Thread Roger Pau Monne
Some DOMCTLs returned non-POSIX error codes, replace them with POSIX
compilant values instead. EBADRQC and EBADSLT are replaced by EDOM, while
EUSERS is replaced with EOVERFLOW.

Signed-off-by: Roger Pau Monné roger@citrix.com
Cc: George Dunlap george.dun...@eu.citrix.com
Cc: Jan Beulich jbeul...@suse.com
Cc: Andrew Cooper andrew.coop...@citrix.com
---
Changes since v1:
 - Use EDOM instead of EINVAL.
---
Nothing in libxc or libxl seems to check for those specific error codes, so
I guess it's fine to replace them with whatever we want.
---
 xen/arch/x86/mm/paging.c | 2 +-
 xen/common/domain.c  | 4 ++--
 xen/common/hvm/save.c| 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index 7089155..9bd54a8 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -766,7 +766,7 @@ long 
paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 
 if ( op.interface_version != XEN_DOMCTL_INTERFACE_VERSION ||
  op.cmd != XEN_DOMCTL_shadow_op )
-return -EBADRQC;
+return -EDOM;
 
 d = rcu_lock_domain_by_id(op.domain);
 if ( d == NULL )
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 8efef5c..791166b 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -900,7 +900,7 @@ int vcpu_pause_by_systemcontroller(struct vcpu *v)
 new = old + 1;
 
 if ( new  255 )
-return -EUSERS;
+return -EOVERFLOW;
 
 prev = cmpxchg(v-controller_pause_count, old, new);
 } while ( prev != old );
@@ -980,7 +980,7 @@ int __domain_pause_by_systemcontroller(struct domain *d,
  * toolstack overflowing d-pause_count with many repeated hypercalls.
  */
 if ( new  255 )
-return -EUSERS;
+return -EOVERFLOW;
 
 prev = cmpxchg(d-controller_pause_count, old, new);
 } while ( prev != old );
diff --git a/xen/common/hvm/save.c b/xen/common/hvm/save.c
index da6e668..56589fa 100644
--- a/xen/common/hvm/save.c
+++ b/xen/common/hvm/save.c
@@ -114,7 +114,7 @@ int hvm_save_one(struct domain *d, uint16_t typecode, 
uint16_t instance,
 uint32_t off;
 const struct hvm_save_descriptor *desc;
 
-rv = -EBADSLT;
+rv = -EDOM;
 for ( off = 0; off  (ctxt.cur - sizeof(*desc)); off += desc-length )
 {
 desc = (void *)(ctxt.data + off);
-- 
1.9.5 (Apple Git-50.3)


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 for-4.6 0/3] Get rid of non-POSIX error codes

2015-07-22 Thread Roger Pau Monne
This patch series gets rid of non-POSIX error codes in the hypervisor and 
the toolstack. This is needed for OS compatibility.

I think the patch series is fairly small and non-intrusive, hence the 
for-4.6 tag.

Thanks, Roger.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 2/6] tools/libx{l, c}: Remove the toolstack_{save, restore} callbacks

2015-07-22 Thread Andrew Cooper
On 22/07/15 11:27, Ian Campbell wrote:
 On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 +TOOLSTACK (deprecated)
 +--
 +
 + *This record was only present for transitionary purposes during
 transitionary isn't really a word... transitional is (even if it
 sounds a bit odd to me)?

It very much is.

http://www.oxforddictionaries.com/definition/english/transition

It is an appropriate derivative in this case.


 +  development.  It is should not be used.*
 Does it needs to be accepted on restore for N-N+1 purposes (or N+M for
 M1 since you care about that)? Or does the conversion routine turn it
 into something else?

The libxc toolstack record was used in the #LIBXL_HVM_COMPAT phase, but
nothing (not even the conversion script) will generate one any more.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 for-4.6 3/3] xen: remove non-POSIX error codes

2015-07-22 Thread Roger Pau Monne
Xen was using some non-POSIX error codes that are removed in this patch. For
future reference, the list of POSIX error codes has been obtained from:

http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html

The error codes already present and defined as optional (XSR), have been
left in place.

Signed-off-by: Roger Pau Monné roger@citrix.com
Cc: Ian Campbell ian.campb...@citrix.com
Cc: Ian Jackson ian.jack...@eu.citrix.com
Cc: Jan Beulich jbeul...@suse.com
Cc: Tim Deegan t...@xen.org
---
 xen/include/public/errno.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/xen/include/public/errno.h b/xen/include/public/errno.h
index 50adcb9..db9d064 100644
--- a/xen/include/public/errno.h
+++ b/xen/include/public/errno.h
@@ -54,8 +54,6 @@ XEN_ERRNO(EDEADLK,35) /* Resource deadlock would 
occur */
 XEN_ERRNO(ENAMETOOLONG,36) /* File name too long */
 XEN_ERRNO(ENOLCK,  37) /* No record locks available */
 XEN_ERRNO(ENOSYS,  38) /* Function not implemented */
-XEN_ERRNO(EBADRQC, 56) /* Invalid request code */
-XEN_ERRNO(EBADSLT, 57) /* Invalid slot */
 XEN_ERRNO(ENODATA, 61) /* No data available */
 XEN_ERRNO(ETIME,   62) /* Timer expired */
 XEN_ERRNO(EBADMSG, 74) /* Not a data message */
@@ -64,7 +62,6 @@ XEN_ERRNO(EILSEQ, 84) /* Illegal byte sequence */
 #ifdef __XEN__ /* Internal only, should never be exposed to the guest. */
 XEN_ERRNO(ERESTART,85) /* Interrupted system call should be restarted 
*/
 #endif
-XEN_ERRNO(EUSERS,  87) /* Too many users */
 XEN_ERRNO(ENOTSOCK,88) /* Socket operation on non-socket */
 XEN_ERRNO(EOPNOTSUPP,  95) /* Operation not supported on transport 
endpoint */
 XEN_ERRNO(EADDRINUSE,  98) /* Address already in use */
@@ -72,8 +69,6 @@ XEN_ERRNO(EADDRNOTAVAIL, 99)  /* Cannot assign requested 
address */
 XEN_ERRNO(ENOBUFS, 105)/* No buffer space available */
 XEN_ERRNO(EISCONN, 106)/* Transport endpoint is already connected */
 XEN_ERRNO(ENOTCONN,107)/* Transport endpoint is not connected */
-XEN_ERRNO(ESHUTDOWN,   108)/* Cannot send after transport endpoint 
shutdown */
-XEN_ERRNO(ETOOMANYREFS,109)/* Too many references: cannot splice */
 XEN_ERRNO(ETIMEDOUT,   110)/* Connection timed out */
 
 #undef XEN_ERRNO
-- 
1.9.5 (Apple Git-50.3)


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 0/15+5+5] Begin to disentangle libxenctrl and provide some stable libraries

2015-07-22 Thread Ian Campbell
On Wed, 2015-07-15 at 16:53 +0100, Andrew Cooper wrote:
 
  I'm stilling mulling over putting everything into tools/libs/FOO
  instead of tools/libxenFOO
 
 On balance, +1.  tools/ is already quite a mixed bag of stuff.

OK, I think I'm going to go ahead with this.

  , I still haven't but I could if people think
  it is worthwhile. Eventually I'd like to split libxc into 
  libxenguest
  and libxenctrl to cut down on the amount of strange cross talk...
 
 Very much +1.

OK (eventually ;-))

 FWIW, also splitting xl and libxl into different directories.

Yes, good idea (also eventually ;-)).

While I'm accumulating TODO items in this thread, every library which
gets split out needs a careful review before it should be declared
A[PBI] stable. I'm thinking at least:

  * Consistent error handling (return -1 + setting errno=EFOO)
  * Correct types
  * Const correctness
  * General interface sanity check

Anything else?

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 for-4.6 2/3] xen: replace non-POSIX error codes

2015-07-22 Thread George Dunlap
On 07/22/2015 12:07 PM, Roger Pau Monne wrote:
 Some DOMCTLs returned non-POSIX error codes, replace them with POSIX
 compilant values instead. EBADRQC and EBADSLT are replaced by EDOM, while
 EUSERS is replaced with EOVERFLOW.
 
 Signed-off-by: Roger Pau Monné roger@citrix.com
 Cc: George Dunlap george.dun...@eu.citrix.com
 Cc: Jan Beulich jbeul...@suse.com
 Cc: Andrew Cooper andrew.coop...@citrix.com
 ---
 Changes since v1:
  - Use EDOM instead of EINVAL.
 ---
 Nothing in libxc or libxl seems to check for those specific error codes, so
 I guess it's fine to replace them with whatever we want.
 ---
  xen/arch/x86/mm/paging.c | 2 +-
  xen/common/domain.c  | 4 ++--
  xen/common/hvm/save.c| 2 +-
  3 files changed, 4 insertions(+), 4 deletions(-)
 
 diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
 index 7089155..9bd54a8 100644
 --- a/xen/arch/x86/mm/paging.c
 +++ b/xen/arch/x86/mm/paging.c
 @@ -766,7 +766,7 @@ long 
 paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
  
  if ( op.interface_version != XEN_DOMCTL_INTERFACE_VERSION ||
   op.cmd != XEN_DOMCTL_shadow_op )
 -return -EBADRQC;
 +return -EDOM;

mm bit:

Acked-by: George Dunlap george.dun...@eu.citrix.com

Since this is internal, this isn't even a bike shed -- it's debating the
color of the insulation on the pipes in the basement.  Feel free to
retain this Ack even if you end up changing this particular errno to
something else again.

 -George


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v11][PATCH 01/16] xen: introduce XENMEM_reserved_device_memory_map

2015-07-22 Thread Jan Beulich
 On 22.07.15 at 03:29, tiejun.c...@intel.com wrote:
 From: Jan Beulich jbeul...@suse.com
 
 This is a prerequisite for punching holes into HVM and PVH guests' P2M
 to allow passing through devices that are associated with (on VT-d)
 RMRRs.
 
 Signed-off-by: Jan Beulich jbeul...@suse.com
 Signed-off-by: Tiejun Chen tiejun.c...@intel.com

You not stating what you did to the patch made me always assume
that you didn't do more than some cosmetic adjustments, if any.
Now that I'm preparing to get the initial part of this series in I find
that you altered it quite a bit, so I'm afraid I'm going to need to
undo some of the adjustments you did.

 @@ -303,6 +342,33 @@ int compat_memory_op(unsigned int cmd, 
 XEN_GUEST_HANDLE_PARAM(void) compat)
  break;
  }
  
 +#ifdef HAS_PASSTHROUGH
 +case XENMEM_reserved_device_memory_map:
 +{
 +struct get_reserved_device_memory grdm;
 +
 +if ( copy_from_guest(grdm.map, compat, 1) ||
 + !compat_handle_okay(grdm.map.buffer, grdm.map.nr_entries) )
 +return -EFAULT;
 +
 +grdm.used_entries = 0;
 +rc = iommu_get_reserved_device_memory(get_reserved_device_memory,
 +  grdm);
 +
 +if ( !rc  grdm.map.nr_entries  grdm.used_entries )
 +rc = -ENOBUFS;
 +
 +grdm.map.nr_entries = grdm.used_entries;
 +if ( grdm.map.nr_entries )

This conditional appears to be a bug: How would the caller know,
upon successful return, that there are no reserved regions?

 @@ -1162,6 +1199,33 @@ long do_memory_op(unsigned long cmd, 
 XEN_GUEST_HANDLE_PARAM(void) arg)
  break;
  }
  
 +#ifdef HAS_PASSTHROUGH
 +case XENMEM_reserved_device_memory_map:
 +{
 +struct get_reserved_device_memory grdm;
 +

+if ( unlikely(start_extent) )
+return -ENOSYS;
+

 --- a/xen/include/public/memory.h
 +++ b/xen/include/public/memory.h
 @@ -573,7 +573,42 @@ struct xen_vnuma_topology_info {
  typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
  DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
  
 -/* Next available subop number is 27 */
 +/*
 + * With some legacy devices, certain guest-physical addresses cannot safely
 + * be used for other purposes, e.g. to map guest RAM.  This hypercall
 + * enumerates those regions so the toolstack can avoid using them.
 + */
 +#define XENMEM_reserved_device_memory_map   27
 +struct xen_reserved_device_memory {
 +xen_pfn_t start_pfn;
 +xen_ulong_t nr_pages;
 +};
 +typedef struct xen_reserved_device_memory xen_reserved_device_memory_t;
 +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t);
 +
 +struct xen_reserved_device_memory_map {
 +/* IN */
 +/* Currently just one bit to indicate checkng all Reserved Device 
 Memory. */
 +#define PCI_DEV_RDM_ALL   0x1
 +uint32_tflag;
 +/* IN */
 +uint16_tseg;
 +uint8_t bus;
 +uint8_t devfn;

This makes a mem-op PCI specific. For one, this should therefore be
put in a union, so that non-PCI uses remain possible in the future
without breaking by then existing users of the interface. And with
that I wonder whether this shouldn't use struct physdev_pci_device.

 --- a/xen/include/xen/pci.h
 +++ b/xen/include/xen/pci.h
 @@ -33,6 +33,8 @@
  #define PCI_DEVFN2(bdf) ((bdf)  0xff)
  #define PCI_BDF(b,d,f)  b)  0xff)  8) | PCI_DEVFN(d,f))
  #define PCI_BDF2(b,df)  b)  0xff)  8) | ((df)  0xff))
 +#define PCI_SBDF(s,bdf) (((s  0x)  16) | (bdf  0x))
 +#define PCI_SBDF2(s,b,df) (((s  0x)  16) | PCI_BDF2(b,df))

The natural thing for PCI_SBDF() would be

#define PCI_SBDF(s,b,d,f) ...

See for instance
http://lists.xenproject.org/archives/html/xen-devel/2015-07/msg02554.html

I'm going to produce an updated patch, to be sent out later today.

Jan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/6] tools/libxc: Remove legacy migration implementation

2015-07-22 Thread Ian Campbell
On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 It is no longer used.
 
 One complication is that xc_map_m2p() has users in xc_offline_page.c,
 xen-mfndump and xen-mceinj.  Move its implementation into
 xc_offline_page (for want of a better location) beside it's current
 user.
 
 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Wei Liu wei.l...@citrix.com

Acked-by: Ian Campbell ian.campb...@citrix.com

git grep xc_domain_restore[^2] shows a mention (of the filename) in
MAINTAINERS which doesn't look to be handled later either.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 5/6] docs: Migration v2 is now no longer draft

2015-07-22 Thread Ian Campbell
On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 Add further instructions to the libxc Future Extensions section, 
 and
 provide such a section for libxl.
 
 In addition, drop the In experimental __func__ IPRINTF()s from the
 libxc implementations.
 
 Finally, a correction to libxl's Not Yet Included section which
 should have been amended in c/s 7eaec00 when libxl Remus support was
 introduced into the protocol.
 
 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com

Acked-by: Ian Campbell ian.campb...@citrix.com


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 4/6] tools/libx{l, c}: Drop '2' suffixes from xc_domain_{save, restore}2() functions

2015-07-22 Thread Ian Campbell
On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 As there is now only the one implementation.
 
 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Wei Liu wei.l...@citrix.com

Acked-by: Ian Campbell ian.campb...@citrix.com


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [v11][PATCH 11/16] tools/libxl: detect and avoid conflicts with RDM

2015-07-22 Thread George Dunlap
On Wed, Jul 22, 2015 at 9:52 AM, Chen, Tiejun tiejun.c...@intel.com wrote:
 And I also can't understand why we should drop Reviewed-by/Acked-by from
 other guys. And, all new comments I addressed don't conflict with our
 previous revision so why?

You must remove Reviewed-by / Acked-by *for a given patch* *when you
change that patch in a significant way*.

Acked-by: John Smith is saying that John Smith is OK with the patch
the way that it is.  But just because John Smith was OK with the patch
before you made changes doesn't mean he's OK with the patch after you
made changes.

Very small changes -- like shifting whitespace or fixing typos -- are
usually OK to keep the Ack.  But if you make anything more substantial
-- even if you just reword a comment significantly -- then you have to
drop the Acked-by or Reviewed-by, and get them again.  (Usually if the
changes really are minor then it's fairly quick to get them again.)

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/6] tools/libxc: Remove legacy migration implementation

2015-07-22 Thread Andrew Cooper
On 22/07/15 11:25, Ian Campbell wrote:
 On Mon, 2015-07-20 at 11:37 +0100, Andrew Cooper wrote:
 It is no longer used.

 One complication is that xc_map_m2p() has users in xc_offline_page.c,
 xen-mfndump and xen-mceinj.  Move its implementation into
 xc_offline_page (for want of a better location) beside it's current
 user.

 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Wei Liu wei.l...@citrix.com
 Acked-by: Ian Campbell ian.campb...@citrix.com

 git grep xc_domain_restore[^2] shows a mention (of the filename) in
 MAINTAINERS which doesn't look to be handled later either.


Ah yes  - I will drop those entries.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 for-4.6 1/3] xen/x86/libxl: replace non-POSIX error codes used by PSR code

2015-07-22 Thread Roger Pau Monne
PSR was using EBADSLT and EUSERS which are not POSIX error codes, replace
them with ENOTSOCK and EOVERFLOW respectively.

Signed-off-by: Roger Pau Monné roger@citrix.com
Cc: Ian Jackson ian.jack...@eu.citrix.com
Cc: Stefano Stabellini stefano.stabell...@eu.citrix.com
Cc: Ian Campbell ian.campb...@citrix.com
Cc: Wei Liu wei.l...@citrix.com
Cc: Jan Beulich jbeul...@suse.com
Cc: Andrew Cooper andrew.coop...@citrix.com
---
Changes since v1:
 - Use ENOTSOCK to replace EBADSLT instead of EINVAL.
---
 tools/libxl/libxl_psr.c| 6 +++---
 xen/arch/x86/psr.c | 8 
 xen/include/public/errno.h | 1 +
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/libxl/libxl_psr.c b/tools/libxl/libxl_psr.c
index 2a0..3378239 100644
--- a/tools/libxl/libxl_psr.c
+++ b/tools/libxl/libxl_psr.c
@@ -31,7 +31,7 @@ static void libxl__psr_log_err_msg(libxl__gc *gc, int err)
 case ESRCH:
 msg = invalid domain ID;
 break;
-case EBADSLT:
+case ENOTSOCK:
 msg = socket is not supported;
 break;
 case EFAULT:
@@ -59,7 +59,7 @@ static void libxl__psr_cmt_log_err_msg(libxl__gc *gc, int err)
 case ENOENT:
 msg = CMT is not attached to this domain;
 break;
-case EUSERS:
+case EOVERFLOW:
 msg = no free RMID available;
 break;
 default:
@@ -81,7 +81,7 @@ static void libxl__psr_cat_log_err_msg(libxl__gc *gc, int err)
 case ENOENT:
 msg = CAT is not enabled on the socket;
 break;
-case EUSERS:
+case EOVERFLOW:
 msg = no free COS available;
 break;
 case EEXIST:
diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
index 861683f..19f714b 100644
--- a/xen/arch/x86/psr.c
+++ b/xen/arch/x86/psr.c
@@ -176,7 +176,7 @@ int psr_alloc_rmid(struct domain *d)
 if ( rmid  psr_cmt-rmid_max )
 {
 d-arch.psr_rmid = 0;
-return -EUSERS;
+return -EOVERFLOW;
 }
 
 d-arch.psr_rmid = rmid;
@@ -251,7 +251,7 @@ static struct psr_cat_socket_info 
*get_cat_socket_info(unsigned int socket)
 return ERR_PTR(-ENODEV);
 
 if ( socket = nr_sockets )
-return ERR_PTR(-EBADSLT);
+return ERR_PTR(-ENOTSOCK);
 
 if ( !test_bit(socket, cat_socket_enable) )
 return ERR_PTR(-ENOENT);
@@ -332,7 +332,7 @@ static int write_l3_cbm(unsigned int socket, unsigned int 
cos, uint64_t cbm)
 unsigned int cpu = get_socket_cpu(socket);
 
 if ( cpu = nr_cpu_ids )
-return -EBADSLT;
+return -ENOTSOCK;
 on_selected_cpus(cpumask_of(cpu), do_write_l3_cbm, info, 1);
 }
 
@@ -381,7 +381,7 @@ int psr_set_l3_cbm(struct domain *d, unsigned int socket, 
uint64_t cbm)
 if ( !found )
 {
 spin_unlock(info-cbm_lock);
-return -EUSERS;
+return -EOVERFLOW;
 }
 
 cos = found - map;
diff --git a/xen/include/public/errno.h b/xen/include/public/errno.h
index 9a7e411..50adcb9 100644
--- a/xen/include/public/errno.h
+++ b/xen/include/public/errno.h
@@ -65,6 +65,7 @@ XEN_ERRNO(EILSEQ, 84) /* Illegal byte sequence */
 XEN_ERRNO(ERESTART,85) /* Interrupted system call should be restarted 
*/
 #endif
 XEN_ERRNO(EUSERS,  87) /* Too many users */
+XEN_ERRNO(ENOTSOCK,88) /* Socket operation on non-socket */
 XEN_ERRNO(EOPNOTSUPP,  95) /* Operation not supported on transport 
endpoint */
 XEN_ERRNO(EADDRINUSE,  98) /* Address already in use */
 XEN_ERRNO(EADDRNOTAVAIL, 99)   /* Cannot assign requested address */
-- 
1.9.5 (Apple Git-50.3)


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] Question about mapping between domains

2015-07-22 Thread Oleksandr Dmytryshyn
On Fri, Jul 17, 2015 at 11:59 AM, Ian Campbell ian.campb...@citrix.com wrote:
 Does this mean everything is working as you need, or is there a further
 issue which needs addressing?
All is working as needed. Thank You.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


  1   2   >