date:20150317

[Xen-devel] [PATCH v19 08/14] x86/VPMU: Save VPMU state for PV guests during context switch

2015-03-17 Thread Boris Ostrovsky

Save VPMU state during context switch for both HVM and PV(H) guests.

A subsequent patch (x86/VPMU: NMI-based VPMU support) will make it possible
for vpmu_switch_to() to call vmx_vmcs_try_enter()-vcpu_pause() which needs
is_running to be correctly set/cleared. To prepare for that, call 
context_saved()
before vpmu_switch_to() is executed. (Note that while this change could have
been dalayed until that later patch, the changes are harmless to existing code
and so we do it here)

Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
---
Changes in v19:
* Adjusted for new vpmu_switch_to/from interface

 xen/arch/x86/domain.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f19087e..c7f8210 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1533,17 +1533,14 @@ void context_switch(struct vcpu *prev, struct vcpu 
*next)
 }
 
 if ( prev != next )
-_update_runstate_area(prev);
-
-if ( is_hvm_vcpu(prev) )
 {
-if (prev != next)
-vpmu_switch_from(prev);
-
-if ( !list_empty(prev-arch.hvm_vcpu.tm_list) )
-pt_save_timer(prev);
+_update_runstate_area(prev);
+vpmu_switch_from(prev);
 }
 
+if ( is_hvm_vcpu(prev)  !list_empty(prev-arch.hvm_vcpu.tm_list) )
+pt_save_timer(prev);
+
 local_irq_disable();
 
 set_current(next);
@@ -1581,15 +1578,16 @@ void context_switch(struct vcpu *prev, struct vcpu 
*next)
!is_hardware_domain(next-domain));
 }
 
-if (is_hvm_vcpu(next)  (prev != next) )
-/* Must be done with interrupts enabled */
-vpmu_switch_to(next);
-
 context_saved(prev);
 
 if ( prev != next )
+{
 _update_runstate_area(next);
 
+/* Must be done with interrupts enabled */
+vpmu_switch_to(next);
+}
+
 /* Ensure that the vcpu has an up-to-date time base. */
 update_vcpu_system_time(next);
 
-- 
1.8.1.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH v19 10/14] x86/VPMU: Add support for PMU register handling on PV guests

2015-03-17 Thread Boris Ostrovsky

Intercept accesses to PMU MSRs and process them in VPMU module. If vpmu ops
for VCPU are not initialized (which is the case, for example, for PV guests that
are not VPMU-enlightened) access to MSRs will return failure.

Dump VPMU state for all domains (HVM and PV) when requested.

Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
Acked-by: Jan Beulich jbeul...@suse.com
Acked-by: Kevin Tian kevin.t...@intel.com
Reviewed-by: Dietmar Hahn dietmar.h...@ts.fujitsu.com
Tested-by: Dietmar Hahn dietmar.h...@ts.fujitsu.com
---
 xen/arch/x86/domain.c |  3 +--
 xen/arch/x86/hvm/vmx/vpmu_core2.c | 49 +++--
 xen/arch/x86/hvm/vpmu.c   |  3 +++
 xen/arch/x86/traps.c  | 51 +--
 4 files changed, 95 insertions(+), 11 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index c7f8210..a48d824 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2065,8 +2065,7 @@ void arch_dump_vcpu_info(struct vcpu *v)
 {
 paging_dump_vcpu_info(v);
 
-if ( is_hvm_vcpu(v) )
-vpmu_dump(v);
+vpmu_dump(v);
 }
 
 void domain_cpuid(
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c 
b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index d10e3e7..66d7bc0 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -27,6 +27,7 @@
 #include asm/regs.h
 #include asm/types.h
 #include asm/apic.h
+#include asm/traps.h
 #include asm/msr.h
 #include asm/msr-index.h
 #include asm/hvm/support.h
@@ -299,12 +300,18 @@ static inline void __core2_vpmu_save(struct vcpu *v)
 rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
 for ( i = 0; i  arch_pmc_cnt; i++ )
 rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
+
+if ( !has_hvm_container_vcpu(v) )
+rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt-global_status);
 }
 
 static int core2_vpmu_save(struct vcpu *v)
 {
 struct vpmu_struct *vpmu = vcpu_vpmu(v);
 
+if ( !has_hvm_container_vcpu(v) )
+wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
 if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
 return 0;
 
@@ -342,6 +349,13 @@ static inline void __core2_vpmu_load(struct vcpu *v)
 wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt-fixed_ctrl);
 wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt-ds_area);
 wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt-pebs_enable);
+
+if ( !has_hvm_container_vcpu(v) )
+{
+wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt-global_ovf_ctrl);
+core2_vpmu_cxt-global_ovf_ctrl = 0;
+wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt-global_ctrl);
+}
 }
 
 static void core2_vpmu_load(struct vcpu *v)
@@ -442,7 +456,6 @@ static int core2_vpmu_msr_common_check(u32 msr_index, int 
*type, int *index)
 static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
uint64_t supported)
 {
-u64 global_ctrl;
 int i, tmp;
 int type = -1, index = -1;
 struct vcpu *v = current;
@@ -486,7 +499,12 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t 
msr_content,
 switch ( msr )
 {
 case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+if ( msr_content  ~(0xC000 |
+ (((1ULL  fixed_pmc_cnt) - 1)  32) |
+ ((1ULL  arch_pmc_cnt) - 1)) )
+return 1;
 core2_vpmu_cxt-global_status = ~msr_content;
+wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
 return 0;
 case MSR_CORE_PERF_GLOBAL_STATUS:
 gdprintk(XENLOG_INFO, Can not write readonly MSR: 
@@ -514,14 +532,18 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t 
msr_content,
 gdprintk(XENLOG_WARNING, Guest setting of DTS is ignored.\n);
 return 0;
 case MSR_CORE_PERF_GLOBAL_CTRL:
-global_ctrl = msr_content;
+core2_vpmu_cxt-global_ctrl = msr_content;
 break;
 case MSR_CORE_PERF_FIXED_CTR_CTRL:
 if ( msr_content 
  ( ~((1ull  (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1)) )
 return 1;
 
-vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl);
+if ( has_hvm_container_vcpu(v) )
+vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+   core2_vpmu_cxt-global_ctrl);
+else
+rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt-global_ctrl);
 *enabled_cntrs = ~(((1ULL  fixed_pmc_cnt) - 1)  32);
 if ( msr_content != 0 )
 {
@@ -546,7 +568,11 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t 
msr_content,
 if ( msr_content  (~((1ull  32) - 1)) )
 return 1;
 
-vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl);
+if ( has_hvm_container_vcpu(v) )
+vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
+

Re: [Xen-devel] Upstream QEMU based stubdom and rump kernel

2015-03-17 Thread Anthony PERARD

On Tue, Mar 17, 2015 at 02:29:07PM +, Wei Liu wrote:
 I've now successfully built QEMU upstream with rump kernel. However to
 make it fully functional as a stubdom, there are some missing pieces to
 be added in.
 
 1. The ability to access QMP socket (a unix socket) from Dom0. That
will be used to issue command to QEMU.

The QMP socket does not needs to be a unix socket. It can be any of
those (from qemu --help):
Character device options:
-chardev null,id=id[,mux=on|off]
-chardev 
socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]
 [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off] (tcp)
-chardev 
socket,id=id,path=path[,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off]
 (unix)
-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]
 [,localport=localport][,ipv4][,ipv6][,mux=on|off]
-chardev msmouse,id=id[,mux=on|off]
-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]
 [,mux=on|off]
-chardev ringbuf,id=id[,size=size]
-chardev file,id=id,path=path[,mux=on|off]
-chardev pipe,id=id,path=path[,mux=on|off]
-chardev pty,id=id[,mux=on|off]
-chardev stdio,id=id[,mux=on|off][,signal=on|off]
-chardev serial,id=id,path=path[,mux=on|off]
-chardev tty,id=id,path=path[,mux=on|off]
-chardev parallel,id=id,path=path[,mux=on|off]
-chardev parport,id=id,path=path[,mux=on|off]
-chardev spicevmc,id=id,name=name[,debug=debug]
-chardev spiceport,id=id,name=name[,debug=debug]

 2. The ability to access files in Dom0. That will be used to write to /
read from QEMU state file.

To save a QEMU state (write), we do use a filename. But I guest we could
expand the QMP command (xen-save-devices-state) to use something else, if
it's easier.

To restore, we provide a file descriptor from libxl to QEMU, with the fd on
the file that contain the state we want to restore. But there are a few
other way to load a state (from qemu.git/docs/migration.txt):
- tcp migration: do the migration using tcp sockets
- unix migration: do the migration using unix sockets
- exec migration: do the migration using the stdin/stdout through a process.
- fd migration: do the migration using an file descriptor that is
  passed to QEMU.  QEMU doesn't care how this file descriptor is opened.

-- 
Anthony PERARD

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] Upstream QEMU based stubdom and rump kernel

2015-03-17 Thread Wei Liu

On Tue, Mar 17, 2015 at 02:54:09PM +, Ian Campbell wrote:
 On Tue, 2015-03-17 at 14:29 +, Wei Liu wrote:
  2. The ability to access files in Dom0. That will be used to write to /
 read from QEMU state file.
 
 This requirement is not as broad as you make it sound.
 

Yes. You're right.

 All which is really required is the ability to slurp in or write out a
 blob of bytes to a service running in a control domain, not actual

This is more accurate.

 ability to read/write files in dom0 (which would need careful security
 consideration!).
 
 For the old qemu-traditional stubdom for example this is implemented as
 a pair of console devices (one r/o for restore + one w/o for save) which
 are setup by the toolstack at start of day and pre-plumbed into two
 temporary files.
 

Unfortunately I don't think that hack in mini-os is upstreamable in rump
kernel.

Wei.

 Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH v19 09/14] x86/VPMU: When handling MSR accesses, leave fault injection to callers

2015-03-17 Thread Boris Ostrovsky

With this patch return value of 1 of vpmu_do_msr() will now indicate whether an
error was encountered during MSR processing (instead of stating that the access
was to a VPMU register).

As part of this patch we also check for validity of certain MSR accesses right
when we determine which register is being written, as opposed to postponing this
until later.

Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
Acked-by: Kevin Tian kevin.t...@intel.com
Reviewed-by: Dietmar Hahn dietmar.h...@ts.fujitsu.com
Tested-by: Dietmar Hahn dietmar.h...@ts.fujitsu.com
---
 xen/arch/x86/hvm/svm/svm.c|  6 ++-
 xen/arch/x86/hvm/svm/vpmu.c   |  6 +--
 xen/arch/x86/hvm/vmx/vmx.c| 24 +---
 xen/arch/x86/hvm/vmx/vpmu_core2.c | 82 ++-
 4 files changed, 55 insertions(+), 63 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index e523d12..4fe36e9 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1709,7 +1709,8 @@ static int svm_msr_read_intercept(unsigned int msr, 
uint64_t *msr_content)
 case MSR_AMD_FAM15H_EVNTSEL3:
 case MSR_AMD_FAM15H_EVNTSEL4:
 case MSR_AMD_FAM15H_EVNTSEL5:
-vpmu_do_rdmsr(msr, msr_content);
+if ( vpmu_do_rdmsr(msr, msr_content) )
+goto gpf;
 break;
 
 case MSR_AMD64_DR0_ADDRESS_MASK:
@@ -1860,7 +1861,8 @@ static int svm_msr_write_intercept(unsigned int msr, 
uint64_t msr_content)
 case MSR_AMD_FAM15H_EVNTSEL3:
 case MSR_AMD_FAM15H_EVNTSEL4:
 case MSR_AMD_FAM15H_EVNTSEL5:
-vpmu_do_wrmsr(msr, msr_content, 0);
+if ( vpmu_do_wrmsr(msr, msr_content, 0) )
+goto gpf;
 break;
 
 case MSR_IA32_MCx_MISC(4): /* Threshold register */
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 58a0dc4..474d0db 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -305,7 +305,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t 
msr_content,
 is_pmu_enabled(msr_content)  !vpmu_is_set(vpmu, VPMU_RUNNING) )
 {
 if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
-return 1;
+return 0;
 vpmu_set(vpmu, VPMU_RUNNING);
 
 if ( has_hvm_container_vcpu(v)  is_msr_bitmap_on(vpmu) )
@@ -335,7 +335,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t 
msr_content,
 
 /* Write to hw counters */
 wrmsrl(msr, msr_content);
-return 1;
+return 0;
 }
 
 static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
@@ -353,7 +353,7 @@ static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t 
*msr_content)
 
 rdmsrl(msr, *msr_content);
 
-return 1;
+return 0;
 }
 
 static void amd_vpmu_destroy(struct vcpu *v)
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 83b740a..206e50d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2127,12 +2127,17 @@ static int vmx_msr_read_intercept(unsigned int msr, 
uint64_t *msr_content)
 *msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
 /* Perhaps vpmu will change some bits. */
+/* FALLTHROUGH */
+case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
+case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
+case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+case MSR_IA32_PEBS_ENABLE:
+case MSR_IA32_DS_AREA:
 if ( vpmu_do_rdmsr(msr, msr_content) )
-goto done;
+goto gp_fault;
 break;
 default:
-if ( vpmu_do_rdmsr(msr, msr_content) )
-break;
 if ( passive_domain_do_rdmsr(msr, msr_content) )
 goto done;
 switch ( long_mode_do_msr_read(msr, msr_content) )
@@ -2308,7 +2313,7 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content)
 if ( msr_content  ~supported )
 {
 /* Perhaps some other bits are supported in vpmu. */
-if ( !vpmu_do_wrmsr(msr, msr_content, supported) )
+if ( vpmu_do_wrmsr(msr, msr_content, supported) )
 break;
 }
 if ( msr_content  IA32_DEBUGCTLMSR_LBR )
@@ -2336,9 +2341,16 @@ static int vmx_msr_write_intercept(unsigned int msr, 
uint64_t msr_content)
 if ( !nvmx_msr_write_intercept(msr, msr_content) )
 goto gp_fault;
 break;
+case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
+case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(7):
+case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+case MSR_IA32_PEBS_ENABLE:
+case MSR_IA32_DS_AREA:
+ if ( vpmu_do_wrmsr(msr, msr_content, 0) )
+goto gp_fault;
+break;
 default:
-if ( vpmu_do_wrmsr(msr, msr_content, 0) )
-return

[Xen-devel] [PATCH v19 01/14] x86/VPMU: VPMU should not exist when vpmu_initialise() is called

2015-03-17 Thread Boris Ostrovsky

We don't need to try to destroy it since it can't be already allocated at the
time we try to initialize it.

Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
Suggested-by: Andrew Cooper andrew.coop...@citrix.com
---

Changes in v19:
* Removed unnecesary test for VPMU_CONTEXT_ALLOCATED in svm/vpmu.c

 xen/arch/x86/hvm/svm/vpmu.c | 3 ---
 xen/arch/x86/hvm/vpmu.c | 5 +
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 64dc167..6764070 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -359,9 +359,6 @@ static int amd_vpmu_initialise(struct vcpu *v)
 struct vpmu_struct *vpmu = vcpu_vpmu(v);
 uint8_t family = current_cpu_data.x86;
 
-if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
-return 0;
-
 if ( counters == NULL )
 {
  switch ( family )
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 0e6b6c0..c3273ee 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -236,10 +236,7 @@ void vpmu_initialise(struct vcpu *v)
 if ( is_pvh_vcpu(v) )
 return;
 
-if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
-vpmu_destroy(v);
-vpmu_clear(vpmu);
-vpmu-context = NULL;
+ASSERT(!vpmu-flags  !vpmu-context);
 
 switch ( vendor )
 {
-- 
1.8.1.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH v19 12/14] x86/VPMU: Merge vpmu_rdmsr and vpmu_wrmsr

2015-03-17 Thread Boris Ostrovsky

The two routines share most of their logic.

Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
---
Changes in v19:
* const-ified arch_vpmu_ops in vpmu_do_wrmsr
* non-changes:
   - kept 'current' as a non-initializer to avoid unnecessary initialization
 in the (common) non-VPMU case
   - kept 'nop' label since there are multiple dissimilar cases that can cause
 a non-emulation of VPMU access

 xen/arch/x86/hvm/vpmu.c| 76 +-
 xen/include/asm-x86/hvm/vpmu.h | 14 ++--
 2 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index c287d8b..beed956 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -103,63 +103,47 @@ void vpmu_lvtpc_update(uint32_t val)
 apic_write(APIC_LVTPC, vpmu-hw_lapic_lvtpc);
 }
 
-int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
+int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
+uint64_t supported, bool_t is_write)
 {
-struct vcpu *curr = current;
+struct vcpu *curr;
 struct vpmu_struct *vpmu;
+const struct arch_vpmu_ops *ops;
+int ret = 0;
 
 if ( vpmu_mode == XENPMU_MODE_OFF )
-return 0;
+goto nop;
 
+curr = current;
 vpmu = vcpu_vpmu(curr);
-if ( vpmu-arch_vpmu_ops  vpmu-arch_vpmu_ops-do_wrmsr )
-{
-int ret = vpmu-arch_vpmu_ops-do_wrmsr(msr, msr_content, supported);
-
-/*
- * We may have received a PMU interrupt during WRMSR handling
- * and since do_wrmsr may load VPMU context we should save
- * (and unload) it again.
- */
-if ( !is_hvm_vcpu(curr)  vpmu-xenpmu_data 
- (vpmu-xenpmu_data-pmu.pmu_flags  PMU_CACHED) )
-{
-vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
-vpmu-arch_vpmu_ops-arch_vpmu_save(curr);
-vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
-}
-return ret;
-}
-
-return 0;
-}
-
-int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
-{
-struct vcpu *curr = current;
-struct vpmu_struct *vpmu;
+ops = vpmu-arch_vpmu_ops;
+if ( !ops )
+goto nop;
+
+if ( is_write  ops-do_wrmsr )
+ret = ops-do_wrmsr(msr, *msr_content, supported);
+else if ( !is_write  ops-do_rdmsr )
+ret = ops-do_rdmsr(msr, msr_content);
+else
+goto nop;
 
-if ( vpmu_mode == XENPMU_MODE_OFF )
+/*
+ * We may have received a PMU interrupt while handling MSR access
+ * and since do_wr/rdmsr may load VPMU context we should save
+ * (and unload) it again.
+ */
+if ( !is_hvm_vcpu(curr) 
+ vpmu-xenpmu_data  (vpmu-xenpmu_data-pmu.pmu_flags  PMU_CACHED) )
 {
-*msr_content = 0;
-return 0;
+vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+ops-arch_vpmu_save(curr);
+vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
 }
 
-vpmu = vcpu_vpmu(curr);
-if ( vpmu-arch_vpmu_ops  vpmu-arch_vpmu_ops-do_rdmsr )
-{
-int ret = vpmu-arch_vpmu_ops-do_rdmsr(msr, msr_content);
+return ret;
 
-if ( !is_hvm_vcpu(curr)  vpmu-xenpmu_data 
- (vpmu-xenpmu_data-pmu.pmu_flags  PMU_CACHED) )
-{
-vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
-vpmu-arch_vpmu_ops-arch_vpmu_save(curr);
-vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
-}
-return ret;
-}
-else
+ nop:
+if ( !is_write )
 *msr_content = 0;
 
 return 0;
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 642a4b7..63851a7 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -99,8 +99,8 @@ static inline bool_t vpmu_are_all_set(const struct 
vpmu_struct *vpmu,
 }
 
 void vpmu_lvtpc_update(uint32_t val);
-int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported);
-int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content);
+int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
+uint64_t supported, bool_t is_write);
 void vpmu_do_interrupt(struct cpu_user_regs *regs);
 void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
@@ -110,6 +110,16 @@ void vpmu_save(struct vcpu *v);
 void vpmu_load(struct vcpu *v);
 void vpmu_dump(struct vcpu *v);
 
+static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+uint64_t supported)
+{
+return vpmu_do_msr(msr, msr_content, supported, 1);
+}
+static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+{
+return vpmu_do_msr(msr, msr_content, 0, 0);
+}
+
 extern int acquire_pmu_ownership(int pmu_ownership);
 extern void release_pmu_ownership(int pmu_ownership);
 
-- 
1.8.1.4


___
Xen-devel mailing

[Xen-devel] [PATCH v19 13/14] x86/VPMU: Add privileged PMU mode

2015-03-17 Thread Boris Ostrovsky

Add support for privileged PMU mode (XENPMU_MODE_ALL) which allows privileged
domain (dom0) profile both itself (and the hypervisor) and the guests. While
this mode is on profiling in guests is disabled.

Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com
---
Changes in v19:
* Slightly different mode changing logic in xenpmu_op() since we no longer
  allow mode changes while VPMUs are active

 xen/arch/x86/hvm/vpmu.c  | 34 +-
 xen/arch/x86/traps.c | 13 +
 xen/include/public/pmu.h |  3 +++
 3 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index beed956..71c5063 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -111,7 +111,9 @@ int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
 const struct arch_vpmu_ops *ops;
 int ret = 0;
 
-if ( vpmu_mode == XENPMU_MODE_OFF )
+if ( (vpmu_mode == XENPMU_MODE_OFF) ||
+ ((vpmu_mode  XENPMU_MODE_ALL) 
+  !is_hardware_domain(current-domain)) )
 goto nop;
 
 curr = current;
@@ -166,8 +168,12 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
 struct vcpu *sampled = current, *sampling;
 struct vpmu_struct *vpmu;
 
-/* dom0 will handle interrupt for special domains (e.g. idle domain) */
-if ( sampled-domain-domain_id = DOMID_FIRST_RESERVED )
+/*
+ * dom0 will handle interrupt for special domains (e.g. idle domain) or,
+ * in XENPMU_MODE_ALL, for everyone.
+ */
+if ( (vpmu_mode  XENPMU_MODE_ALL) ||
+ (sampled-domain-domain_id = DOMID_FIRST_RESERVED) )
 {
 sampling = choose_hwdom_vcpu();
 if ( !sampling )
@@ -177,17 +183,18 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
 sampling = sampled;
 
 vpmu = vcpu_vpmu(sampling);
-if ( !is_hvm_vcpu(sampling) )
+if ( !is_hvm_vcpu(sampling) || (vpmu_mode  XENPMU_MODE_ALL) )
 {
 /* PV(H) guest */
 const struct cpu_user_regs *cur_regs;
 uint64_t *flags = vpmu-xenpmu_data-pmu.pmu_flags;
-uint32_t domid = DOMID_SELF;
+uint32_t domid;
 
 if ( !vpmu-xenpmu_data )
 return;
 
 if ( is_pvh_vcpu(sampling) 
+ !(vpmu_mode  XENPMU_MODE_ALL) 
  !vpmu-arch_vpmu_ops-do_interrupt(regs) )
 return;
 
@@ -204,6 +211,11 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
 else
 *flags = PMU_SAMPLE_PV;
 
+if ( sampled == sampling )
+domid = DOMID_SELF;
+else
+domid = sampled-domain-domain_id;
+
 /* Store appropriate registers in xenpmu_data */
 /* FIXME: 32-bit PVH should go here as well */
 if ( is_pv_32bit_vcpu(sampling) )
@@ -232,7 +244,8 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs)
 
 if ( (vpmu_mode  XENPMU_MODE_SELF) )
 cur_regs = guest_cpu_user_regs();
-else if ( !guest_mode(regs)  
is_hardware_domain(sampling-domain) )
+else if ( !guest_mode(regs) 
+  is_hardware_domain(sampling-domain) )
 {
 cur_regs = regs;
 domid = DOMID_XEN;
@@ -508,7 +521,8 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t 
*params)
 struct page_info *page;
 uint64_t gfn = params-val;
 
-if ( vpmu_mode == XENPMU_MODE_OFF )
+if ( (vpmu_mode == XENPMU_MODE_OFF) ||
+ ((vpmu_mode  XENPMU_MODE_ALL)  !is_hardware_domain(d)) )
 return -EINVAL;
 
 if ( (params-vcpu = d-max_vcpus) || (d-vcpu == NULL) ||
@@ -627,12 +641,14 @@ long do_xenpmu_op(unsigned int op, 
XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
 {
 case XENPMU_mode_set:
 {
-if ( (pmu_params.val  ~(XENPMU_MODE_SELF | XENPMU_MODE_HV)) ||
+if ( (pmu_params.val 
+  ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
  (hweight64(pmu_params.val)  1) )
 return -EINVAL;
 
 /* 32-bit dom0 can only sample itself. */
-if ( is_pv_32bit_vcpu(current)  (pmu_params.val  XENPMU_MODE_HV) )
+if ( is_pv_32bit_vcpu(current) 
+ (pmu_params.val  (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
 return -EINVAL;
 
 spin_lock(vpmu_lock);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 1eb7bb4..8a40deb 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2653,6 +2653,10 @@ static int emulate_privileged_op(struct cpu_user_regs 
*regs)
 case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
 if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
 {
+if ( (vpmu_mode  XENPMU_MODE_ALL) 
+ !is_hardware_domain(v-domain) )
+break;
+
 if ( vpmu_do_wrmsr(regs-ecx, msr_content, 0) )
 goto fail;
 }
@@ -2776,6

Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI

2015-03-17 Thread Manish Jaggi



On Tuesday 17 March 2015 06:01 PM, Jan Beulich wrote:

On 17.03.15 at 13:06, mja...@caviumnetworks.com wrote:

On Tuesday 17 March 2015 12:58 PM, Jan Beulich wrote:

On 17.03.15 at 06:26, mja...@caviumnetworks.com wrote:

In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a
hypercall to inform xen that a new pci device has been added.
If we were to inform xen about a new pci bus that is added there are  2 ways
a) Issue the hypercall from drivers/pci/probe.c
b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue
PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that
segment number (s_bdf), it will return an error
SEG_NO_NOT_FOUND. After that the linux xen code could issue the
PHYSDEVOP_pci_host_bridge_add hypercall.

I think (b) can be done with minimal code changes. What do you think ?

I'm pretty sure (a) would even be refused by the maintainers, unless
there already is a notification being sent. As to (b) - kernel code could
keep track of which segment/bus pairs it informed Xen about, and
hence wouldn't even need to wait for an error to be returned from
the device-add request (which in your proposal would need to be re-
issued after the host-bridge-add).

Have a query on the CFG space address to be passed as hypercall parameter.
The of_pci_get_host_bridge_resource only parses the ranges property and
not reg.
reg property has the CFG space address, which is usually stored in
private pci host controller driver structures.

so pci_dev 's parent pci_bus would not have that info.
One way is to add a method in struct pci_ops but not sure it will be
accepted or not.

I'm afraid I don't understand what you're trying to tell me.

Hi Jan,
I missed this during initial discussion and found out while coding that 
CFG Space address  of a pci host is stored in the reg property
and the of_pci code dos not store reg in the resources only ranges are 
stored. So the pci_bus which is the rootbus created in the probe 
function of the
pcie controller driver will have ranges values in resources but reg 
property value (CFG space address) in the private data.
So from drivers/xen/pci.c we can find out the root bus (pci_bus) from 
the pci_dev (via BUS_NOTIFY) but cannot get the CFG space address.


Now there are 2 ways
a) Add a pci_ops to return the CFG space address
b) Let the pci host controller driver invoke a function 
xen_invoke_hypercall () providing bus number and cfg_space address.
xen_invoke_hypercall would be implemented in drivers/xen/pci.c and 
would issue  PHYSDEVOP_pci_host_bridge_add hypercall

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v6 07/30] PCI: Pass PCI domain number combined with root bus number

2015-03-17 Thread Manish Jaggi



On Tuesday 17 March 2015 07:35 PM, Ian Campbell wrote:

On Tue, 2015-03-17 at 10:45 +0530, Manish Jaggi wrote:

On Monday 09 March 2015 08:04 AM, Yijing Wang wrote:

Now we could pass PCI domain combined with bus number
in u32 argu. Because in arm/arm64, PCI domain number
is assigned by pci_bus_assign_domain_nr(). So we leave
pci_scan_root_bus() and pci_create_root_bus() in arm/arm64
unchanged. A new function pci_host_assign_domain_nr()
will be introduced for arm/arm64 to assign domain number
in later patch.

Hi,
I think these changes might not be required. We have made very few
changes in the xen-pcifront to support PCI passthrough in arm64.
As per xen architecture for a domU only a single pci virtual bus is
created and all passthrough devices are attached to it.

I guess you are only talking about the changes to xen-pcifront.c?
Otherwise you are ignoring the dom0 case which is exposed to the real
set of PCI root complexes and anyway I'm not sure how not needed for
Xen domU translates into not required, since it is clearly required for
other systems.

Strictly speaking the Xen pciif protocol does support multiple buses,
it's just that the tools, and perhaps kernels, have not yet felt any
need to actually make use of that.

There doesn't seem to be any harm in updating pcifront to follow this
generic API change.

ok.

One side question, the function

 pci_host_assign_domain_nr()

which would be introduced in later patch, does it appear to be doing the 
same binding which we are trying to implement via a pci_host_bridge add 
hypercall.




Ian.




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2 3/5] xen: print online pCPUs and free pCPUs when dumping

2015-03-17 Thread Juergen Gross


On 03/17/2015 04:33 PM, Dario Faggioli wrote:

e.g., with  `xl debug-key r', like this:

   (XEN) Online Cpus: 0-15
   (XEN) Free Cpus: 8-15

Also, for each cpupool, print the set of pCPUs it
contains, like this:

   (XEN) Cpupool 0:
   (XEN) Cpus: 0-7
   (XEN) Scheduler: SMP Credit Scheduler (credit)

Signed-off-by: Dario Faggioli dario.faggi...@citrix.com


Acked-by: Juergen Gross jgr...@suse.com


Cc: Juergen Gross jgr...@suse.com
Cc: George Dunlap george.dun...@eu.citrix.com
Cc: Jan Beulich jbeul...@suse.com
Cc: Keir Fraser k...@xen.org
---
Changes from v1:
  * _print_cpumap() becomes print_cpumap() (i.e., the
leading '_' was not particularly useful in this
case), as suggested during review
  * changed the output such as (1) we only print the
maps, not the number of elements, and (2) we avoid
printing the free cpus map when empty
  * improved the changelog
---
I'm not including any Reviewed-by / Acked-by tag,
since the patch changed.
---
  xen/common/cpupool.c |   12 
  1 file changed, 12 insertions(+)

diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
index cd6aab9..812a2f9 100644
--- a/xen/common/cpupool.c
+++ b/xen/common/cpupool.c
@@ -17,6 +17,7 @@
  #include xen/percpu.h
  #include xen/sched.h
  #include xen/sched-if.h
+#include xen/keyhandler.h
  #include xen/cpu.h

  #define for_each_cpupool(ptr)\
@@ -658,6 +659,12 @@ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
  return ret;
  }

+static void print_cpumap(const char *str, const cpumask_t *map)
+{
+cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), map);
+printk(%s: %s\n, str, keyhandler_scratch);
+}
+
  void dump_runq(unsigned char key)
  {
  unsigned longflags;
@@ -671,12 +678,17 @@ void dump_runq(unsigned char key)
  sched_smt_power_savings? enabled:disabled);
  printk(NOW=0x%08X%08X\n,  (u32)(now32), (u32)now);

+print_cpumap(Online Cpus, cpu_online_map);
+if ( cpumask_weight(cpupool_free_cpus) )
+print_cpumap(Free Cpus, cpupool_free_cpus);
+
  printk(Idle cpupool:\n);
  schedule_dump(NULL);

  for_each_cpupool(c)
  {
  printk(Cpupool %d:\n, (*c)-cpupool_id);
+print_cpumap(Cpus, (*c)-cpu_valid);
  schedule_dump(*c);
  }






___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH v2] Build: Fix stubdom vtpm build failure

2015-03-17 Thread Quan Xu

Typedefs are duplicated in stubdom/vtpmmgr/tcg.h and supported compilers
do not cope with current staging branch.

Signed-off-by: Olaf Hering o...@aepfle.de
Signed-off-by: Quan Xu quan...@intel.com
---
 stubdom/vtpmmgr/common_types.h |9 +
 stubdom/vtpmmgr/tcg.h  |7 +--
 stubdom/vtpmmgr/tpm2.c |   10 +-
 stubdom/vtpmmgr/tpm2.h |6 +++---
 stubdom/vtpmmgr/tpm2_types.h   |   43 +++-
 stubdom/vtpmmgr/vtpmmgr.h  |4 ++--
 6 files changed, 36 insertions(+), 43 deletions(-)
 create mode 100644 stubdom/vtpmmgr/common_types.h

diff --git a/stubdom/vtpmmgr/common_types.h b/stubdom/vtpmmgr/common_types.h
new file mode 100644
index 000..7321bb6
--- /dev/null
+++ b/stubdom/vtpmmgr/common_types.h
@@ -0,0 +1,9 @@
+#ifndef VTPM_COMMON_TYPES
+#define VTPM_COMMON_TYPES 1
+typedef unsigned char BYTE;
+typedef unsigned char BOOL;
+typedef unsigned char UINT8;
+typedef uint16_t UINT16;
+typedef uint32_t UINT32;
+typedef uint64_t UINT64;
+#endif
diff --git a/stubdom/vtpmmgr/tcg.h b/stubdom/vtpmmgr/tcg.h
index 7321ec6..813ce57 100644
--- a/stubdom/vtpmmgr/tcg.h
+++ b/stubdom/vtpmmgr/tcg.h
@@ -39,6 +39,7 @@
 
 #include stdlib.h
 #include stdint.h
+#include common_types.h
 
 //  CONSTANTS *
 
@@ -401,12 +402,6 @@
 
 
 // *** TYPEDEFS *
-typedef unsigned char BYTE;
-typedef unsigned char BOOL;
-typedef uint16_t UINT16;
-typedef uint32_t UINT32;
-typedef uint64_t UINT64;
-
 typedef UINT32 TPM_RESULT;
 typedef UINT32 TPM_PCRINDEX;
 typedef UINT32 TPM_DIRINDEX;
diff --git a/stubdom/vtpmmgr/tpm2.c b/stubdom/vtpmmgr/tpm2.c
index 1903e27..c9f1016 100644
--- a/stubdom/vtpmmgr/tpm2.c
+++ b/stubdom/vtpmmgr/tpm2.c
@@ -167,7 +167,7 @@ egress:
 TPM_RC TPM2_Load(TPMI_DH_OBJECT parentHandle,
  TPM2B_PRIVATE *inPrivate, /* in */
  TPM2B_PUBLIC *inPublic, /* in */
- TPM_HANDLE *objectHandle, /* out */
+ TPM2_HANDLE *objectHandle, /* out */
  TPM2B_NAME *name /* out */)
 {
 TPM_BEGIN(TPM_ST_SESSIONS, TPM_CC_Load);
@@ -185,7 +185,7 @@ TPM_RC TPM2_Load(TPMI_DH_OBJECT parentHandle,
 if (objectHandle != NULL) {
 ptr = unpack_TPM_HANDLE(ptr, objectHandle);
 } else {
-TPM_HANDLE tmp;
+TPM2_HANDLE tmp;
 ptr = unpack_TPM_HANDLE(ptr, tmp);
 }
 
@@ -248,7 +248,7 @@ egress:
 
 TPM_RC TPM2_CreatePrimary(TPMI_RH_HIERARCHY primaryHandle,
   TPM2_Create_Params_in *in,
-  TPM_HANDLE *objHandle,
+  TPM2_HANDLE *objHandle,
   TPM2_Create_Params_out *out)
 {
 UINT32 param_size;
@@ -281,7 +281,7 @@ TPM_RC TPM2_CreatePrimary(TPMI_RH_HIERARCHY primaryHandle,
 if (objHandle != NULL)
 ptr = unpack_TPM_HANDLE(ptr, objHandle);
 else {
-TPM_HANDLE handle;
+TPM2_HANDLE handle;
 ptr = unpack_TPM_HANDLE(ptr, handle);
 }
 ptr = unpack_UINT32(ptr, param_size);
@@ -302,7 +302,7 @@ egress:
return status;
 }
 
-TPM_RC TPM2_HierachyChangeAuth(TPMI_RH_HIERARCHY_AUTH authHandle, TPM2B_AUTH 
*newAuth)
+TPM_RC TPM2_HierachyChangeAuth(TPM2I_RH_HIERARCHY_AUTH authHandle, TPM2B_AUTH 
*newAuth)
 {
 TPM_BEGIN(TPM_ST_SESSIONS, TPM_CC_HierarchyChangeAuth);
 ptr = pack_UINT32(ptr, authHandle);
diff --git a/stubdom/vtpmmgr/tpm2.h b/stubdom/vtpmmgr/tpm2.h
index 9f597ee..9e01286 100644
--- a/stubdom/vtpmmgr/tpm2.h
+++ b/stubdom/vtpmmgr/tpm2.h
@@ -57,7 +57,7 @@ TPM_RC TPM2_PCR_Read(TPML_PCR_SELECTION pcrSelectionIn,
 TPM_RC TPM2_Load(TPMI_DH_OBJECT parentHandle,
  TPM2B_PRIVATE *inPrivate,
  TPM2B_PUBLIC *inPublic,
- TPM_HANDLE *objectHandle,
+ TPM2_HANDLE *objectHandle,
  TPM2B_NAME *name);
 
 TPM_RC TPM2_Create(TPMI_DH_OBJECT parentHandle,
@@ -66,10 +66,10 @@ TPM_RC TPM2_Create(TPMI_DH_OBJECT parentHandle,
 
 TPM_RC TPM2_CreatePrimary(TPMI_RH_HIERARCHY primaryHandle,
   TPM2_Create_Params_in *objHandle,
-  TPM_HANDLE *in,
+  TPM2_HANDLE *in,
   TPM2_Create_Params_out *out);
 
-TPM_RC TPM2_HierachyChangeAuth(TPMI_RH_HIERARCHY_AUTH authHandle,
+TPM_RC TPM2_HierachyChangeAuth(TPM2I_RH_HIERARCHY_AUTH authHandle,
TPM2B_AUTH *newAuth);
 
 TPM_RC TPM2_RSA_ENCRYPT(TPMI_DH_OBJECT keyHandle,
diff --git a/stubdom/vtpmmgr/tpm2_types.h b/stubdom/vtpmmgr/tpm2_types.h
index ac2830d..a07d8f3 100644
--- a/stubdom/vtpmmgr/tpm2_types.h
+++ b/stubdom/vtpmmgr/tpm2_types.h
@@ -3,6 +3,7 @@
 
 #include stdlib.h
 #include stdint.h
+#include common_types.h
 
 // implementation.h
 // Table 212 -- Logic Values
@@ -82,14 +83,6 @@
 #defineMAX_ECC_KEY_BITS 256
 #defineMAX_ECC_KEY_BYTES((MAX_ECC_KEY_BITS + 7) /

Re: [Xen-devel] Any work on sharing of large multi-page segments?

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 01:46, andreww...@gmail.com wrote:
 On 3/16/15, Jan Beulich jbeul...@suse.com wrote:
 So where do you expect the major performance / scalability
 improvement to be gained? Internally to Xen, each page will need
 to be tracked separately anyway, as what appears physically
 contiguous in the granting guest may (and likely will) not be
 contiguous in machine memory (i.e. from Xen's perspective).
 Furthermore the public interface is currently written such that
 grant lengths must be less than 64k. I.e. already at the very
 simple first steps you'd be faced with implementing bigger length
 counterparts of (almost) all existing interfaces.
 
 Since I'm looking to grant OpenGL buffers that can be many megabytes
 in size, I would think there would be a fair bit of overhead if the
 backend domain had to make a hypercall to map every single page of a
 buffer. I guess what I could do would be to add a hypercall that maps
 a contiguous group of grant entries (contiguous in the grant table,
 not necessarily contiguous in memory).

And how would that be significantly different from the batching
that's already built into the grant table hypercall?

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [OSSTEST Nested PATCH 6/6] Compose the main recipe of test-nested test job.

2015-03-17 Thread longtao.pang

From: longtao.pang longtaox.p...@intel.com

---
 sg-run-job |   11 +++
 1 file changed, 11 insertions(+)

diff --git a/sg-run-job b/sg-run-job
index 94d091b..ababebe 100755
--- a/sg-run-job
+++ b/sg-run-job
@@ -297,6 +297,17 @@ proc run-job/test-pair {} {
 #run-ts . remus-failover ts-remus-check src_host dst_host + debian
 }
 
+proc need-hosts/test-nested {} {return host}
+proc run-job/test-nested {} {
+run-ts . = ts-debian-hvm-install + host + nested
+run-ts . = ts-nested-setup + host + nested
+run-ts . = ts-xen-install nested_l1
+run-ts . = ts-host-reboot nested_l1
+run-ts . = ts-debian-hvm-install nested_l1 nested2
+run-ts . = ts-guest-stop nested_l1 nested2
+run-ts . = ts-guest-destroy host nested
+}
+
 proc test-guest-migr {g} {
 if {[catch { run-ts . = ts-migrate-support-check + host $g }]} return
 
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [OSSTEST Nested PATCH 5/6] Add test job for nest test case

2015-03-17 Thread longtao.pang

From: longtao.pang longtaox.p...@intel.com

This patch adds creation of the nested test job, when job creation procedure
is invoked.
'NESTED_OS_IMAGE' is the name of 'Debian ISO Images', which defined in
 standalone.config.
---
 make-flight |   20 
 1 file changed, 20 insertions(+)

diff --git a/make-flight b/make-flight
index 63b14f5..fb35ac4 100755
--- a/make-flight
+++ b/make-flight
@@ -204,6 +204,25 @@ do_hvm_win7_x64_tests () {
 all_hostflags=$most_hostflags,hvm
 }
 
+do_hvm_debian_nested_tests () {
+  if [ $xenarch != amd64 ]; then
+return
+  fi
+  if [ $dom0arch != amd64 ]; then
+return
+  fi
+
+  job_create_test test-$xenarch$kern-$dom0arch-nested test-nested xl \
+   $xenarch $dom0arch \
+nested_image=$NESTED_OS_IMAGE \
+nested2_image=$NESTED_OS_IMAGE \
+bios=seabios \
+kernbuildjob=build-amd64-pvops \
+kernkind=pvops \
+device_model_version=qemu-xen \
+all_hostflags=$most_hostflags,hvm
+}
+
 do_hvm_debian_test_one () {
   testname=$1
   bios=$2
@@ -401,6 +420,7 @@ test_matrix_do_one () {
 done
 
   fi
+  do_hvm_debian_nested_tests
   do_passthrough_tests
 }
 
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [OSSTEST Nested PATCH 4/6] Add new ts-nested-setup script to custmize nested test configuration before the testing.

2015-03-17 Thread longtao.pang

From: longtao.pang longtaox.p...@intel.com

1. In this script, make some appropriate runvars which selecthost would 
recognise.
2. Prepare the configurations for installing L2 guest VM.
3. Create a lv disk in L0 and hot-attach it to L1, need to restart L1 to make
the block disk to be recognized by L1 system, then using this disk to create a 
VG that
used for installing L2.
---
 ts-nested-setup |   54 ++
 1 file changed, 54 insertions(+)
 create mode 100755 ts-nested-setup

diff --git a/ts-nested-setup b/ts-nested-setup
new file mode 100755
index 000..69dc407
--- /dev/null
+++ b/ts-nested-setup
@@ -0,0 +1,54 @@
+#!/usr/bin/perl -w
+# This is part of osstest, an automated testing framework for Xen.
+# Copyright (C) 2015 Intel Inc.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+# 
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see http://www.gnu.org/licenses/.
+
+use strict qw(vars);
+use DBI;
+use Osstest;
+use Osstest::Debian;
+use Osstest::TestSupport;
+
+tsreadconfig();
+our ($ident_l0,$nested_host) = @ARGV;
+our ($l0,$l1) = ts_get_host_guest(@ARGV);
+
+guest_check_ip($l1);
+target_cmd_root($l1, mkdir -p /home/osstest/.ssh  cp 
/root/.ssh/authorized_keys /home/osstest/.ssh/);
+my $url = $c{WebspaceUrl}.$c{WebspaceCommon}.$l0-{Name}._.'overlay.tar'; 
+target_cmd_root($l1, END);
+wget -O overlay.tar $url
+tar -xf overlay.tar -C /
+rm overlay.tar -f
+update-rc.d osstest-confirm-booted start 99 2 .
+END
+
+store_runvar('nested_l1',$nested_host);
+store_runvar(${nested_host}_ip,$l1-{Ip});
+store_runvar('multi_reboot_time',5);
+
+my $l2_disk_mb = 2;
+my $l2_lv_name = 'nestedl2';
+my $vgname = $l0-{Name};
+$vgname .= .$c{TestHostDomain} if ($l0-{Suite} =~ m/lenny/);
+target_cmd_root($l0, END);
+lvremove -f /dev/${vgname}/${l2_lv_name} ||:
+lvcreate -L ${l2_disk_mb}M -n $l2_lv_name $vgname
+dd if=/dev/zero of=/dev/${vgname}/${l2_lv_name} count=10
+xl block-attach $l1-{Name} /dev/${vgname}/${l2_lv_name},raw,sdb,rw
+END
+target_install_packages_norec($l1, qw(lvm2 rsync genisoimage));
+target_reboot($l1);
+target_cmd_root($l1, pvcreate /dev/xvdb  vgcreate ${l2_lv_name}_vg 
/dev/xvdb);
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [OSSTEST Nested PATCH 2/6] Add and expose some testsupport APIs

2015-03-17 Thread longtao.pang

From: longtao.pang longtaox.p...@intel.com

1. Designate vif model to 'e1000', otherwise, with default device model,
the L1 eth0 interface disappear, hence xenbridge cannot work.
Maybe this limitation can be removed later after some fix it. For now, we
have to accomodate to it.
2. Since reboot L1 guest VM will take more time to boot up, we increase
multi-times for reboot-confirm-booted if test nested job, and the multi value 
is stored as a runvar in 'ts-nested-setup' script. Added another function 
'guest_editconfig_cd' and expose it, this function bascically changes guest
boot device sequence, alter its on_reboot behavior to restart and enabled 
nestedhvm feature.
3. In L2 installation context, its host (L1) IP address is not queried from DNS,
but after running ts-nested-setup + host + nested, L1 IP is stored in runvar.
---
 Osstest/TestSupport.pm |   21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/Osstest/TestSupport.pm b/Osstest/TestSupport.pm
index e1ebcb7..57c7167 100644
--- a/Osstest/TestSupport.pm
+++ b/Osstest/TestSupport.pm
@@ -111,6 +111,7 @@ BEGIN {
   iso_gen_flags_basic
   iso_copy_content_from_image
   guest_editconfig_nocd
+  guest_editconfig_cd
   );
 %EXPORT_TAGS = ( );
 
@@ -819,6 +820,10 @@ sub selecthost ($) {
logm(Host $name is in HostGroup $ho-{Properties}{HostGroup});
 }
 
+if ( $r{${name}_ip} ) {
+$setprop-(IpAddr, $r{${name}_ip});
+}
+
 # Next, we use the config file's general properites as defaults
 foreach my $k (keys %c) {
next unless $k =~ m/^HostProp_([A-Z].*)$/;
@@ -990,8 +995,9 @@ sub common_toolstack ($) {
 
 sub host_reboot ($) {
 my ($ho) = @_;
+my $wait_time = $r{multi_reboot_time} ? $r{multi_reboot_time}*40 : 40;
 target_reboot($ho);
-poll_loop(40,2, 'reboot-confirm-booted', sub {
+poll_loop($wait_time,2, 'reboot-confirm-booted', sub {
 my $output;
 if (!eval {
 $output= target_cmd_output($ho, END, 40);
@@ -1541,7 +1547,7 @@ sub prepareguest_part_xencfg ($) {
 my $xencfg= END;
 name= '$gho-{Name}'
 memory = ${ram_mb}
-vif = [ 'type=ioemu,mac=$gho-{Ether}' ]
+vif = [ 'type=ioemu,model=e1000,mac=$gho-{Ether}' ]
 #
 on_poweroff = '$onpoweroff'
 on_reboot   = '$onreboot'
@@ -2113,4 +2119,15 @@ sub guest_editconfig_nocd ($$) {
 });
 }
 
+sub guest_editconfig_cd ($) {
+my ($gho) = @_;
+guest_editconfig($gho-{Host}, $gho, sub {
+if (m/^\s*boot\s*= '\s*d\s*c\s*'/) {
+s/dc/cd/;
+}
+s/^on_reboot.*/on_reboot='restart'/;
+s/#nestedhvm/nestedhvm/;
+});
+}
+
 1;
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [OSSTEST Nested PATCH 0/6] Introduction of netsted HVM test job

2015-03-17 Thread longtao.pang

This patch set adds nested HVM test case for osstest.
In this test case, a Xen hypervisor (L1) runs on top of another Xen hypervisor 
(L0). 
Upon L1 hypervisor, we will then create a nested guest (L2), and test if the 
Linux guest can then be installed and run well.
About nested Xen virtualization, refer to 
http://wiki.xenproject.org/wiki/Nested_Virtualization_in_Xen.

Test steps
0. To run osstest in standalone mode, write a config file in
   '~/.xen-osstest/config', and then create a standalone.config file to
   define 'TREE_LINUX', 'REVISION_LINUX' and 'NESTED_OS_IMAGE' which
   will be used for nested test. The directry path of 'Debian Images'
   could be difined in '~/.xen-osstest/config'. 
1. run 'build-amd64' job and then 'build-amd64-pvops', to prepare xen
   installation tarball and hvm guest kernel.
2. run 'test-amd64-amd6-nested' job, it does following:
a. invoke test step of 'ts-debain-hvm-install' to install 
   a normal HVM guest
b. invoke test step of 'ts-nested-setup' to make some
   appropriate runvars which selecthost would recognise and
   prepare the configurations for installing L2 guest VM. 
c. invoke test step of 'ts-xen-install' to install xen on 
   the normal guest, alter it into a L1 hypervisor
d. invoke test step of 'ts-debain-hvm-install' again, but 
   take the L1 hypervisor as host, install the L2 guest on it
e. invoke test step of 'ts-guest-stop', stop L2 guest.
f. invoke test step of 'ts-guest-destroy' to destroy L1 guest.

This patch set reuses 'ts-debian-hvm-install' for both L1 installation and L2 
installation, define 'nested' and 'nested2' as L1 and L2's hostname, define 
'nested_l1 as L1's host ident.
It also reuses 'ts-xen-install' with 'nested' input param to differentiate L1 
Xen installation from L0 Xen installation.
This patch series has been tested on test machines of amd64 arch, 
debian-7.6.0-amd64 as guests OS, with hvm domain0 of Linux kernel 3.18.5, in 
standalone mode.

longtao.pang (6):
  parsing grub which has 'submenu' primitive
  Add and expose some testsupport APIs
  Changes on test step of debain hvm guest install in hvm.
  Add new ts-nested-setup script to custmize nested test configuration 
before the testing.
  Add test job for nest test case
  Compose the main recipe of test-nested test job.

 Osstest/Debian.pm  |   52 

 Osstest/TestSupport.pm |   21 +++--
 make-flight|   20 
 sg-run-job |   11 +++
 ts-debian-hvm-install  |   14 +++---
 ts-nested-setup|   54 
++
 6 files changed, 147 insertions(+), 25 deletions(-)
 create mode 100755 ts-nested-setup


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [OSSTEST Nested PATCH 3/6] Changes on test step of debain hvm guest install in hvm.

2015-03-17 Thread longtao.pang

From: longtao.pang longtaox.p...@intel.com

1. Increase disk size to accomodate to nested test requirment.
2. Since 'Debain-xxx-.iso' image will be stored there, therefore needs more
disk capacity, increase root partition size in preseed generation.
3. In L1 installation context, assign more memory to it; since it
acts as a nested hypervisor anyway.
4. In hvm guest configuration file, add '#nestedhvm=1',
which will later be uncommented by guest_editconfig_cd() after
xen installed in L1, and about to boot into a nested xen environment.
---
 ts-debian-hvm-install |   14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/ts-debian-hvm-install b/ts-debian-hvm-install
index 449b96c..f334e30 100755
--- a/ts-debian-hvm-install
+++ b/ts-debian-hvm-install
@@ -36,7 +36,7 @@ our $ho= selecthost($whhost);
 
 # guest memory size will be set based on host free memory, see below
 our $ram_mb;
-our $disk_mb= 1;
+our $disk_mb= 15000;
 
 our $guesthost= $gn.guest.osstest;
 our $gho;
@@ -60,7 +60,7 @@ d-i partman-auto/expert_recipe string \\
 use_filesystem{ } filesystem{ vfat } \\
 mountpoint{ /boot/efi } \\
 . \\
-5000 50 5000 ext4 \\
+1 50 1 ext4 \\
 method{ format } format{ } \\
 use_filesystem{ } filesystem{ ext4 } \\
 mountpoint{ / } \\
@@ -152,6 +152,7 @@ sub prep () {
 more_prepareguest_hvm($ho,$gho, $ram_mb, $disk_mb,
   OnReboot = 'preserve',
   Bios = $r{bios},
+  ExtraConfig = '#nestedhvm=1',
   PostImageHook = sub {
 my $cmds = iso_copy_content_from_image($gho, $newiso);
 $cmds .= prepare_initrd($initrddir,$newiso,$preseed_file_path);
@@ -173,6 +174,8 @@ my $ram_minslop = 100;
 my $ram_lots = 5000;
 if ($host_freemem_mb  $ram_lots * 2 + $ram_minslop) {
 $ram_mb = $ram_lots;
+} elsif ($gn eq 'nested') {
+$ram_mb = 3072;
 } else {
 $ram_mb = 768;
 }
@@ -189,7 +192,12 @@ if ($stage2) {
 guest_destroy($gho);
 }
 
-guest_editconfig_nocd($gho,$emptyiso);
+if ($gn eq 'nested') {
+guest_editconfig_cd($gho);
+} else {
+guest_editconfig_nocd($gho,$emptyiso);
+}
+
 guest_create($gho);
 guest_await_dhcp_tcp($gho,300);
 guest_check_up($gho);
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [OSSTEST Nested PATCH 1/6] parsing grub which has 'submenu' primitive

2015-03-17 Thread longtao.pang

From: longtao.pang longtaox.p...@intel.com

From a hvm kernel build from Linux stable Kernel tree,
the auto generated grub2 menu will have 'submenu' primitive, upon the
'menuentry' items. Xen boot entries will be grouped into a submenu. This
patch adds capability to support such grub formats.
---
 Osstest/Debian.pm |   52 
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/Osstest/Debian.pm b/Osstest/Debian.pm
index e3e1c90..9fdacd7 100644
--- a/Osstest/Debian.pm
+++ b/Osstest/Debian.pm
@@ -1,5 +1,6 @@
 # This is part of osstest, an automated testing framework for Xen.
 # Copyright (C) 2009-2013 Citrix Inc.
+# Copyright (C) 2014-2015 Intel Inc.
 # 
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
@@ -362,26 +363,34 @@ sub setupboot_grub2 ($$$) {
 
 my $count= 0;
 my $entry;
+my $submenu;
 while ($f) {
 next if m/^\s*\#/ || !m/\S/;
 if (m/^\s*\}\s*$/) {
-die unless $entry;
+die unless $entry || $submenu;
+   if(!defined $entry  defined $submenu){
+ logm(Met end of a submenu starting from .
+   $submenu-{StartLine}. .
+   Our want kern is $want_kernver);
+ $submenu=undef;
+ next;
+   }
 my (@missing) =
-grep { !defined $entry-{$_} } 
-   (defined $xenhopt
-? qw(Title Hv KernDom0 KernVer)
-: qw(Title Hv KernOnly KernVer));
-   if (@missing) {
-   logm((skipping entry at $entry-{StartLine};.
- no @missing));
-   } elsif (defined $want_kernver 
-$entry-{KernVer} ne $want_kernver) {
-   logm((skipping entry at $entry-{StartLine};.
- kernel $entry-{KernVer}, not $want_kernver));
-   } else {
-   # yes!
-   last;
-   }
+grep { !defined $entry-{$_} }
+(defined $xenhopt
+ ? qw(Title Hv KernDom0 KernVer)
+ : qw(Title Hv KernOnly KernVer));
+if (@missing) {
+logm((skipping entry at $entry-{StartLine};.
+  no @missing));
+} elsif (defined $want_kernver 
+ $entry-{KernVer} ne $want_kernver) {
+logm((skipping entry at $entry-{StartLine};.
+  kernel $entry-{KernVer}, not $want_kernver));
+} else {
+# yes!
+last;
+}
 $entry= undef;
 next;
 }
@@ -393,21 +402,24 @@ sub setupboot_grub2 ($$$) {
 $entry= { Title = $1, StartLine = $., Number = $count };
 $count++;
 }
-if (m/^\s*multiboot\s*\/(xen\-[0-9][-+.0-9a-z]*\S+)/) {
+   if(m/^submenu\s+[\'\](.*)[\'\].*\{\s*$/){
+   $submenu={ StartLine =$.};
+   }
+if (m/^\s*multiboot\s*(?:\/boot)*\/(xen\S+)/) {
 die unless $entry;
 $entry-{Hv}= $1;
 }
-if (m/^\s*multiboot\s*\/(vmlinu[xz]-(\S+))/) {
+   if (m/^\s*multiboot\s*(?:\/boot)*\/(vmlinu[xz]-(\S+))/) {
 die unless $entry;
 $entry-{KernOnly}= $1;
 $entry-{KernVer}= $2;
 }
-if (m/^\s*module\s*\/(vmlinu[xz]-(\S+))/) {
+   if (m/^\s*module\s*(?:\/boot)*\/(vmlinu[xz]-(\S+))/) {
 die unless $entry;
 $entry-{KernDom0}= $1;
 $entry-{KernVer}= $2;
 }
-if (m/^\s*module\s*\/(initrd\S+)/) {
+   if (m/^\s*module\s*(?:\/boot)*\/(initrd\S+)/) {
 $entry-{Initrd}= $1;
 }
 }
-- 
1.7.10.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 2/6] x86: add support for COS/CBM manangement

2015-03-17 Thread Chao Peng

On Fri, Mar 13, 2015 at 09:53:37AM -0400, Konrad Rzeszutek Wilk wrote:
  +xfree(d-arch.psr_cos_ids);
 
 And d-arch.psr_cos_ids = NULL
 
 (however this depends on who calls psr_domain_init, which
 is unclear to me).
 
  +}
  +
  +int psr_domain_init(struct domain *d)
  +{
  +if ( cat_socket_info )
  +{
  +d-arch.psr_cos_ids = xzalloc_array(unsigned int, opt_socket_num);
  +if ( !d-arch.psr_cos_ids )
  +return -ENOMEM;
  +}
  +
  +return 0;
  +}
  +
  +void psr_domain_free(struct domain *d)
 
 So who calls this?

This function is called by arch_domain_destroy() or the failure path of
arch_domain_create(). In both cases the domain structure will be freed
later so setting NULL is pointless.

Thanks,
Chao
 
  +{
  +psr_free_rmid(d);
  +psr_free_cos(d);
  +}
  +

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 06:26, mja...@caviumnetworks.com wrote:
 In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a 
 hypercall to inform xen that a new pci device has been added.
 If we were to inform xen about a new pci bus that is added there are  2 ways
 a) Issue the hypercall from drivers/pci/probe.c
 b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue 
 PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that 
 segment number (s_bdf), it will return an error
 SEG_NO_NOT_FOUND. After that the linux xen code could issue the 
 PHYSDEVOP_pci_host_bridge_add hypercall.
 
 I think (b) can be done with minimal code changes. What do you think ?

I'm pretty sure (a) would even be refused by the maintainers, unless
there already is a notification being sent. As to (b) - kernel code could
keep track of which segment/bus pairs it informed Xen about, and
hence wouldn't even need to wait for an error to be returned from
the device-add request (which in your proposal would need to be re-
issued after the host-bridge-add).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [v2][PATCH 2/2] libxl: introduce gfx_passthru_kind

2015-03-17 Thread Chen, Tiejun


If I remember the context correctly this is in the autodetect case,
so I think shouldn't mention IGD. Something like Unable to detect
graphics passthru kind, please set gfx_passthru_kind. See xl.cfg(5)
for more


s/gfx_passthru_kind/gfx_passthru, right? Because actually we always get
'gfx_passthru_kind' from 'gfx_passthru'.


I think you have it backwards.

In the case here gfx_passthru=1 has been set by the user, but
gfx_passthru_kind=DEFAULT. So libxl has tried to autodetect but it has
failed.

So if the user wants to make progress they should set gfx_passthru_kind
to whatever type of passthrough they were trying to do.


Looks you're saying 'gfx_passthru_kind' shouldn't reply on 
'gfx_passthru', so 'gfx_passthru_kind' can override that previous value 
parsed from 'gfx_passthru', right?




Alternatively I suppose you could recommend removing gfx_passthru=1 (or


I'm still keep that currently.


changing to=0), but given they've set =1 that doesn't seem to be the
most productive suggestion.



So looks the whole policy should be something like this,

diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 5c40e84..5518759 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -1953,8 +1953,27 @@ skip_vfb:
 xlu_cfg_replace_string (config, spice_streaming_video,
 b_info-u.hvm.spice.streaming_video, 0);
 xlu_cfg_get_defbool(config, nographic, 
b_info-u.hvm.nographic, 0);

-xlu_cfg_get_defbool(config, gfx_passthru,
-b_info-u.hvm.gfx_passthru, 0);
+if (!xlu_cfg_get_long(config, gfx_passthru, l, 1)) {
+libxl_defbool_set(b_info-u.hvm.gfx_passthru, l);
+} else if (!xlu_cfg_get_string(config, gfx_passthru, buf, 0)) {
+if (libxl_gfx_passthru_kind_from_string(buf,
+ 
b_info-u.hvm.gfx_passthru_kind)) {

+fprintf(stderr,
+ERROR: invalid value \%s\ for 
\gfx_passthru\\n,

+buf);
+exit (1);
+}
+libxl_defbool_set(b_info-u.hvm.gfx_passthru, true);
+}
+if (!xlu_cfg_get_string(config, gfx_passthru_kind, buf, 0)) {
+if (libxl_gfx_passthru_kind_from_string(buf,
+ 
b_info-u.hvm.gfx_passthru_kind)) {

+fprintf(stderr,
+ERROR: invalid value \%s\ for 
\gfx_passthru_kind\\n,

+buf);
+exit (1);
+}
+}
 switch (xlu_cfg_get_list_as_string_list(config, serial,

b_info-u.hvm.serial_list,
 1))

Thanks
Tiejun

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 04/10] xen/blkfront: separate ring information to an new struct

2015-03-17 Thread Bob Liu

Hi Felipe,

On 03/06/2015 06:30 PM, Felipe Franciosi wrote:
 -Original Message-
 From: Bob Liu [mailto:bob@oracle.com]
 Sent: 05 March 2015 00:47
 To: Konrad Rzeszutek Wilk
 Cc: Roger Pau Monne; Felipe Franciosi; David Vrabel; xen-devel@lists.xen.org;
 linux-ker...@vger.kernel.org; ax...@fb.com; h...@infradead.org;
 avanzini.aria...@gmail.com; cheg...@amazon.de
 Subject: Re: [PATCH 04/10] xen/blkfront: separate ring information to an new
 struct

 ...snip...

 Meaning you weren't able to do the same test?

 I can if there are more details about how to set up this 5 and 10 guests
 environment and test pattern have been used.
 Just think it might be save time if somebody still have the similar 
 environment
 by hand.
 Roger and Felipe, if you still have the environment could you please have a
 quick compare about feature-persistent performance with patch [PATCH v5 0/2]
 gnttab: Improve scaleability?

 I've been meaning to do that. I don't have the environment up, but it isn't 
 too hard to put it back together. A bit swamped at the moment, but will try 
 (very hard) to do it next week.

Do you have gotten any testing result?

-- 
Regards,
-Bob

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 1/6] x86: detect and initialize Intel CAT feature

2015-03-17 Thread Chao Peng

On Fri, Mar 13, 2015 at 09:40:13AM -0400, Konrad Rzeszutek Wilk wrote:
 On Fri, Mar 13, 2015 at 06:13:20PM +0800, Chao Peng wrote:
  Detect Intel Cache Allocation Technology(CAT) feature and store the
  cpuid information for later use. Currently only L3 cache allocation is
  supported. The L3 CAT features may vary among sockets so per-socket
  feature information is stored. The initialization can happen either at
  boot time or when CPU(s) is hot plugged after booting.
  
  Signed-off-by: Chao Peng chao.p.p...@linux.intel.com
  ---
   docs/misc/xen-command-line.markdown |  15 +++-
   xen/arch/x86/psr.c  | 151 
  +---
   xen/include/asm-x86/cpufeature.h|   1 +
   3 files changed, 155 insertions(+), 12 deletions(-)
  
  +cat_cpu_init(smp_processor_id());
 
 Do 'if (!cat_cpu_init(..)).`'
 
 as the CPU might not support this.
 
 At which point you should also free the cat_socket_info and
 not register the cpu notifier.

Even the booting CPU does not support this, other CPUs may still support
this. Generally the feature is a per-socket feature. So break here is
not the intention.

Except this, all other comments will be addressed by the next version.
Thanks for your time.

Chao
 
  +register_cpu_notifier(cpu_nfb);
  +}
  +

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.

2015-03-17 Thread Jan Beulich

 On 16.03.15 at 18:59, konrad.w...@oracle.com wrote:
 Hence was wondering if it would just be easier to put
 this patch in (see above) - with the benfit that folks have
 an faster interrupt passthrough experience and then I work on another
 variant of this with tristate cmpxchg and -mapping atomic counter.

Considering how long this issue has been pending I think we really
need to get _something_ in (or revert); if this something is the
patch in its most recent form, so be it (even if maybe not the
simplest of all possible variants). So please submit as a proper non-
RFC patch.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.

2015-03-17 Thread Sander Eikelenboom


Tuesday, March 17, 2015, 9:18:32 AM, you wrote:

 On 16.03.15 at 18:59, konrad.w...@oracle.com wrote:
 Hence was wondering if it would just be easier to put
 this patch in (see above) - with the benfit that folks have
 an faster interrupt passthrough experience and then I work on another
 variant of this with tristate cmpxchg and -mapping atomic counter.

 Considering how long this issue has been pending I think we really
 need to get _something_ in (or revert); if this something is the
 patch in its most recent form, so be it (even if maybe not the
 simplest of all possible variants). So please submit as a proper non-
 RFC patch.

 Jan

I'm still running with this first simple stopgap patch from Konrad,
and it has worked fine for me since.

I will see if this new one also works-for-me, somewhere today :-)

--
Sander



diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index ae050df..d1421b0 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -804,7 +804,18 @@ static void dpci_softirq(void)
 d = pirq_dpci-dom;
 smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
 if ( test_and_set_bit(STATE_RUN, pirq_dpci-state) )
-BUG();
+{
+unsigned long flags;
+
+/* Put back on the list and retry. */
+local_irq_save(flags);
+list_add_tail(pirq_dpci-softirq_list, this_cpu(dpci_list));
+local_irq_restore(flags);
+
+raise_softirq(HVM_DPCI_SOFTIRQ);
+continue;
+}
+
 /*
  * The one who clears STATE_SCHED MUST refcount the domain.
  */


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] Process context switch handling by xen

2015-03-17 Thread Tamas K Lengyel

Yes there is. Take a look at this sample code in LibVMI:
https://github.com/libvmi/libvmi/blob/master/examples/event-example.c

Cheers,
Tamas

On Mon, Mar 16, 2015 at 11:12 PM, HANNAS YAYA Issa
issa.hannasy...@enseeiht.fr wrote:
 Hello
 I want to know when there is context switch between processes in guest the
 xen hypervisor can handle this switch.
 Is there any possibility to get this context switch from hypervisor?

 ___
 Xen-devel mailing list
 Xen-devel@lists.xen.org
 http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 1/6] x86: detect and initialize Intel CAT feature

2015-03-17 Thread Chao Peng

On Mon, Mar 16, 2015 at 01:47:06PM +, Jan Beulich wrote:
  On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote:
  @@ -1112,6 +1117,12 @@ The following resources are available:
 total/local memory bandwidth. Follow the same options with Cache 
  Monitoring
 Technology.
   
  +* Cache Alllocation Technology (Broadwell and later).  Information 
  regarding
  +  the cache allocation.
  +  * `cat` instructs Xen to enable/disable Cache Allocation Technology.
  +  * `socket_num` indicates socket number. Detecte automatically at boot 
  time
  +if not specified(0). Useful for CPU hot-plug case.
 
 While saying something, at least for me what is being said doesn't at
 all make clear what socket number here is: The number of a specific
 socket? The number of sockets? Yet something else? Without knowing
 what it means by _just_ reading this description, people won't know
 what to use the command line option for.

I agree, the name is a little confusing. Or perhaps: max_socket? E.g.

`max_socket` indicates the maximum number of available sockets for CAT
feature detection. All the sockets up to max_socket will be checked for
CAT feature. The value is normally detected at boot time automatically
if not specified(0). While the value may need to be specified manually
for CPU hot-plug scenario in which case the maximum socket number detected
at boot time may be not correct.

 
  -static void __init init_psr_cmt(unsigned int rmid_max)
  +static void __init psr_cmt_init(unsigned int rmid_max)
 
 Is this renaming really an integral part of this patch?

OK, I will move it to separate one or just keep it unchanged.

 
  +on_selected_cpus(cpumask_of(cpu), do_cat_cpu_init, info, 0);
 
 Hmm, using an IPI here seems odd. Is there a reason to can't hook
 this onto CPU_STARTING instead of CPU_ONLINE?

Looks like CPU_STARTING is what I need, IPI then is unnecessary.

 
  +static unsigned int get_max_socket(void)
  +{
  +unsigned int cpu, max_apicid = boot_cpu_physical_apicid;
  +
  +for_each_present_cpu(cpu)
  +if (max_apicid  x86_cpu_to_apicid[cpu])
 
 Coding style.
 
  +max_apicid = x86_cpu_to_apicid[cpu];
  +
  +return apicid_to_socket(max_apicid);
 
 Since when is the socket with the highest numbered APIC ID
 the highest numbered socket? I think the whole function needs
 to act on socket numbers only.

Then perhaps looping cpus and checking against cpu_to_socket(cpu)
directly make sense.

Thanks Jan.
Chao

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 1/6] x86: detect and initialize Intel CAT feature

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 09:48, chao.p.p...@linux.intel.com wrote:
 On Mon, Mar 16, 2015 at 01:47:06PM +, Jan Beulich wrote:
  On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote:
  @@ -1112,6 +1117,12 @@ The following resources are available:
 total/local memory bandwidth. Follow the same options with Cache 
  Monitoring
 Technology.
   
  +* Cache Alllocation Technology (Broadwell and later).  Information 
  regarding
  +  the cache allocation.
  +  * `cat` instructs Xen to enable/disable Cache Allocation Technology.
  +  * `socket_num` indicates socket number. Detecte automatically at boot 
  time
  +if not specified(0). Useful for CPU hot-plug case.
 
 While saying something, at least for me what is being said doesn't at
 all make clear what socket number here is: The number of a specific
 socket? The number of sockets? Yet something else? Without knowing
 what it means by _just_ reading this description, people won't know
 what to use the command line option for.
 
 I agree, the name is a little confusing. Or perhaps: max_socket? E.g.
 
 `max_socket` indicates the maximum number of available sockets for CAT
 feature detection. All the sockets up to max_socket will be checked for
 CAT feature. The value is normally detected at boot time automatically
 if not specified(0). While the value may need to be specified manually
 for CPU hot-plug scenario in which case the maximum socket number detected
 at boot time may be not correct.

Thanks, yes - max_socket or (perhaps more intuitive to users)
socket_count or num_sockets.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.

2015-03-17 Thread Konrad Rzeszutek Wilk


  Additionally I think it should be considered whether the bitmap
  approach of interpreting -state is the right one, and we don't
  instead want a clean 3-state (idle, sched, run) model.
  
  Could you elaborate a bit more please? As in three different unsigned int
  (or bool_t) that set in what state we are in?
 
 An enum { STATE_IDLE, STATE_SCHED, STATE_RUN }. Especially
 if my comment above turns out to be wrong, you'd have no real
 need for the SCHED and RUN flags to be set at the same time.

I cobbled what I believe is what you were thinking off.

As you can see to preserve the existing functionality such as
being able to schedule N amount of interrupt injections
for the N interrupts we might get - I modified '-masked'
to be an atomic counter.

The end result is that we can still live-lock. Unless we:
 - Drop on the floor the injection of N interrupts and
   just deliever at max one per VMX_EXIT (and not bother
   with interrupts arriving when we are in the VMX handler).

 - Alter the softirq code slightly - to have an variant
   which will only iterate once over the pending softirq
   bits per call. (so save an copy of the bitmap on the
   stack when entering the softirq handler - and use that.
   We could also xor it against the current to catch any
   non-duplicate bits being set that we should deal with).

Here is the compile, but not run-time tested patch.

From e7d8bcd7c5d32c520554a4ad69c4716246036002 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk konrad.w...@oracle.com
Date: Tue, 17 Mar 2015 13:31:52 -0400
Subject: [RFC PATCH] dpci: Switch to tristate instead of bitmap

*TODO*:
 - Writeup.
 - Tests

Suggested-by: Jan Beulich jbuel...@suse.com
Signed-off-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
---
 xen/drivers/passthrough/io.c | 140 ---
 xen/include/xen/hvm/irq.h|   4 +-
 2 files changed, 82 insertions(+), 62 deletions(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index ae050df..663e104 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -30,42 +30,28 @@
 static DEFINE_PER_CPU(struct list_head, dpci_list);
 
 /*
- * These two bit states help to safely schedule, deschedule, and wait until
- * the softirq has finished.
- *
- * The semantics behind these two bits is as follow:
- *  - STATE_SCHED - whoever modifies it has to ref-count the domain (-dom).
- *  - STATE_RUN - only softirq is allowed to set and clear it. If it has
- *  been set hvm_dirq_assist will RUN with a saved value of the
- *  'struct domain' copied from 'pirq_dpci-dom' before STATE_RUN was set.
- *
- * The usual states are: STATE_SCHED(set) - STATE_RUN(set) -
- * STATE_SCHED(unset) - STATE_RUN(unset).
- *
- * However the states can also diverge such as: STATE_SCHED(set) -
- * STATE_SCHED(unset) - STATE_RUN(set) - STATE_RUN(unset). That means
- * the 'hvm_dirq_assist' never run and that the softirq did not do any
- * ref-counting.
- */
-
-enum {
-STATE_SCHED,
-STATE_RUN
-};
-
-/*
  * This can be called multiple times, but the softirq is only raised once.
- * That is until the STATE_SCHED state has been cleared. The state can be
- * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
- * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
+ * That is until state is in init. The state can be changed  by:
+ * the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
+ * or by 'pt_pirq_softirq_reset' (which will try to init the state before
  * the softirq had a chance to run).
  */
 static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
 {
 unsigned long flags;
 
-if ( test_and_set_bit(STATE_SCHED, pirq_dpci-state) )
+switch ( cmpxchg(pirq_dpci-state, STATE_INIT, STATE_SCHED) )
+{
+case STATE_RUN:
+case STATE_SCHED:
+/*
+ * The pirq_dpci-mapping has been incremented to let us know
+ * how many we have left to do.
+ */
 return;
+case STATE_INIT:
+break;
+}
 
 get_knownalive_domain(pirq_dpci-dom);
 
@@ -85,7 +71,7 @@ static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
  */
 bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
 {
-if ( pirq_dpci-state  ((1  STATE_RUN) | (1  STATE_SCHED)) )
+if ( pirq_dpci-state != STATE_INIT )
 return 1;
 
 /*
@@ -109,22 +95,22 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci 
*pirq_dpci)
 
 ASSERT(spin_is_locked(d-event_lock));
 
-switch ( cmpxchg(pirq_dpci-state, 1  STATE_SCHED, 0) )
+switch ( cmpxchg(pirq_dpci-state, STATE_SCHED, STATE_INIT) )
 {
-case (1  STATE_SCHED):
+case STATE_SCHED:
 /*
- * We are going to try to de-schedule the softirq before it goes in
- * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'.
+ * We are going to try to de-schedule the softirq before it goes to
+ * running state. Whoever

Re: [Xen-devel] [PATCH 3/6] X86: improve psr scheduling code

2015-03-17 Thread Chao Peng

On Mon, Mar 16, 2015 at 04:53:35PM +, Jan Beulich wrote:
  On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote:
  @@ -1473,11 +1471,10 @@ static void __context_switch(void)
   }
   vcpu_restore_fpu_eager(n);
   n-arch.ctxt_switch_to(n);
  -
  -if ( psr_cmt_enabled()  n-domain-arch.psr_rmid  0 )
  -psr_assoc_rmid(n-domain-arch.psr_rmid);
   }
   
  +psr_ctxt_switch_to(n-domain);
 
 But you now potentially do the MSR write despite it already being
 the needed value (e.g. when switching between an idle vCPU and
 another one also having RMID zero).

Not really. The MSR is already updated lazily in psr_assoc_rmid().

Chao

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 2/6] x86: add support for COS/CBM manangement

2015-03-17 Thread Chao Peng

On Tue, Mar 17, 2015 at 09:25:48AM +, Jan Beulich wrote:
  On 17.03.15 at 10:11, chao.p.p...@linux.intel.com wrote:
  On Mon, Mar 16, 2015 at 05:10:32PM +, Jan Beulich wrote:
   On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote:
   +else
   +{
   +unsigned int cpu = cpumask_check(get_socket_cpu(socket));
  
  Isn't this going to trigger an assertion when the socket count got
  specified on the command line?
  
  Yes, the assertion is needed in tools side anyway. Here the check seems
  unnecessary.
 
 No - if the get_socket_cpu() may return nr_cpu_ids, you need a
 check. Just not in the form of an assertion.

Right, error code can be returned.

 
   +if( !d-arch.psr_cos_ids )
   +return;
  
  Considering this check ...
  
   +for ( socket = 0; socket  opt_socket_num; socket++)
   +{
   +cos = d-arch.psr_cos_ids[socket];
   +if ( cos == 0 )
   +continue;
   +
   +map = cat_socket_info[socket].cos_cbm_map;
   +if ( map )
   +map[cos].ref--;
   +}
   +
   +xfree(d-arch.psr_cos_ids);
  
  ... I think you want to clear the pointer here.
  
  This function is called by arch_domain_destroy() or the failure path of
  arch_domain_create(). In both cases the domain structure will be freed
  later so setting NULL is pointless.
 
 But this is not visible from this function. The alternative to not
 clearing the pointer here would be to do the checks of the
 pointer in the callers, which then would make clear whether this
 is really only on error and domain destruction paths (and that
 hence the pointer can't be double freed).

Then I'd like to clear the pointer here, so that even in the future the
code will not surprise somebody.

 
   @@ -222,6 +422,17 @@ static void do_cat_cpu_init(void* data)
info-cbm_len = (eax  0x1f) + 1;
  
  This means cbm_len = 32. Why is cos_cbm_map[].cbm then a
  uint64_t?
  
  Currently the cbm_len is EAX[4:0], 64 bits cbm here is for future
  possible enhancement.
 
 Unless the specification explicitly says that the field width in EAX
 may be extended in the future, please omit such preparations
 for possible future extensions - it is simply going to confuse
 readers (as it did for me already) trying to understand why the
 type of the field is what it is.

I will try to understand this internally, if 32 bit is enough, then it
will be used.

Thanks,
Chao

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] osstest going offline for a bit due to database server move

2015-03-17 Thread Ian Campbell

On Mon, 2015-03-16 at 12:41 +, Ian Campbell wrote:
 We've not yet tracked down the source of the mysterious filer reboots
 and there was another earlier today, we've fiddled with a few things to
 see if we can track them down.
 
 osstest is doing stuff now, fingers crossed.

There were some more reboots overnight. We've made another config change
which we hope will resolve things. If not we will look at moving the
controller VM to another filer tomorrow.

In the meantime in an attempt to try and keep some of the more important
branches flowing with the limited bandwidth between reboots I've stopped
a bunch of stuff:

$ cd ~/testing.git
$ touch bisect.stop
$ for i in linux-{2.6.39,3.4,3.10,3.16} linux-arm-xen linux-linus
linux-next ovmf seabios xen-4.0-testing xen-4.1-testing qemu-mainline
qemu-upstream-4.2-testing ; do touch $i.stop; done
t$ touch rumpuserxen.stop
$ ls *.stop
bisect.stoplinux-3.10.stop  linux-3.4.stop  linux-linus.stop  
ovmf.stop   qemu-upstream-4.2-testing.stop  seabios.stop  
xen-4.1-testing.stop
linux-2.6.39.stop  linux-3.16.stop  linux-arm-xen.stop  linux-next.stop   
qemu-mainline.stop  rumpuserxen.stopxen-4.0-testing.stop

Since Jan is trying to get 4.3.x and 4.4.x out the door I've left those
stable branches going but otherwise I've stopped all the kernels which
aren't actually feeding other flights, and some other stuff I reckoned
we could live without for now.

Once we've had 24 hours of uninterrupted operation I'll remove those
again.

I've also killed some sg-execute-flights corresponding to the above.
Bisects:
36506 -- [linux-3.4 real-bisect]
36507 -- [qemu-upstream-unstable real-bisect]
36489 -- [rumpuserxen real-bisect]

Real:
 flight |  branch   | intended 
+---+--
  36491 | qemu-mainline | real
  36495 | linux-3.16| real
  36497 | linux-3.4 | real
  36498 | linux-3.10| real
  36500 | ovmf  | real
  36501 | seabios   | real
  36505 | linux-next| real

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2] xsm: add device tree labeling support

2015-03-17 Thread Ian Campbell

On Thu, 2015-03-12 at 16:42 -0400, Daniel De Graaf wrote:
 This adds support in the hypervisor and policy build toolchain for
 Xen/Flask policy version 30, which adds the ability to label ARM device
 tree nodes and expands the IOMEM ocontext entries to 64 bits.
 
 Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov
 ---
  tools/flask/policy/Makefile   |  20 --

That bit: Acked-by: Ian Campbell ian.campb...@citrix.com



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [v2][PATCH 2/2] libxl: introduce gfx_passthru_kind

2015-03-17 Thread Ian Campbell

On Tue, 2015-03-17 at 15:46 +0800, Chen, Tiejun wrote:
  If I remember the context correctly this is in the autodetect case,
  so I think shouldn't mention IGD. Something like Unable to detect
  graphics passthru kind, please set gfx_passthru_kind. See xl.cfg(5)
  for more
 
  s/gfx_passthru_kind/gfx_passthru, right? Because actually we always get
  'gfx_passthru_kind' from 'gfx_passthru'.
 
  I think you have it backwards.
 
  In the case here gfx_passthru=1 has been set by the user, but
  gfx_passthru_kind=DEFAULT. So libxl has tried to autodetect but it has
  failed.
 
  So if the user wants to make progress they should set gfx_passthru_kind
  to whatever type of passthrough they were trying to do.
 
 Looks you're saying 'gfx_passthru_kind' shouldn't reply on 
 'gfx_passthru', so 'gfx_passthru_kind' can override that previous value 
 parsed from 'gfx_passthru', right?

I think perhaps the confusion arises because libxl and xl differ here. I
was commenting on some libxl code, which has two fields, whereas you are
talking about the xl cfg level, which only has one, which can be parsed
in two ways. So I think I've probably been adding lots of confusion
here, sorry.

I think the _libxl_ message needs to be just Unable to detect graphics
passthru kind. i.e. it can't/shouldn't reference anything to do with xl
config options etc (which would make no sense if libvirt was being
used).

That's not very user friendly though, so you may want to consider adding
a new specific error code for this case and returning it here, such that
xl or libvirt can then give a more comprehensible error message.

 So looks the whole policy should be something like this,
 
 diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
 index 5c40e84..5518759 100644
 --- a/tools/libxl/xl_cmdimpl.c
 +++ b/tools/libxl/xl_cmdimpl.c
 @@ -1953,8 +1953,27 @@ skip_vfb:
   xlu_cfg_replace_string (config, spice_streaming_video,
   b_info-u.hvm.spice.streaming_video, 0);
   xlu_cfg_get_defbool(config, nographic, 
 b_info-u.hvm.nographic, 0);
 -xlu_cfg_get_defbool(config, gfx_passthru,
 -b_info-u.hvm.gfx_passthru, 0);
 +if (!xlu_cfg_get_long(config, gfx_passthru, l, 1)) {
 +libxl_defbool_set(b_info-u.hvm.gfx_passthru, l);
 +} else if (!xlu_cfg_get_string(config, gfx_passthru, buf, 0)) {
 +if (libxl_gfx_passthru_kind_from_string(buf,
 + 
 b_info-u.hvm.gfx_passthru_kind)) {
 +fprintf(stderr,
 +ERROR: invalid value \%s\ for 
 \gfx_passthru\\n,
 +buf);
 +exit (1);
 +}
 +libxl_defbool_set(b_info-u.hvm.gfx_passthru, true);
 +}

Up to here is fine.

 +if (!xlu_cfg_get_string(config, gfx_passthru_kind, buf, 0)) {
 +if (libxl_gfx_passthru_kind_from_string(buf,
 + 
 b_info-u.hvm.gfx_passthru_kind)) {
 +fprintf(stderr,
 +ERROR: invalid value \%s\ for 
 \gfx_passthru_kind\\n,
 +buf);
 +exit (1);
 +}
 +}

I don't think you need this bit.



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [linux-3.10 test] 36498: trouble: preparing/queued

2015-03-17 Thread xen . org

flight 36498 linux-3.10 running [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/36498/

Failures and problems with tests :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 build-amd64-libvirt none executed  queued
 build-i386-rumpuserxen  none executed  queued
 build-amd64-rumpuserxen none executed  queued
 build-armhf-libvirt none executed  queued
 build-i386-pvops  2 hosts-allocate   running [st=running!]
 build-i386-libvirt  none executed  queued
 test-amd64-amd64-rumpuserxen-amd64none executed  queued
 build-armhf   2 hosts-allocate   running [st=running!]
 test-amd64-amd64-xl-pvh-intelnone executed  queued
 test-amd64-i386-rumpuserxen-i386none executed  queued
 test-amd64-i386-libvirt none executed  queued
 build-amd64-pvops 2 hosts-allocate   running [st=running!]
 test-amd64-amd64-libvirtnone executed  queued
 test-amd64-amd64-xl-pvh-amd none executed  queued
 test-amd64-amd64-xl-pcipt-intelnone executed  queued
 test-amd64-i386-qemut-rhel6hvm-intelnone executed  queued
 test-amd64-i386-xl  none executed  queued
 test-amd64-i386-rhel6hvm-amdnone executed  queued
 test-amd64-i386-rhel6hvm-intelnone executed  queued
 test-amd64-i386-qemut-rhel6hvm-amdnone executed  queued
 test-amd64-i386-qemuu-rhel6hvm-amdnone executed  queued
 test-amd64-amd64-xl-sedf-pinnone executed  queued
 test-amd64-amd64-xl none executed  queued
 build-amd64   2 hosts-allocate   running [st=running!]
 build-armhf-pvops 2 hosts-allocate   running [st=running!]
 test-amd64-amd64-xl-credit2 none executed  queued
 test-armhf-armhf-xl-credit2 none executed  queued
 test-armhf-armhf-libvirtnone executed  queued
 test-amd64-i386-freebsd10-i386none executed  queued
 test-armhf-armhf-xl-sedf-pinnone executed  queued
 test-amd64-amd64-xl-sedfnone executed  queued
 test-amd64-i386-qemuu-rhel6hvm-intelnone executed  queued
 test-armhf-armhf-xl-sedfnone executed  queued
 test-amd64-amd64-xl-multivcpunone executed  queued
 test-armhf-armhf-xl none executed  queued
 test-amd64-i386-xl-qemut-win7-amd64none executed  queued
 test-amd64-i386-xl-qemut-debianhvm-amd64none executed queued
 build-i3862 hosts-allocate   running [st=running!]
 test-armhf-armhf-xl-midway  none executed  queued
 test-armhf-armhf-xl-multivcpunone executed  queued
 test-amd64-i386-xl-qemut-winxpsp3none executed  queued
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1none executed queued
 test-amd64-i386-freebsd10-amd64none executed  queued
 test-amd64-i386-xl-winxpsp3-vcpus1none executed  queued
 test-amd64-amd64-xl-qemuu-winxpsp3none executed  queued
 test-amd64-amd64-pair   none executed  queued
 test-amd64-i386-xl-qemuu-ovmf-amd64none executed  queued
 test-amd64-amd64-xl-qemut-debianhvm-amd64none executedqueued
 test-amd64-i386-xl-qemuu-debianhvm-amd64none executed queued
 test-amd64-i386-xl-qemuu-win7-amd64none executed  queued
 test-amd64-i386-xl-winxpsp3 none executed  queued
 test-amd64-amd64-xl-win7-amd64none executed  queued
 test-amd64-i386-xl-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemuu-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemut-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemuu-debianhvm-amd64none executedqueued
 test-amd64-amd64-xl-qemuu-ovmf-amd64none executed  queued
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1none executed queued
 test-amd64-i386-pairnone executed  queued
 test-amd64-i386-xl-qemuu-winxpsp3none executed  queued
 test-amd64-amd64-xl-winxpsp3none executed  queued
 test-amd64-amd64-xl-qemut-winxpsp3none executed  queued

version targeted for testing:
 linux389fb5fb0b8b812ce0e853d5eca748b08fc73289
baseline version:
 linuxbe67db109090b17b56eb8eb2190cd70700f107aa


961 people touched revisions under test,
not listing them all

[Xen-devel] [seabios test] 36501: trouble: preparing/queued

2015-03-17 Thread xen . org

flight 36501 seabios running [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/36501/

Failures and problems with tests :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-libvirtnone executed  queued
 test-amd64-amd64-xl-winxpsp3none executed  queued
 test-amd64-i386-libvirt none executed  queued
 test-amd64-amd64-xl-qemuu-win7-amd64none executed  queued
 test-amd64-amd64-xl-win7-amd64none executed  queued
 test-amd64-i386-xl-winxpsp3 none executed  queued
 build-amd64-libvirt none executed  queued
 test-amd64-amd64-xl-qemuu-ovmf-amd64none executed  queued
 test-amd64-i386-xl-winxpsp3-vcpus1none executed  queued
 test-amd64-i386-xl-win7-amd64none executed  queued
 test-amd64-amd64-xl-pvh-intelnone executed  queued
 test-amd64-amd64-xl-qemuu-winxpsp3none executed  queued
 test-amd64-amd64-xl-pcipt-intelnone executed  queued
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1none executed queued
 test-amd64-i386-freebsd10-amd64none executed  queued
 test-amd64-amd64-xl-qemuu-debianhvm-amd64none executedqueued
 test-amd64-amd64-xl-sedfnone executed  queued
 test-amd64-amd64-pair   none executed  queued
 test-amd64-amd64-xl-multivcpunone executed  queued
 test-amd64-i386-xl-qemuu-winxpsp3none executed  queued
 test-amd64-i386-freebsd10-i386none executed  queued
 test-amd64-i386-xl-qemut-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemut-debianhvm-amd64none executedqueued
 test-amd64-amd64-xl-qemut-win7-amd64none executed  queued
 test-amd64-amd64-xl-sedf-pinnone executed  queued
 test-amd64-i386-xl-qemuu-ovmf-amd64none executed  queued
 test-amd64-i386-qemut-rhel6hvm-amdnone executed  queued
 test-amd64-amd64-xl-pvh-amd none executed  queued
 test-amd64-i386-xl-qemut-winxpsp3none executed  queued
 test-amd64-amd64-xl-credit2 none executed  queued
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1none executed queued
 test-amd64-amd64-xl none executed  queued
 test-amd64-i386-xl-qemuu-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemut-winxpsp3none executed  queued
 test-amd64-i386-xl-qemuu-debianhvm-amd64none executed queued
 test-amd64-i386-rhel6hvm-amdnone executed  queued
 test-amd64-i386-pairnone executed  queued
 test-amd64-i386-qemuu-rhel6hvm-intelnone executed  queued
 test-amd64-i386-qemut-rhel6hvm-intelnone executed  queued
 test-amd64-i386-rhel6hvm-intelnone executed  queued
 test-amd64-i386-xl-qemut-debianhvm-amd64none executed queued
 test-amd64-i386-qemuu-rhel6hvm-amdnone executed  queued
 test-amd64-i386-xl  none executed  queued
 build-amd64   2 hosts-allocate   running [st=running!]
 build-amd64-pvops 2 hosts-allocate   running [st=running!]
 build-i386-libvirt  none executed  queued
 build-i3862 hosts-allocate   running [st=running!]
 build-i386-pvops  2 hosts-allocate   running [st=running!]

version targeted for testing:
 seabios  a1ac8861049a5ffefc26ca294293ad666954fcc8
baseline version:
 seabios  d23eba6ea3d429ed8a4a34bae7faad20ce44d8a1


People who touched revisions under test:
  Gerd Hoffmann kra...@redhat.com
  Kevin O'Connor ke...@koconnor.net
  Marcel Apfelbaum marce...@redhat.com
  Marcel Apfelbaum mar...@redhat.com
  Paolo Bonzini pbonz...@redhat.com


jobs:
 build-amd64  preparing
 build-i386   preparing
 build-amd64-libvirt  queued  
 build-i386-libvirt   queued  
 build-amd64-pvopspreparing
 build-i386-pvops preparing
 test-amd64-amd64-xl  queued  
 test-amd64-i386-xl   queued  
 test-amd64-amd64-xl-pvh-amd  queued  
 test-amd64-i386-rhel6hvm-amd queued  
 test-amd64-i386-qemut-rhel6hvm-amd   queued

Re: [Xen-devel] [PATCH v2] Build: Fix stubdom vtpm build failure

2015-03-17 Thread Olaf Hering

On Mon, Mar 16, Quan Xu wrote:

 Typedefs are duplicated in stubdom/vtpmmgr/tcg.h and supported compilers
 do not cope with current staging branch.

This version finally compiles. Thanks!

Olaf

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 4/6] x86: add scheduling support for Intel CAT

2015-03-17 Thread Jan Beulich

 On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote:
 @@ -407,10 +388,75 @@ void psr_domain_free(struct domain *d)
  psr_free_cos(d);
  }
  
 +static void psr_assoc_init(struct psr_assoc *psra)
 +{
 +unsigned int socket;
 +struct psr_cat_socket_info *info;
 +
 +socket = cpu_to_socket(smp_processor_id());
 +psra-socket = socket;
 +if ( cat_socket_info  socket  opt_socket_num )
 +{
 +info = cat_socket_info + socket;
 +if ( info-enabled )
 +psra-cos_mask = (~(~0ull  (get_count_order(info-cos_max)
 + + 32)))  (~0ull  32);

((1ull  get_count_order()) - 1)  32

seems better readable to me.

  void psr_ctxt_switch_to(struct domain *d)
  {
 +uint64_t reg;
 +struct psr_assoc *psra = this_cpu(psr_assoc);
 +
 +psr_assoc_reg_read(psra, reg);

This ultimately leading to psr_assoc_init() makes it really undesirable
here imo: Initialization work shouldn't be done during a context
switch, even is this a one time thing on each CPU.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 2/6] x86: add support for COS/CBM manangement

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 10:11, chao.p.p...@linux.intel.com wrote:
 On Mon, Mar 16, 2015 at 05:10:32PM +, Jan Beulich wrote:
  On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote:
  +else
  +{
  +unsigned int cpu = cpumask_check(get_socket_cpu(socket));
 
 Isn't this going to trigger an assertion when the socket count got
 specified on the command line?
 
 Yes, the assertion is needed in tools side anyway. Here the check seems
 unnecessary.

No - if the get_socket_cpu() may return nr_cpu_ids, you need a
check. Just not in the form of an assertion.

  +if( !d-arch.psr_cos_ids )
  +return;
 
 Considering this check ...
 
  +for ( socket = 0; socket  opt_socket_num; socket++)
  +{
  +cos = d-arch.psr_cos_ids[socket];
  +if ( cos == 0 )
  +continue;
  +
  +map = cat_socket_info[socket].cos_cbm_map;
  +if ( map )
  +map[cos].ref--;
  +}
  +
  +xfree(d-arch.psr_cos_ids);
 
 ... I think you want to clear the pointer here.
 
 This function is called by arch_domain_destroy() or the failure path of
 arch_domain_create(). In both cases the domain structure will be freed
 later so setting NULL is pointless.

But this is not visible from this function. The alternative to not
clearing the pointer here would be to do the checks of the
pointer in the callers, which then would make clear whether this
is really only on error and domain destruction paths (and that
hence the pointer can't be double freed).

  @@ -222,6 +422,17 @@ static void do_cat_cpu_init(void* data)
   info-cbm_len = (eax  0x1f) + 1;
 
 This means cbm_len = 32. Why is cos_cbm_map[].cbm then a
 uint64_t?
 
 Currently the cbm_len is EAX[4:0], 64 bits cbm here is for future
 possible enhancement.

Unless the specification explicitly says that the field width in EAX
may be extended in the future, please omit such preparations
for possible future extensions - it is simply going to confuse
readers (as it did for me already) trying to understand why the
type of the field is what it is.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 4/6] x86: add scheduling support for Intel CAT

2015-03-17 Thread Chao Peng

On Tue, Mar 17, 2015 at 09:19:42AM +, Jan Beulich wrote:
  +psra-cos_mask = (~(~0ull  (get_count_order(info-cos_max)
  + + 32)))  (~0ull  32);
 
 ((1ull  get_count_order()) - 1)  32
 
 seems better readable to me.

Indeed :)

 
   void psr_ctxt_switch_to(struct domain *d)
   {
  +uint64_t reg;
  +struct psr_assoc *psra = this_cpu(psr_assoc);
  +
  +psr_assoc_reg_read(psra, reg);
 
 This ultimately leading to psr_assoc_init() makes it really undesirable
 here imo: Initialization work shouldn't be done during a context
 switch, even is this a one time thing on each CPU.

This is what we did in CMT patch. But now I think psr_assoc_init()
can be called from cat_cpu_init() other than here.

Chao

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)

2015-03-17 Thread Jan Beulich

 On 16.03.15 at 18:05, dario.faggi...@citrix.com wrote:
 such as it is taken care of by the various schedulers, rather
 than happening in schedule.c. In fact, it is the schedulers
 that know better which locks are necessary for the specific
 dumping operations.
 
 While there, fix a few style issues (indentation, trailing
 whitespace, parentheses and blank line after var declarations)
 
 Signed-off-by: Dario Faggioli dario.faggi...@citrix.com
 Cc: George Dunlap george.dun...@eu.citrix.com
 Cc: Meng Xu xumengpa...@gmail.com
 Cc: Jan Beulich jbeul...@suse.com
 Cc: Keir Fraser k...@xen.org
 ---
  xen/common/sched_credit.c  |   42 --
  xen/common/sched_credit2.c |   40 
  xen/common/sched_rt.c  |7 +--
  xen/common/schedule.c  |5 ++---
  4 files changed, 79 insertions(+), 15 deletions(-)

Is it really correct that sched_sedf.c doesn't need any change here?

 --- a/xen/common/sched_credit.c
 +++ b/xen/common/sched_credit.c
 @@ -26,6 +26,23 @@
  
  
  /*
 + * Locking:
 + * - Scheduler-lock (a.k.a. runqueue lock):
 + *  + is per-runqueue, and there is one runqueue per-cpu;
 + *  + serializes all runqueue manipulation operations;
 + * - Private data lock (a.k.a. private scheduler lock):
 + *  + serializes accesses to the scheduler global state (weight,
 + *credit, balance_credit, etc);
 + *  + serializes updates to the domains' scheduling parameters.
 + *
 + * Ordering is private lock always comes first:
 + *  + if we need both locks, we must acquire the private
 + *scheduler lock for first;
 + *  + if we already own a runqueue lock, we must never acquire
 + *the private scheduler lock.
 + */

And this is Credit1 specific?

Regardless of that, even if that's just reflecting current state, isn't
acquiring a private lock around a generic lock backwards?

Finally, as said in different contexts earlier, I think unconditionally
acquiring locks in dumping routines isn't the best practice. At least
in non-debug builds I think these should be try-locks only, skipping
the dumping when a lock is busy.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [qemu-mainline test] 36491: trouble: preparing/queued

2015-03-17 Thread xen . org

flight 36491 qemu-mainline running [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/36491/

Failures and problems with tests :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 build-amd64-libvirt none executed  queued
 build-amd64-pvops 2 hosts-allocate   running [st=running!]
 build-i386-pvops  2 hosts-allocate   running [st=running!]
 test-amd64-amd64-xl-pvh-intelnone executed  queued
 test-armhf-armhf-xl-credit2 none executed  queued
 test-armhf-armhf-xl-sedf-pinnone executed  queued
 test-armhf-armhf-xl none executed  queued
 test-armhf-armhf-libvirtnone executed  queued
 test-armhf-armhf-xl-multivcpunone executed  queued
 test-amd64-amd64-xl-sedfnone executed  queued
 test-amd64-amd64-xl-pvh-amd none executed  queued
 test-amd64-amd64-xl-pcipt-intelnone executed  queued
 test-amd64-i386-xl  none executed  queued
 test-armhf-armhf-xl-midway  none executed  queued
 test-amd64-amd64-xl-credit2 none executed  queued
 test-amd64-i386-qemut-rhel6hvm-amdnone executed  queued
 test-amd64-amd64-xl-sedf-pinnone executed  queued
 test-amd64-i386-rhel6hvm-intelnone executed  queued
 test-amd64-i386-rhel6hvm-amdnone executed  queued
 test-amd64-i386-libvirt none executed  queued
 test-armhf-armhf-xl-sedfnone executed  queued
 build-armhf-libvirt none executed  queued
 test-amd64-amd64-xl none executed  queued
 test-amd64-amd64-libvirtnone executed  queued
 test-amd64-i386-qemuu-rhel6hvm-amdnone executed  queued
 test-amd64-amd64-xl-multivcpunone executed  queued
 test-amd64-i386-qemut-rhel6hvm-intelnone executed  queued
 build-armhf-pvops 2 hosts-allocate   running [st=running!]
 build-armhf   2 hosts-allocate   running [st=running!]
 test-amd64-i386-freebsd10-amd64none executed  queued
 test-amd64-i386-qemuu-rhel6hvm-intelnone executed  queued
 test-amd64-amd64-xl-win7-amd64none executed  queued
 build-i3862 hosts-allocate   running [st=running!]
 test-amd64-i386-xl-qemut-debianhvm-amd64none executed queued
 test-amd64-i386-xl-win7-amd64none executed  queued
 test-amd64-i386-freebsd10-i386none executed  queued
 build-i386-libvirt  none executed  queued
 test-amd64-amd64-xl-winxpsp3none executed  queued
 test-amd64-i386-xl-qemut-win7-amd64none executed  queued
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1none executed queued
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1none executed queued
 test-amd64-amd64-xl-qemuu-debianhvm-amd64none executedqueued
 test-amd64-amd64-xl-qemut-winxpsp3none executed  queued
 test-amd64-i386-xl-qemuu-ovmf-amd64none executed  queued
 test-amd64-i386-xl-qemuu-debianhvm-amd64none executed queued
 test-amd64-amd64-pair   none executed  queued
 test-amd64-i386-xl-qemuu-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemut-win7-amd64none executed  queued
 test-amd64-i386-xl-qemut-winxpsp3none executed  queued
 test-amd64-amd64-xl-qemuu-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemuu-winxpsp3none executed  queued
 test-amd64-amd64-xl-qemut-debianhvm-amd64none executedqueued
 build-amd64   2 hosts-allocate   running [st=running!]
 test-amd64-amd64-xl-qemuu-ovmf-amd64none executed  queued
 test-amd64-i386-xl-winxpsp3-vcpus1none executed  queued
 test-amd64-i386-xl-winxpsp3 none executed  queued
 test-amd64-i386-pairnone executed  queued
 test-amd64-i386-xl-qemuu-winxpsp3none executed  queued

version targeted for testing:
 qemuu17b11a1406fdc43b5022f32a6fbfcb005a353b38
baseline version:
 qemuu576a94d8bcaa1bb07a81d9ffd2cf76095a66ad9a


People who touched revisions under test:
  Alberto Garcia be...@igalia.com
  Alex Williamson alex.william...@redhat.com
  Alexander Graf ag...@suse.de
  Alexey Kardashevskiy a...@ozlabs.ru
  Alistair Francis alistai...@gmail.com
  Alistair Francis alist...@alistair23.me
  Amit Shah amit.s...@redhat.com
  Andreas FÃ¤rber afaer...@suse.de
  Andrew Jones drjo...@redhat.com
  Aneesh Kumar K.V

[Xen-devel] [ovmf test] 36500: trouble: preparing/queued

2015-03-17 Thread xen . org

flight 36500 ovmf running [real]
http://www.chiark.greenend.org.uk/~xensrcts/logs/36500/

Failures and problems with tests :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-pvh-intelnone executed  queued
 build-i386-libvirt  none executed  queued
 build-amd64-pvops 2 hosts-allocate   running [st=running!]
 build-amd64   2 hosts-allocate   running [st=running!]
 test-amd64-i386-libvirt none executed  queued
 test-amd64-amd64-xl-pvh-amd none executed  queued
 test-amd64-amd64-xl none executed  queued
 test-amd64-i386-qemut-rhel6hvm-amdnone executed  queued
 test-amd64-i386-freebsd10-i386none executed  queued
 test-amd64-i386-rhel6hvm-amdnone executed  queued
 test-amd64-amd64-xl-pcipt-intelnone executed  queued
 test-amd64-i386-qemuu-rhel6hvm-amdnone executed  queued
 build-i386-pvops  2 hosts-allocate   running [st=running!]
 test-amd64-amd64-libvirtnone executed  queued
 build-i3862 hosts-allocate   running [st=running!]
 test-amd64-i386-xl  none executed  queued
 test-amd64-amd64-xl-multivcpunone executed  queued
 test-amd64-amd64-xl-sedf-pinnone executed  queued
 test-amd64-i386-qemut-rhel6hvm-intelnone executed  queued
 test-amd64-i386-qemuu-rhel6hvm-intelnone executed  queued
 test-amd64-amd64-xl-credit2 none executed  queued
 test-amd64-i386-freebsd10-amd64none executed  queued
 test-amd64-amd64-xl-sedfnone executed  queued
 test-amd64-i386-rhel6hvm-intelnone executed  queued
 test-amd64-i386-xl-qemut-win7-amd64none executed  queued
 test-amd64-i386-xl-qemut-debianhvm-amd64none executed queued
 test-amd64-amd64-pair   none executed  queued
 test-amd64-i386-xl-qemuu-ovmf-amd64none executed  queued
 build-amd64-libvirt none executed  queued
 test-amd64-amd64-xl-qemut-win7-amd64none executed  queued
 test-amd64-i386-xl-qemuu-debianhvm-amd64none executed queued
 test-amd64-i386-xl-winxpsp3 none executed  queued
 test-amd64-i386-xl-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemuu-win7-amd64none executed  queued
 test-amd64-i386-xl-qemuu-win7-amd64none executed  queued
 test-amd64-i386-xl-qemut-winxpsp3-vcpus1none executed queued
 test-amd64-i386-xl-qemuu-winxpsp3none executed  queued
 test-amd64-amd64-xl-winxpsp3none executed  queued
 test-amd64-amd64-xl-qemuu-debianhvm-amd64none executedqueued
 test-amd64-amd64-xl-qemut-debianhvm-amd64none executedqueued
 test-amd64-amd64-xl-qemuu-ovmf-amd64none executed  queued
 test-amd64-i386-xl-qemuu-winxpsp3-vcpus1none executed queued
 test-amd64-i386-xl-winxpsp3-vcpus1none executed  queued
 test-amd64-i386-pairnone executed  queued
 test-amd64-i386-xl-qemut-winxpsp3none executed  queued
 test-amd64-amd64-xl-qemuu-winxpsp3none executed  queued
 test-amd64-amd64-xl-win7-amd64none executed  queued
 test-amd64-amd64-xl-qemut-winxpsp3none executed  queued

version targeted for testing:
 ovmf ba9d087b8fb91f19c9accf9541332a36889e18ed
baseline version:
 ovmf 69a99d0b2c0d601c60a1d65bac057adb045d002b


People who touched revisions under test:
  Ard Biesheuvel ard.biesheu...@linaro.org
  Chen Fan chen.fan.f...@cn.fujitsu.com
  David Wei david@intel.com
  Elvin Li elvin...@intel.com
  Feng Tian feng.t...@intel.com
  Laszlo Ersek ler...@redhat.com
  Liming Gao liming@intel.com
  Qiu Shumin shumin@intel.com
  Shifei Lu shifeix.a...@intel.com
  Shumin Qiu shumin@intel.com
  Star Zeng star.z...@intel.com
  Tapan Shah tapands...@hp.com
  Tim He tim...@intel.com


jobs:
 build-amd64  preparing
 build-i386   preparing
 build-amd64-libvirt  queued  
 build-i386-libvirt   queued  
 build-amd64-pvopspreparing
 build-i386-pvops preparing
 test-amd64-amd64-xl  queued  
 test-amd64-i386-xl

Re: [Xen-devel] [PATCH v2 2/4] xen: sched: make counters for vCPU sleep and wakeup generic

2015-03-17 Thread Dario Faggioli

On Mon, 2015-03-16 at 17:18 +, George Dunlap wrote:
 On 02/27/2015 04:51 PM, Dario Faggioli wrote:


  diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
  index ad0a5d4..2b852cc 100644
  --- a/xen/common/sched_credit2.c
  +++ b/xen/common/sched_credit2.c
  @@ -931,6 +931,7 @@ csched2_vcpu_sleep(const struct scheduler *ops, struct 
  vcpu *vc)
   struct csched2_vcpu * const svc = CSCHED2_VCPU(vc);
   
   BUG_ON( is_idle_vcpu(vc) );
  +SCHED_STAT_CRANK(vcpu_sleep);
   
   if ( per_cpu(schedule_data, vc-processor).curr == vc )
   cpu_raise_softirq(vc-processor, SCHEDULE_SOFTIRQ);
  @@ -956,19 +957,22 @@ csched2_vcpu_wake(const struct scheduler *ops, struct 
  vcpu *vc)
   
   BUG_ON( is_idle_vcpu(vc) );
   
  -/* Make sure svc priority mod happens before runq check */
   if ( unlikely(per_cpu(schedule_data, vc-processor).curr == vc) )
   {
  +SCHED_STAT_CRANK(vcpu_wake_running);
   goto out;
   }
  -
   if ( unlikely(__vcpu_on_runq(svc)) )
 
 Does this make the 'if' butt right up against the '{'?  Is that bad?
 
 Other than that:
 
 Acked-by: George Dunlap george.dun...@eu.citrix.com
 
As far as I can see, the series is in:

http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=8b0e94da0e23221a6d7ea19bfbd24a407db44de8

and whoever committed it (Jan, probably?), took care of leaving that
blank line in place, which I'm fine with, of course. :-)


 However, it doesn't apply cleanly at the moment, so you'll probably need
 to send a refresh.  (Sorry for taking so long!)
 
NP, and thanks for reviewing. I was prepared to refresh and resend, but
as I said, it's been checked-in already, so thanks for that! :-D

Regards,
Dario


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 2/6] x86: add support for COS/CBM manangement

2015-03-17 Thread Chao Peng

On Mon, Mar 16, 2015 at 05:10:32PM +, Jan Beulich wrote:
  On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote:
  +else
  +{
  +unsigned int cpu = cpumask_check(get_socket_cpu(socket));
 
 Isn't this going to trigger an assertion when the socket count got
 specified on the command line?

Yes, the assertion is needed in tools side anyway. Here the check seems
unnecessary.

 
  +if( !d-arch.psr_cos_ids )
  +return;
 
 Considering this check ...
 
  +for ( socket = 0; socket  opt_socket_num; socket++)
  +{
  +cos = d-arch.psr_cos_ids[socket];
  +if ( cos == 0 )
  +continue;
  +
  +map = cat_socket_info[socket].cos_cbm_map;
  +if ( map )
  +map[cos].ref--;
  +}
  +
  +xfree(d-arch.psr_cos_ids);
 
 ... I think you want to clear the pointer here.

This function is called by arch_domain_destroy() or the failure path of
arch_domain_create(). In both cases the domain structure will be freed
later so setting NULL is pointless.

 
  @@ -222,6 +422,17 @@ static void do_cat_cpu_init(void* data)
   info-cbm_len = (eax  0x1f) + 1;
 
 This means cbm_len = 32. Why is cos_cbm_map[].cbm then a
 uint64_t?

Currently the cbm_len is EAX[4:0], 64 bits cbm here is for future
possible enhancement.

Chao

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2] Build: Fix stubdom vtpm build failure

2015-03-17 Thread Xu, Quan

Olaf, 
Thanks for your test.

Ian,
   Could you help me check it again? Any comments, I will fix it soon.
If no any other comments, could you help me merge it?  Thanks. 

Quan

 -Original Message-
 From: Olaf Hering [mailto:o...@aepfle.de]
 Sent: Tuesday, March 17, 2015 5:20 PM
 To: Xu, Quan
 Cc: ian.campb...@citrix.com; xen-devel@lists.xen.org; dgde...@tycho.nsa.gov;
 andrew.coop...@citrix.com
 Subject: Re: [PATCH v2] Build: Fix stubdom vtpm build failure
 
 On Mon, Mar 16, Quan Xu wrote:
 
  Typedefs are duplicated in stubdom/vtpmmgr/tcg.h and supported
  compilers do not cope with current staging branch.
 
 This version finally compiles. Thanks!
 
 Olaf
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 18/18] x86: add multiboot2 protocol support for EFI platforms

2015-03-17 Thread Jan Beulich

 On 30.01.15 at 18:54, daniel.ki...@oracle.com wrote:
 @@ -94,6 +111,17 @@ ENTRY(start)
  gdt_boot_descr:
  .word   6*8-1
  .long   sym_phys(trampoline_gdt)
 +.long   0 /* Needed for 64-bit lgdt */
 +
 +cs32_switch_addr:
 +.long   sym_phys(cs32_switch)
 +.long   BOOT_CS32
 +
 +efi_st:
 +.quad   0
 +
 +efi_ih:
 +.quad   0
  
  .Lbad_cpu_msg: .asciz ERR: Not a 64-bit CPU!
  .Lbad_ldr_msg: .asciz ERR: Not a Multiboot bootloader!
 @@ -124,6 +152,133 @@ print_err:
  .Lhalt: hlt
  jmp .Lhalt
  
 +.code64
 +
 +__efi64_start:
 +cld
 +
 +/* Bootloaders may set multiboot[12].mem_lower to a nonzero value */
 +xor %edx,%edx
 +
 +/* Check for Multiboot2 bootloader */
 +cmp $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
 +je  efi_multiboot2_proto
 +
 +jmp not_multiboot
 +
 +efi_multiboot2_proto:

jne not_multiboot (and efi_multiboot2_proto dropped altogether)

 +/* Skip Multiboot2 information fixed part */
 +lea MB2_fixed_sizeof(%ebx),%ecx

Let's please not add more assumptions than necessary about stuff
being below 4G.

 +
 +0:
 +/* Get mem_lower from Multiboot2 information */
 +cmpl$MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO,(%ecx)
 +jne 1f
 +
 +mov MB2_mem_lower(%ecx),%edx
 +jmp 4f
 +
 +1:
 +/* Get EFI SystemTable address from Multiboot2 information */
 +cmpl$MULTIBOOT2_TAG_TYPE_EFI64,(%ecx)
 +jne 2f
 +
 +lea MB2_efi64_st(%ecx),%esi
 +lea efi_st(%rip),%edi
 +movsq

A simple pair of mov-s please, assuming all of this really needs to be
done in assembly in the first place. Also please use .Lname labels
in this assembly coded switch statement to ease reading.

 +
 +/* Do not go into real mode on EFI platform */
 +movb$1,skip_realmode(%rip)
 +
 +jmp 4f
 +
 +2:
 +/* Get EFI ImageHandle address from Multiboot2 information */
 +cmpl$MULTIBOOT2_TAG_TYPE_EFI64_IH,(%ecx)
 +jne 3f
 +
 +lea MB2_efi64_ih(%ecx),%esi
 +lea efi_ih(%rip),%edi
 +movsq
 +jmp 4f
 +
 +3:
 +/* Is it the end of Multiboot2 information? */
 +cmpl$MULTIBOOT2_TAG_TYPE_END,(%ecx)
 +je  run_bs
 +
 +4:

Please switch the order so that 2 can fall through into 4 (and you
then won't need the run_bs label, which otherwise should also
becom .Lrun_bs).

 +/* Go to next Multiboot2 information tag */
 +add MB2_tag_size(%ecx),%ecx
 +add $(MULTIBOOT2_TAG_ALIGN-1),%ecx
 +and $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
 +jmp 0b
 +
 +run_bs:
 +push%rax
 +push%rdx
 +
 +/* Initialize BSS (no nasty surprises!) */
 +lea __bss_start(%rip),%rdi
 +lea _end(%rip),%rcx
 +sub %rdi,%rcx
 +xor %rax,%rax
 +rep stosb

rep stosb

 +
 +mov efi_ih(%rip),%rdi   /* EFI ImageHandle */
 +mov efi_st(%rip),%rsi   /* EFI SystemTable */
 +callefi_multiboot2

With efi_multiboot2 being a C function it really looks like much of the
above doesn't need to be done in assembly.

 +
 +pop %rcx
 +pop %rax
 +
 +shl $10-4,%rcx  /* Convert multiboot2.mem_lower to 
 bytes/16 */
 +
 +cli
 +
 +/* Initialise GDT */
 +lgdtgdt_boot_descr(%rip)
 +
 +/* Reload code selector */
 +ljmpl   *cs32_switch_addr(%rip)
 +
 +.code32
 +
 +cs32_switch:
 +/* Initialise basic data segments */
 +mov $BOOT_DS,%edx
 +mov %edx,%ds
 +mov %edx,%es
 +mov %edx,%fs
 +mov %edx,%gs
 +mov %edx,%ss
 +
 +mov $sym_phys(cpu0_stack)+1024,%esp
 +
 +/* Disable paging */
 +mov %cr0,%edx
 +and $(~X86_CR0_PG),%edx
 +mov %edx,%cr0
 +
 +push%eax
 +push%ecx
 +
 +/* Disable Long Mode */
 +mov $MSR_EFER,%ecx
 +rdmsr
 +and $(~EFER_LME),%eax
 +wrmsr

I don't think this is really needed.

 +
 +pop %ecx
 +pop %eax
 +
 +/* Turn off PAE */
 +mov %cr4,%edx
 +and $(~X86_CR4_PAE),%edx
 +mov %edx,%cr4

Nor this.

 @@ -179,7 +334,7 @@ multiboot2_proto:
  and $~(MULTIBOOT2_TAG_ALIGN-1),%ecx
  jmp 0b
  
 -trampoline_setup:
 +bios_platform:
  /* Set up trampoline segment 64k below EBDA */

This is still trampoline setup code, so it's not clear why you rename
the label. If you need another named label ...

 @@ -195,12 +350,13 @@ trampoline_setup:
   * multiboot structure (if available) and use the smallest.
   */
  cmp $0x100,%edx /* is the multiboot value too small? */
 -

Re: [Xen-devel] [PATCH 1/7] xen: sched_rt: avoid ASSERT()ing on runq dump if there are no domains

2015-03-17 Thread Jan Beulich

 On 16.03.15 at 18:04, dario.faggi...@citrix.com wrote:
 --- a/xen/common/sched_rt.c
 +++ b/xen/common/sched_rt.c
 @@ -264,18 +264,17 @@ rt_dump(const struct scheduler *ops)
  struct list_head *iter_sdom, *iter_svc, *runq, *depletedq, *iter;
  struct rt_private *prv = rt_priv(ops);
  struct rt_vcpu *svc;
 -cpumask_t *online;
  struct rt_dom *sdom;
  unsigned long flags;
  
 -ASSERT(!list_empty(prv-sdom));
 +spin_lock_irqsave(prv-lock, flags);
 +
 +if (list_empty(prv-sdom))

Coding style.

 +goto out;
  
 -sdom = list_entry(prv-sdom.next, struct rt_dom, sdom_elem);
 -online = cpupool_scheduler_cpumask(sdom-dom-cpupool);

Unrelated change (together with the variable deletion above). I'm
fine for this to stay here, but it should at least be mentioned in the
description so that future archeologists don't wonder about the
connection to the actual issue fixed here.

 @@ -303,6 +302,7 @@ rt_dump(const struct scheduler *ops)
  }
  }
  
 +out:

Labels should be indented by at least one space.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 1/2] VT-d: make XSA-59 workaround fully cover XeonE5/E7 v2

2015-03-17 Thread Dugger, Donald D

Note that the following Nehalem/Westmere chipsets should be included in this 
list:

Nehalem - 0x40, 0x2c01, 0x2c41, 0x313x
Westmere - 0x2c70, 0x2d81, 0xd15x

--
Don Dugger
Censeo Toto nos in Kansa esse decisse. - D. Gale
Ph: 303/443-3786

-Original Message-
From: Jan Beulich [mailto:jbeul...@suse.com] 
Sent: Friday, December 19, 2014 1:41 AM
To: xen-devel
Cc: Dugger, Donald D; Tian, Kevin; Zhang, Yang Z
Subject: [PATCH 1/2] VT-d: make XSA-59 workaround fully cover XeonE5/E7 v2

So far only the VT-d UR masking was being done for them.

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -440,6 +440,9 @@ void pci_vtd_quirk(const struct pci_dev 
seg, bus, dev, func);
 break;
 
+/* Xeon E5/E7 v2 */
+case 0x0e00: /* host bridge */
+case 0x0e01: case 0x0e04 ... 0x0e0b: /* root ports */
 /* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */
 case 0x3400 ... 0x3407: /* host bridges */
 case 0x3408 ... 0x3411: case 0x3420 ... 0x3421: /* root ports */




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2 2/2] sched_credit2.c: runqueue_per_core code

2015-03-17 Thread Dario Faggioli

On Mon, 2015-03-16 at 12:56 +, Jan Beulich wrote:
  On 16.03.15 at 13:51, george.dun...@eu.citrix.com wrote:
  On 03/16/2015 12:48 PM, Jan Beulich wrote:

  Them returning garbage isn't what needs fixing. Instead the code
  here should use a different condition to check whether this is the
  boot CPU (e.g. looking at system_state). And that can very well be
  done directly in this patch.
  
  What do you suggest, then?
 
 My preferred solution would be, as said, to leverage system_state.
 Provided the state to look for is consistent between x86 and ARM.
 
Would something like this make sense?

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index cfca5a7..2f2aa73 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -1936,12 +1936,8 @@ static void init_pcpu(const struct scheduler *ops, int 
cpu)
 }
 
 /* Figure out which runqueue to put it in */
-rqi = 0;
-
-/* Figure out which runqueue to put it in */
-/* NB: cpu 0 doesn't get a STARTING callback, so we hard-code it to 
runqueue 0. */
-if ( cpu == 0 )
-rqi = 0;
+if ( system_state == SYS_STATE_boot )
+rqi = boot_cpu_to_socket(cpu);
 else
 rqi = cpu_to_socket(cpu);
 
@@ -1986,9 +1982,13 @@ static void init_pcpu(const struct scheduler *ops, int 
cpu)
 static void *
 csched2_alloc_pdata(const struct scheduler *ops, int cpu)
 {
-/* Check to see if the cpu is online yet */
-/* Note: cpu 0 doesn't get a STARTING callback */
-if ( cpu == 0 || cpu_to_socket(cpu) = 0 )
+/*
+ * Actual initialization is deferred to when the pCPU will be
+ * online, via a STARTING callback. The only exception is
+ * the boot cpu, which does not get such a notification, and
+ * hence needs to be taken care of here.
+ */
+if ( system_state == SYS_STATE_boot )
 init_pcpu(ops, cpu);
 else
 printk(%s: cpu %d not online yet, deferring initializatgion\n,

diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index cfca5a7..2f2aa73 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -1936,12 +1936,8 @@ static void init_pcpu(const struct scheduler *ops, int cpu)
 }
 
 /* Figure out which runqueue to put it in */
-rqi = 0;
-
-/* Figure out which runqueue to put it in */
-/* NB: cpu 0 doesn't get a STARTING callback, so we hard-code it to runqueue 0. */
-if ( cpu == 0 )
-rqi = 0;
+if ( system_state == SYS_STATE_boot )
+rqi = boot_cpu_to_socket(cpu);
 else
 rqi = cpu_to_socket(cpu);
 
@@ -1986,9 +1982,13 @@ static void init_pcpu(const struct scheduler *ops, int cpu)
 static void *
 csched2_alloc_pdata(const struct scheduler *ops, int cpu)
 {
-/* Check to see if the cpu is online yet */
-/* Note: cpu 0 doesn't get a STARTING callback */
-if ( cpu == 0 || cpu_to_socket(cpu) = 0 )
+/*
+ * Actual initialization is deferred to when the pCPU will be
+ * online, via a STARTING callback. The only exception is
+ * the boot cpu, which does not get such a notification, and
+ * hence needs to be taken care of here.
+ */
+if ( system_state == SYS_STATE_boot )
 init_pcpu(ops, cpu);
 else
 printk(%s: cpu %d not online yet, deferring initializatgion\n,


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 2/3] slightly reduce vm_assist code

2015-03-17 Thread Tim Deegan

At 15:55 + on 17 Mar (1426607705), Jan Beulich wrote:
 - drop an effectively unused struct pv_vcpu field (x86)
 - adjust VM_ASSIST() to prepend VMASST_TYPE_
 
 Signed-off-by: Jan Beulich jbeul...@suse.com

Reviewed-by: Tim Deegan t...@xen.org, though I think these would
have been better as two separate patches.

Cheers,

Tim.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 1/3] libxl: In domain death search, start search at first domid we want

2015-03-17 Thread Wei Liu

On Tue, Mar 17, 2015 at 09:30:57AM -0600, Jim Fehlig wrote:
 From: Ian Jackson ian.jack...@eu.citrix.com

 From: Ian Jackson ian.jack...@eu.citrix.com

 When domain_death_xswatch_callback needed a further call to
 xc_domain_getinfolist it would restart it with the last domain it
 found rather than the first one it wants.

 If it only wants one it will also only ask for one domain.  The result
 would then be that it gets the previous domain again (ie, the previous
 one to the one it wants), which still doesn't reveal the answer to the
 question, and it would therefore loop again.

 It's completely unclear to me why I thought it was a good idea to
 start the xc_domain_getinfolist with the last domain previously found
 rather than the first one left un-confirmed.  The code has been that
 way since it was introduced.

 Instead, start each xc_domain_getinfolist at the next domain whose
 status we need to check.

 We also need to move the test for !evg into the loop, we now need evg
 to compute the arguments to getinfolist.

 Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com
 Reported-by: Jim Fehlig jfeh...@suse.com
 Reviewed-by: Jim Fehlig jfeh...@suse.com
 Tested-by: Jim Fehlig jfeh...@suse.com

Acked-by: Wei Liu wei.l...@citrix.com

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 2/3] libxl: Domain destroy: unlock userdata earlier

2015-03-17 Thread Wei Liu

On Tue, Mar 17, 2015 at 09:30:58AM -0600, Jim Fehlig wrote:
 From: Ian Jackson ian.jack...@eu.citrix.com
 
 Unlock the userdata before we actually call xc_domain_destroy.  This
 leaves open the possibility that other libxl callers will see the
 half-destroyed domain (with no devices, paused), but this is fine.
 
 Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com
 CC: Wei Liu wei.l...@citrix.com
 Reviewed-by: Jim Fehlig jfeh...@suse.com
 Tested-by: Jim Fehlig jfeh...@suse.com

Acked-by: Wei Liu wei.l...@citrix.com

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 3/3] libxl: Domain destroy: fork

2015-03-17 Thread Wei Liu

On Tue, Mar 17, 2015 at 09:30:59AM -0600, Jim Fehlig wrote:
 From: Ian Jackson ian.jack...@eu.citrix.com
 
 Call xc_domain_destroy in a subprocess.  That allows us to do so
 asynchronously, rather than blocking the whole process calling libxl.
 
 The changes in detail:
 
  * Provide an libxl__ev_child in libxl__domain_destroy_state, and
initialise it in libxl__domain_destroy.  There is no possibility
to `clean up' a libxl__ev_child, but there need to clean it up, as
the control flow ensures that we only continue after the child has
exited.
 
  * Call libxl__ev_child_fork at the right point and put the call to
xc_domain_destroy and associated logging in the child.  (The child
opens a new xenctrl handle because we mustn't use the parent's.)
 
  * Consequently, the success return path from domain_destroy_domid_cb
no longer calls dis-callback.  Instead it simply returns.
 
  * We plumb the errorno value through the child's exit status, if it
fits.  This means we normally do the logging only in the parent.
 
  * Incidentally, we fix the bug that we were treating the return value
from xc_domain_destroy as an errno value when in fact it is a
return value from do_domctl (in this case, 0 or -1 setting errno).
 
 Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com
 Reviewed-by: Jim Fehlig jfeh...@suse.com
 Tested-by: Jim Fehlig jfeh...@suse.com

Reviewed-by: Wei Liu wei.l...@citrix.com

One nit below.

 ---
[...]
 +ctx-xch = xc_interface_open(ctx-lg,0,0);
 +if (!ctx-xch) goto badchild;
 +
 +rc = xc_domain_destroy(ctx-xch, domid);
 +if (rc  0) goto badchild;
 +_exit(0);
 +
 +badchild:
 +if (errno  0   errno  126) {
 +_exit(errno);
 +} else {
 +LOGE(ERROR,
 + xc_domain_destroy failed for %d (with difficult errno value %d),

Indentation is wrong.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 2/2] VT-d: extend XSA-59 workaround to XeonE5 v3 (Haswell)

2015-03-17 Thread Dugger, Donald D

Note that the following Haswell chipsets should also be included in this list:

Haswell - 0xc0f, 0xd00, 0xd04, 0xd08, 0xd0f, 0xa00, 0xa08, 0xa0f

--
Don Dugger
Censeo Toto nos in Kansa esse decisse. - D. Gale
Ph: 303/443-3786

-Original Message-
From: Jan Beulich [mailto:jbeul...@suse.com] 
Sent: Friday, December 19, 2014 1:42 AM
To: xen-devel
Cc: Dugger, Donald D; Tian, Kevin; Zhang, Yang Z
Subject: [PATCH 2/2] VT-d: extend XSA-59 workaround to XeonE5 v3 (Haswell)

Note that the datasheet lacks PCI IDs for Dev 1 Fn 0-1, so their IDs are being 
added based on what https://pci-ids.ucw.cz/read/PC/8086 says.

Signed-off-by: Jan Beulich jbeul...@suse.com

--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -431,6 +431,7 @@ void pci_vtd_quirk(const struct pci_dev 
  *   - Potential security issue if malicious guest trigger VT-d faults.
  */
 case 0x0e28: /* Xeon-E5v2 (IvyBridge) */
+case 0x2f28: /* Xeon-E5v3 (Haswell) */
 case 0x342e: /* Tylersburg chipset (Nehalem / Westmere systems) */
 case 0x3728: /* Xeon C5500/C3500 (JasperForest) */
 case 0x3c28: /* Sandybridge */
@@ -443,6 +444,9 @@ void pci_vtd_quirk(const struct pci_dev 
 /* Xeon E5/E7 v2 */
 case 0x0e00: /* host bridge */
 case 0x0e01: case 0x0e04 ... 0x0e0b: /* root ports */
+/* Xeon E5 v3 */
+case 0x2f00: /* host bridge */
+case 0x2f01 ... 0x2f0b: /* root ports */
 /* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */
 case 0x3400 ... 0x3407: /* host bridges */
 case 0x3408 ... 0x3411: case 0x3420 ... 0x3421: /* root ports */




___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] Running update-server-info on push

2015-03-17 Thread Ian Campbell

It seems some repos have this via the hooks/post-update.sample having
been renamed to hooks/post-update, but a few don't.

So I've done:

xen@xenbits:~/git$ for i in rumpuser-xen.git mini-os.git libvirt.git ; 
do
 mv -iv $i/hooks/post-update.sample $i/hooks/post-update
 done
`rumpuser-xen.git/hooks/post-update.sample' - 
`rumpuser-xen.git/hooks/post-update'
`mini-os.git/hooks/post-update.sample' - 
`mini-os.git/hooks/post-update'
`libvirt.git/hooks/post-update.sample' - 
`libvirt.git/hooks/post-update'
xen@xenbits:~/git$ for i in rumpuser-xen.git mini-os.git libvirt.git ; 
do
 ( cd $i  git update-server-info )
 done
xen@xenbits:~/git$ 

I did not investigate people/* or xenclient/*.

This will explain the failure of flight 36502 which has yet to be posted.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping

2015-03-17 Thread Meng Xu


 This change also takes the chance to add a scratch
 cpumask, to avoid having to create one more
 cpumask_var_t on the stack of the dumping routine.

 Actually, I have a question about the strength of this design. When we
 have a machine with many cpus, we will end up with allocating a
 cpumask for each cpu. Is this better than having a cpumask_var_t on
 the stack of the dumping routine, since the dumping routine is not in
 the hot path?

 The reason for taking this off the stack is that the hypervisor stack is
 a fairly limited resource -- IIRC it's only 8k (for each cpu).  If the
 call stack gets too deep, the hypervisor will triple-fault.  Keeping
 really large variables like cpumasks off the stack is key to making sure
 we don't get close to that.

I see. I didn't realize the fact of the limited size of hypervisor
stack. That makes sense.


Thank you very much for clarification! :-)

Best,

Meng

---
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 04/10] xen/blkfront: separate ring information to an new struct

2015-03-17 Thread Bob Liu


On 03/17/2015 10:52 PM, Felipe Franciosi wrote:
 Hi Bob,
 
 I've put the hardware back together and am sorting out the software for 
 testing. Things are not moving as fast as I wanted due to other commitments. 
 I'll keep this thread updated as I progress. Malcolm is OOO and I'm trying to 
 get his patches to work on a newer Xen.
 

Thank you!

 The evaluation will compare:
 1) bare metal i/o (for baseline)
 2) tapdisk3 (currently using grant copy, which is what scales best in my 
 experience)
 3) blkback w/ persistent grants
 4) blkback w/o persistent grants (I will just comment out the handshake bits 
 in blkback/blkfront)
 5) blkback w/o persistent grants + Malcolm's grant map patches
 

I think you need to add the patches from Christoph Egger with title
[PATCH v5 0/2] gnttab: Improve scaleability here.
http://lists.xen.org/archives/html/xen-devel/2015-02/msg01188.html


 To my knowledge, blkback (w/ or w/o persistent grants) is always faster than 
 user space alternatives (e.g. tapdisk, qemu-qdisk) as latency is much lower. 
 However, tapdisk with grant copy has been shown to produce (much) better 
 aggregate throughput figures as it avoids any issues with grant (un)mapping.
 
 I'm hoping to show that (5) above scales better than (3) and (4) in a 
 representative scenario. If it does, I will recommend that we get rid of 
 persistent grants in favour of a better and more scalable grant (un)mapping 
 implementation.
 

Right, but even if 5) have better performance, we have to make sure
older hypervisors with new linux kernel won't be affected after get rid
of persistent grants.

-- 
Regards,
-Bob

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping

2015-03-17 Thread Meng Xu

Hi Dario,

2015-03-17 10:12 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com:
 On Mon, 2015-03-16 at 16:30 -0400, Meng Xu wrote:
 Hi Dario,

 Hey,

 2015-03-16 13:05 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com:

 
  This change also takes the chance to add a scratch
  cpumask, to avoid having to create one more
  cpumask_var_t on the stack of the dumping routine.

 Actually, I have a question about the strength of this design. When we
 have a machine with many cpus, we will end up with allocating a
 cpumask for each cpu.

 Just FTR, what we will end up allocating is:
  - an array of *pointers* to cpumasks with as many elements as the
number of pCPUs,
  - a cpumask *only* for the pCPUs subjected to an instance of the RTDS
scheduler.

 So, for instance, if you have 64 pCPUs, but are using the RTDS scheduler
 only in a cpupool with 2 pCPUs, you'll have an array of 64 pointers to
 cpumask_t, but only 2 actual cpumasks.

 Is this better than having a cpumask_var_t on
 the stack of the dumping routine, since the dumping routine is not in
 the hot path?

 George and Jan replied to this already, I think. Allow me to add just a
 few words:
  Such scratch area can be used to kill most of the
  cpumasks_var_t local variables in other functions
  in the file, but that is *NOT* done in this chage.
 
 This is the point, actually! As said here, this is not only for the sake
 of the dumping routine. In fact, ideally, someone will, in the near
 future, go throughout the whole file and kill most of the cpumask_t
 local variables, and most of the cpumask dynamic allocations, in favour
 of using this scratch area.

  @@ -409,6 +423,10 @@ rt_init(struct scheduler *ops)
   if ( prv == NULL )
   return -ENOMEM;
 
  +_cpumask_scratch = xmalloc_array(cpumask_var_t, nr_cpu_ids);

 Is it better to use xzalloc_array?

 Why? IMO, not really. I'm only free()-ing (in rt_free_pdata()) the
 elements of the array that have been previously successfully allocated
 (in rt_alloc_pdata()), so I don't think there is any special requirement
 for all the elements to be NULL right away.

OK. I see.


  +if ( _cpumask_scratch == NULL )
  +return -ENOMEM;
  +
   spin_lock_init(prv-lock);
   INIT_LIST_HEAD(prv-sdom);
   INIT_LIST_HEAD(prv-runq);
  @@ -426,6 +444,7 @@ rt_deinit(const struct scheduler *ops)
   {
   struct rt_private *prv = rt_priv(ops);
 
  +xfree(_cpumask_scratch);
   xfree(prv);
   }
 
  @@ -443,6 +462,9 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
   per_cpu(schedule_data, cpu).schedule_lock = prv-lock;
   spin_unlock_irqrestore(prv-lock, flags);
 
  +if ( !alloc_cpumask_var(_cpumask_scratch[cpu]) )

 Is it better to use zalloc_cpumask_var() here?

 Nope. It's a scratch area, after all, so one really should not assume it
 to be in a specific state (e.g., no bits set as you're suggesting) when
 using it.

I see the point. Now I got it. :-)


 Thanks and Regards,

Thank you very much for clarification! :-)

Best,

Meng

---
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2 5/5] xen: sched_rt: print useful affinity info when dumping

2015-03-17 Thread Meng Xu

2015-03-17 11:33 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com:
 In fact, printing the cpupool's CPU online mask
 for each vCPU is just redundant, as that is the
 same for all the vCPUs of all the domains in the
 same cpupool, while hard affinity is already part
 of the output of dumping domains info.

 Instead, print the intersection between hard
 affinity and online CPUs, which is --in case of this
 scheduler-- the effective affinity always used for
 the vCPUs.

 This change also takes the chance to add a scratch
 cpumask area, to avoid having to either put one
 (more) cpumask_t on the stack, or dynamically
 allocate it within the dumping routine. (The former
 being bad because hypervisor stack size is limited,
 the latter because dynamic allocations can fail, if
 the hypervisor was built for a large enough number
 of CPUs.)

 Such scratch area can be used to kill most of the
 cpumasks{_var}_t local variables in other functions
 in the file, but that is *NOT* done in this chage.

 Finally, convert the file to use keyhandler scratch,
 instead of open coded string buffers.

 Signed-off-by: Dario Faggioli dario.faggi...@citrix.com
 Cc: George Dunlap george.dun...@eu.citrix.com
 Cc: Meng Xu xumengpa...@gmail.com
 Cc: Jan Beulich jbeul...@suse.com
 Cc: Keir Fraser k...@xen.org
 ---
 Changes from v1:
  * improved changelog;
  * made a local variable to point to the correct
scratch mask, as suggested during review.
 ---

Reviewed-by: Meng Xu men...@cis.upenn.edu

Thanks,

Best,

Meng

---
Meng Xu
PhD Student in Computer and Information Science
University of Pennsylvania

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.

2015-03-17 Thread Sander Eikelenboom


Tuesday, March 17, 2015, 6:44:54 PM, you wrote:


  Additionally I think it should be considered whether the bitmap
  approach of interpreting -state is the right one, and we don't
  instead want a clean 3-state (idle, sched, run) model.
  
  Could you elaborate a bit more please? As in three different unsigned int
  (or bool_t) that set in what state we are in?
 
 An enum { STATE_IDLE, STATE_SCHED, STATE_RUN }. Especially
 if my comment above turns out to be wrong, you'd have no real
 need for the SCHED and RUN flags to be set at the same time.

 I cobbled what I believe is what you were thinking off.

 As you can see to preserve the existing functionality such as
 being able to schedule N amount of interrupt injections
 for the N interrupts we might get - I modified '-masked'
 to be an atomic counter.

 The end result is that we can still live-lock. Unless we:
  - Drop on the floor the injection of N interrupts and
just deliever at max one per VMX_EXIT (and not bother
with interrupts arriving when we are in the VMX handler).

  - Alter the softirq code slightly - to have an variant
which will only iterate once over the pending softirq
bits per call. (so save an copy of the bitmap on the
stack when entering the softirq handler - and use that.
We could also xor it against the current to catch any
non-duplicate bits being set that we should deal with).

 Here is the compile, but not run-time tested patch.

 From e7d8bcd7c5d32c520554a4ad69c4716246036002 Mon Sep 17 00:00:00 2001
 From: Konrad Rzeszutek Wilk konrad.w...@oracle.com
 Date: Tue, 17 Mar 2015 13:31:52 -0400
 Subject: [RFC PATCH] dpci: Switch to tristate instead of bitmap

 *TODO*:
  - Writeup.
  - Tests

Done, and unfortunately it doesn't fly ..
Some devices seem to work fine, others don't receive any interrupts shortly 
after boot like:
 40:  3  0  0  0  xen-pirq-ioapic-level  
cx25821[1]

Don't see any crashes or errors though, so it seems to silently lock somewhere.

--
Sander

 Suggested-by: Jan Beulich jbuel...@suse.com
 Signed-off-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
 ---
  xen/drivers/passthrough/io.c | 140 
 ---
  xen/include/xen/hvm/irq.h|   4 +-
  2 files changed, 82 insertions(+), 62 deletions(-)

 diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
 index ae050df..663e104 100644
 --- a/xen/drivers/passthrough/io.c
 +++ b/xen/drivers/passthrough/io.c
 @@ -30,42 +30,28 @@
  static DEFINE_PER_CPU(struct list_head, dpci_list);
  
  /*
 - * These two bit states help to safely schedule, deschedule, and wait until
 - * the softirq has finished.
 - *
 - * The semantics behind these two bits is as follow:
 - *  - STATE_SCHED - whoever modifies it has to ref-count the domain (-dom).
 - *  - STATE_RUN - only softirq is allowed to set and clear it. If it has
 - *  been set hvm_dirq_assist will RUN with a saved value of the
 - *  'struct domain' copied from 'pirq_dpci-dom' before STATE_RUN was 
 set.
 - *
 - * The usual states are: STATE_SCHED(set) - STATE_RUN(set) -
 - * STATE_SCHED(unset) - STATE_RUN(unset).
 - *
 - * However the states can also diverge such as: STATE_SCHED(set) -
 - * STATE_SCHED(unset) - STATE_RUN(set) - STATE_RUN(unset). That means
 - * the 'hvm_dirq_assist' never run and that the softirq did not do any
 - * ref-counting.
 - */
 -
 -enum {
 -STATE_SCHED,
 -STATE_RUN
 -};
 -
 -/*
   * This can be called multiple times, but the softirq is only raised once.
 - * That is until the STATE_SCHED state has been cleared. The state can be
 - * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
 - * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
 + * That is until state is in init. The state can be changed  by:
 + * the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
 + * or by 'pt_pirq_softirq_reset' (which will try to init the state before
   * the softirq had a chance to run).
   */
  static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
  {
  unsigned long flags;
  
 -if ( test_and_set_bit(STATE_SCHED, pirq_dpci-state) )
 +switch ( cmpxchg(pirq_dpci-state, STATE_INIT, STATE_SCHED) )
 +{
 +case STATE_RUN:
 +case STATE_SCHED:
 +/*
 + * The pirq_dpci-mapping has been incremented to let us know
 + * how many we have left to do.
 + */
  return;
 +case STATE_INIT:
 +break;
 +}
  
  get_knownalive_domain(pirq_dpci-dom);
  
 @@ -85,7 +71,7 @@ static void raise_softirq_for(struct hvm_pirq_dpci 
 *pirq_dpci)
   */
  bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
  {
 -if ( pirq_dpci-state  ((1  STATE_RUN) | (1  STATE_SCHED)) )
 +if ( pirq_dpci-state != STATE_INIT )
  return 1;
  
  /*
 @@ -109,22 +95,22 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci 
 *pirq_dpci)

Re: [Xen-devel] Any work on sharing of large multi-page segments?

2015-03-17 Thread Andrew Warkentin

On 3/17/15, Jan Beulich jbeul...@suse.com wrote:
 And how would that be significantly different from the batching
 that's already built into the grant table hypercall?

I guess it does do more or less what I want already. I was looking
more at the inner mapping/unmapping functions, rather than the
wrappers around them that implement the actual hypercalls.

What would be a useful addition would be support for granting 2M
pages. That would eliminate any problem with running out of grant
table slots.

On 3/17/15, George Dunlap george.dun...@eu.citrix.com wrote:
 Any deduplication code would run in as a process probably in domain 0,
 and may be somewhat slow; but the actual mechanism of sharing is a
 generic mechanism in the hypervisor which any client can use.  Jan is
 suggesting that you might be able to use that interface to
 pro-actively tell Xen about the memory pages shared between your
 various domains.


I wasn't quite sure if it's generic enough to use to implement shared
segments, or if it were specific to deduplication at the hypervisor
level.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH v3 0/3] Xen/FLASK policy updates for device contexts

2015-03-17 Thread Daniel De Graaf

In order to support assigning security lables to ARM device tree nodes
in Xen's XSM policy, a new ocontext type is needed in the security
policy.

In addition to adding the new ocontext, the existing I/O memory range
ocontext is expanded to 64 bits in order to support hardware with more
than 44 bits of physical address space (32-bit count of 4K pages).

Changes from v2:
 - Clean up printf format strings for 32-bit builds

Changes from v1:
 - Use policy version 30 instead of forking the version numbers for Xen;
   this removes the need for v1's patch 3.
 - Report an error when attempting to use an I/O memory range that
   requires a 64-bit representation with an old policy output version
   that cannot support this
 - Fix a few incorrect references to PCIDEVICECON
 - Reorder patches to clarify the allowed characterset of device tree
   paths

[PATCH 1/3] checkpolicy: Expand allowed character set in paths
[PATCH 2/3] libsepol, checkpolicy: widen Xen IOMEM ocontext entries
[PATCH 3/3] libsepol, checkpolicy: add device tree ocontext nodes to

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

[Xen-devel] [PATCH 3/3] libsepol, checkpolicy: add device tree ocontext nodes to Xen policy

2015-03-17 Thread Daniel De Graaf

In Xen on ARM, device tree nodes identified by a path (string) need to
be labeled by the security policy.

Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov
---
 checkpolicy/policy_define.c| 55 +
 checkpolicy/policy_define.h|  1 +
 checkpolicy/policy_parse.y |  8 +++-
 checkpolicy/policy_scan.l  |  2 +
 libsepol/cil/src/cil.c | 17 
 libsepol/cil/src/cil_binary.c  | 29 +
 libsepol/cil/src/cil_build_ast.c   | 66 ++
 libsepol/cil/src/cil_build_ast.h   |  2 +
 libsepol/cil/src/cil_copy_ast.c| 24 +++
 libsepol/cil/src/cil_flavor.h  |  1 +
 libsepol/cil/src/cil_internal.h| 10 +
 libsepol/cil/src/cil_post.c| 34 +++
 libsepol/cil/src/cil_reset_ast.c   | 10 +
 libsepol/cil/src/cil_resolve_ast.c | 28 +
 libsepol/cil/src/cil_tree.c| 13 ++
 libsepol/cil/src/cil_verify.c  | 24 +++
 libsepol/include/sepol/policydb/policydb.h |  1 +
 libsepol/src/expand.c  |  7 
 libsepol/src/policydb.c| 18 +++-
 libsepol/src/write.c   | 14 ++-
 sepolgen/src/sepolgen/refparser.py | 11 +
 sepolgen/src/sepolgen/refpolicy.py |  9 
 22 files changed, 379 insertions(+), 5 deletions(-)

diff --git a/checkpolicy/policy_define.c b/checkpolicy/policy_define.c
index 66c1ff2..de01f6f 100644
--- a/checkpolicy/policy_define.c
+++ b/checkpolicy/policy_define.c
@@ -4116,6 +4116,61 @@ bad:
return -1;
 }
 
+int define_devicetree_context()
+{
+   ocontext_t *newc, *c, *l, *head;
+
+   if (policydbp-target_platform != SEPOL_TARGET_XEN) {
+   yyerror(devicetreecon not supported for target);
+   return -1;
+   }
+
+   if (pass == 1) {
+   free(queue_remove(id_queue));
+   parse_security_context(NULL);
+   return 0;
+   }
+
+   newc = malloc(sizeof(ocontext_t));
+   if (!newc) {
+   yyerror(out of memory);
+   return -1;
+   }
+   memset(newc, 0, sizeof(ocontext_t));
+
+   newc-u.name = (char *)queue_remove(id_queue);
+   if (!newc-u.name) {
+   free(newc);
+   return -1;
+   }
+
+   if (parse_security_context(newc-context[0])) {
+   free(newc-u.name);
+   free(newc);
+   return -1;
+   }
+
+   head = policydbp-ocontexts[OCON_XEN_DEVICETREE];
+   for (l = NULL, c = head; c; l = c, c = c-next) {
+   if (strcmp(newc-u.name, c-u.name) == 0) {
+   yyerror2(duplicate devicetree entry for '%s', 
newc-u.name);
+   goto bad;
+   }
+   }
+
+   if (l)
+   l-next = newc;
+   else
+   policydbp-ocontexts[OCON_XEN_DEVICETREE] = newc;
+
+   return 0;
+
+bad:
+   free(newc-u.name);
+   free(newc);
+   return -1;
+}
+
 int define_port_context(unsigned int low, unsigned int high)
 {
ocontext_t *newc, *c, *l, *head;
diff --git a/checkpolicy/policy_define.h b/checkpolicy/policy_define.h
index 14d30e1..a87ced3 100644
--- a/checkpolicy/policy_define.h
+++ b/checkpolicy/policy_define.h
@@ -49,6 +49,7 @@ int define_pirq_context(unsigned int pirq);
 int define_iomem_context(uint64_t low, uint64_t high);
 int define_ioport_context(unsigned long low, unsigned long high);
 int define_pcidevice_context(unsigned long device);
+int define_devicetree_context(void);
 int define_range_trans(int class_specified);
 int define_role_allow(void);
 int define_role_trans(int class_specified);
diff --git a/checkpolicy/policy_parse.y b/checkpolicy/policy_parse.y
index e3899b9..8b81f04 100644
--- a/checkpolicy/policy_parse.y
+++ b/checkpolicy/policy_parse.y
@@ -130,7 +130,7 @@ typedef int (* require_func_t)(int pass);
 %token TARGET
 %token SAMEUSER
 %token FSCON PORTCON NETIFCON NODECON 
-%token PIRQCON IOMEMCON IOPORTCON PCIDEVICECON
+%token PIRQCON IOMEMCON IOPORTCON PCIDEVICECON DEVICETREECON
 %token FSUSEXATTR FSUSETASK FSUSETRANS
 %token GENFSCON
 %token U1 U2 U3 R1 R2 R3 T1 T2 T3 L1 L2 H1 H2
@@ -644,7 +644,8 @@ dev_contexts: dev_context_def
 dev_context_def: pirq_context_def |
  iomem_context_def |
  ioport_context_def |
- pci_context_def
+ pci_context_def |
+ dtree_context_def
;
 pirq_context_def   : PIRQCON number security_context_def
{if (define_pirq_context($2)) return -1;}
@@ -662,6 +663,9 @@ ioport_context_def  : IOPORTCON number security_context_def
 pci_context_def: PCIDEVICECON number security_context_def

[Xen-devel] [PATCH 1/3] checkpolicy: Expand allowed character set in paths

2015-03-17 Thread Daniel De Graaf

In order to support paths containing spaces or other characters, allow a
quoted string with these characters to be parsed as a path in addition
to the existing unquoted string.

Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov
---
 checkpolicy/policy_parse.y | 3 +++
 checkpolicy/policy_scan.l  | 1 +
 2 files changed, 4 insertions(+)

diff --git a/checkpolicy/policy_parse.y b/checkpolicy/policy_parse.y
index 15c8997..e5210bd 100644
--- a/checkpolicy/policy_parse.y
+++ b/checkpolicy/policy_parse.y
@@ -81,6 +81,7 @@ typedef int (* require_func_t)(int pass);
 %type require_func require_decl_def
 
 %token PATH
+%token QPATH
 %token FILENAME
 %token CLONE
 %token COMMON
@@ -805,6 +806,8 @@ filesystem  : FILESYSTEM
 ;
 path   : PATH
{ if (insert_id(yytext,0)) return -1; }
+   | QPATH
+   { yytext[strlen(yytext) - 1] = '\0'; if 
(insert_id(yytext + 1,0)) return -1; }
;
 filename   : FILENAME
{ yytext[strlen(yytext) - 1] = '\0'; if 
(insert_id(yytext + 1,0)) return -1; }
diff --git a/checkpolicy/policy_scan.l b/checkpolicy/policy_scan.l
index 648e1d6..6763c38 100644
--- a/checkpolicy/policy_scan.l
+++ b/checkpolicy/policy_scan.l
@@ -240,6 +240,7 @@ HIGH{ return(HIGH); }
 low |
 LOW{ return(LOW); }
 /({alnum}|[_\.\-/])* { return(PATH); }
+\/[ !#-~]*\{ return(QPATH); }
 \({alnum}|[_\.\-\+\~\: ])+\  { return(FILENAME); }
 {letter}({alnum}|[_\-])*([\.]?({alnum}|[_\-]))*{ return(IDENTIFIER); }
 {digit}+|0x{hexval}+{ return(NUMBER); }
-- 
2.1.0


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] dpci: Put the dpci back on the list if scheduled from another CPU.

2015-03-17 Thread Konrad Rzeszutek Wilk

On Tue, Mar 17, 2015 at 04:06:14PM +, Jan Beulich wrote:
  On 17.03.15 at 16:38, konrad.w...@oracle.com wrote:
  --- a/xen/drivers/passthrough/io.c
  +++ b/xen/drivers/passthrough/io.c
  @@ -804,7 +804,17 @@ static void dpci_softirq(void)
   d = pirq_dpci-dom;
   smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
   if ( test_and_set_bit(STATE_RUN, pirq_dpci-state) )
  -BUG();
  +{
  +unsigned long flags;
  +
  +/* Put back on the list and retry. */
  +local_irq_save(flags);
  +list_add_tail(pirq_dpci-softirq_list, this_cpu(dpci_list));
  +local_irq_restore(flags);
  +
  +raise_softirq(HVM_DPCI_SOFTIRQ);
  +continue;
  +}
 
 As just said in another mail - unless there are convincing new
 arguments in favor of this (more of a hack than a real fix), I'm
 not going to accept it and instead consider reverting the
 offending commit. Iirc the latest we had come to looked quite a
 bit better than this one.

The latest one (please see attached) would cause an dead-lock iff
on the CPU we are running the softirq and an do_IRQ comes for the
exact dpci we are in process of executing.

 
 Jan
 
From 6b32dccfbe00518d3ca9cd94d19a6e007b2645d9 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk konrad.w...@oracle.com
Date: Tue, 17 Mar 2015 09:46:09 -0400
Subject: [PATCH] dpci: when scheduling spin until STATE_RUN or STATE_SCHED has
 been cleared.

There is race when we clear the STATE_SCHED in the softirq
- which allows the 'raise_softirq_for' (on another CPU)
to schedule the dpci.

Specifically this can happen whenthe other CPU receives
an interrupt, calls 'raise_softirq_for', and puts the dpci
on its per-cpu list (same dpci structure).

There would be two 'dpci_softirq' running at the same time
(on different CPUs) where on one CPU it would be executing
hvm_dirq_assist (so had cleared STATE_SCHED and set STATE_RUN)
and on the other CPU it is trying to call:

   if ( test_and_set_bit(STATE_RUN, pirq_dpci-state) )
BUG();

Since STATE_RUN is already set it would end badly.

The reason we can get his with this is when an interrupt
affinity is set over multiple CPUs.

Potential solutions:

a) Instead of the BUG() we can put the dpci back on the per-cpu
list to deal with later (when the softirq are activated again).
This putting the 'dpci' back on the per-cpu list is an spin
until the bad condition clears.

b) We could also expand the test-and-set(STATE_SCHED) in raise_softirq_for
to detect for 'STATE_RUN' bit being set and schedule the dpci
in a more safe manner (delay it). The dpci would stil not
be scheduled when STATE_SCHED bit was set.

c) This patch explores a third option - we will only schedule
the dpci when the state is cleared (no STATE_SCHED and no STATE_RUN).

We will spin if STATE_RUN is set (as it is in progress and will
finish). If the STATE_SCHED is set (so hasn't run yet) we won't
try to spin and just exit. This can cause an dead-lock if the interrupt
comes when we are processing the dpci in the softirq.

Interestingly the old ('tasklet') code used an a) mechanism.
If the function assigned to the tasklet was running  - the softirq
that ran said function (hvm_dirq_assist) would be responsible for
putting the tasklet back on the per-cpu list. This would allow
to have an running tasklet and an 'to-be-scheduled' tasklet
at the same time. This solution moves this 'to-be-scheduled'
job to be done in 'raise_softirq_for' (instead of the
'softirq').

Reported-by: Sander Eikelenboom li...@eikelenboom.it
Reported-by: Malcolm Crossley malcolm.cross...@citrix.com
Signed-off-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com
---
 xen/drivers/passthrough/io.c | 28 +---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index ae050df..9c30ebb 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -63,10 +63,32 @@ enum {
 static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
 {
 unsigned long flags;
+unsigned long old;
 
-if ( test_and_set_bit(STATE_SCHED, pirq_dpci-state) )
-return;
-
+/*
+ * This cmpxchg spins until the state is zero (unused).
+ */
+for ( ;; )
+{
+old = cmpxchg(pirq_dpci-state, 0, 1  STATE_SCHED);
+switch ( old )
+{
+case (1  STATE_SCHED):
+/*
+ * Whenever STATE_SCHED is set we MUST not schedule it.
+ */
+return;
+case (1  STATE_RUN) | (1  STATE_SCHED):
+case (1  STATE_RUN):
+/* Getting close to finish. Spin. */
+continue;
+}
+/*
+ * If the 'state' is 0 (not in use) we can schedule it.
+ */
+if ( old == 0 )
+break;
+}
 get_knownalive_domain(pirq_dpci-dom);
 
 local_irq_save(flags);
-- 
2.1.0

[Xen-devel] [PATCH 2/3] libsepol, checkpolicy: widen Xen IOMEM ocontext entries

2015-03-17 Thread Daniel De Graaf

This expands IOMEMCON device context entries to 64 bits.  This change is
required to support static I/O memory range labeling for systems with
over 16TB of physical address space.  The policy version number change
is shared with the next patch.

While this makes no changes to SELinux policy, a new SELinux policy
compatibility entry was added in order to avoid breaking compilation of
an SELinux policy without explicitly specifying the policy version.

Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov
---
 checkpolicy/policy_define.c| 11 +-
 checkpolicy/policy_define.h|  2 +-
 checkpolicy/policy_parse.y |  9 ++--
 libsepol/cil/src/cil_build_ast.c   | 32 ++---
 libsepol/cil/src/cil_build_ast.h   |  1 +
 libsepol/cil/src/cil_internal.h|  4 ++--
 libsepol/cil/src/cil_policy.c  |  3 ++-
 libsepol/cil/src/cil_tree.c|  3 ++-
 libsepol/include/sepol/policydb/policydb.h |  7 ---
 libsepol/src/policydb.c| 33 +-
 libsepol/src/write.c   | 32 ++---
 policycoreutils/hll/pp/pp.c|  4 ++--
 12 files changed, 109 insertions(+), 32 deletions(-)

diff --git a/checkpolicy/policy_define.c b/checkpolicy/policy_define.c
index a6c5d65..66c1ff2 100644
--- a/checkpolicy/policy_define.c
+++ b/checkpolicy/policy_define.c
@@ -39,6 +39,7 @@
 #include arpa/inet.h
 #include stdlib.h
 #include limits.h
+#include inttypes.h
 
 #include sepol/policydb/expand.h
 #include sepol/policydb/policydb.h
@@ -3932,7 +3933,7 @@ bad:
return -1;
 }
 
-int define_iomem_context(unsigned long low, unsigned long high)
+int define_iomem_context(uint64_t low, uint64_t high)
 {
ocontext_t *newc, *c, *l, *head;
char *id;
@@ -3960,7 +3961,7 @@ int define_iomem_context(unsigned long low, unsigned long 
high)
newc-u.iomem.high_iomem = high;
 
if (low  high) {
-   yyerror2(low memory 0x%lx exceeds high memory 0x%lx, low, 
high);
+   yyerror2(low memory 0x%PRIx64 exceeds high memory 
0x%PRIx64, low, high);
free(newc);
return -1;
}
@@ -3972,13 +3973,13 @@ int define_iomem_context(unsigned long low, unsigned 
long high)
 
head = policydbp-ocontexts[OCON_XEN_IOMEM];
for (l = NULL, c = head; c; l = c, c = c-next) {
-   uint32_t low2, high2;
+   uint64_t low2, high2;
 
low2 = c-u.iomem.low_iomem;
high2 = c-u.iomem.high_iomem;
if (low = high2  low2 = high) {
-   yyerror2(iomemcon entry for 0x%lx-0x%lx overlaps with 
-   earlier entry 0x%x-0x%x, low, high,
+   yyerror2(iomemcon entry for 0x%PRIx64-0x%PRIx64 
overlaps with 
+   earlier entry 0x%PRIx64-0x%PRIx64, low, 
high,
low2, high2);
goto bad;
}
diff --git a/checkpolicy/policy_define.h b/checkpolicy/policy_define.h
index 4ef0f4f..14d30e1 100644
--- a/checkpolicy/policy_define.h
+++ b/checkpolicy/policy_define.h
@@ -46,7 +46,7 @@ int define_permissive(void);
 int define_polcap(void);
 int define_port_context(unsigned int low, unsigned int high);
 int define_pirq_context(unsigned int pirq);
-int define_iomem_context(unsigned long low, unsigned long high);
+int define_iomem_context(uint64_t low, uint64_t high);
 int define_ioport_context(unsigned long low, unsigned long high);
 int define_pcidevice_context(unsigned long device);
 int define_range_trans(int class_specified);
diff --git a/checkpolicy/policy_parse.y b/checkpolicy/policy_parse.y
index e5210bd..e3899b9 100644
--- a/checkpolicy/policy_parse.y
+++ b/checkpolicy/policy_parse.y
@@ -67,6 +67,7 @@ typedef int (* require_func_t)(int pass);
 
 %union {
unsigned int val;
+   uint64_t val64;
uintptr_t valptr;
void *ptr;
 require_func_t require_func;
@@ -78,6 +79,7 @@ typedef int (* require_func_t)(int pass);
 %type ptr role_def roles
 %type valptr cexpr cexpr_prim op role_mls_op
 %type val ipv4_addr_def number
+%type val64 number64
 %type require_func require_decl_def
 
 %token PATH
@@ -647,9 +649,9 @@ dev_context_def : pirq_context_def |
 pirq_context_def   : PIRQCON number security_context_def
{if (define_pirq_context($2)) return -1;}
;
-iomem_context_def  : IOMEMCON number security_context_def
+iomem_context_def  : IOMEMCON number64 security_context_def
{if (define_iomem_context($2,$2)) return -1;}
-   | IOMEMCON number '-' number security_context_def
+   | IOMEMCON number64 '-' number64 security_context_def
{if (define_iomem_context($2,$4)) return -1;}

Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)

2015-03-17 Thread Dario Faggioli

On Tue, 2015-03-17 at 10:54 +, Jan Beulich wrote:
  On 16.03.15 at 18:05, dario.faggi...@citrix.com wrote:
  such as it is taken care of by the various schedulers, rather
  than happening in schedule.c. In fact, it is the schedulers
  that know better which locks are necessary for the specific
  dumping operations.
  
  While there, fix a few style issues (indentation, trailing
  whitespace, parentheses and blank line after var declarations)
  
  Signed-off-by: Dario Faggioli dario.faggi...@citrix.com
  Cc: George Dunlap george.dun...@eu.citrix.com
  Cc: Meng Xu xumengpa...@gmail.com
  Cc: Jan Beulich jbeul...@suse.com
  Cc: Keir Fraser k...@xen.org
  ---
   xen/common/sched_credit.c  |   42 
  --
   xen/common/sched_credit2.c |   40 
   xen/common/sched_rt.c  |7 +--
   xen/common/schedule.c  |5 ++---
   4 files changed, 79 insertions(+), 15 deletions(-)
 
 Is it really correct that sched_sedf.c doesn't need any change here?
 
Good point.

I feel like mentioning though that SEDF is broken in many ways, and I
(and I suspect George is in a similar situation) really don't have the
bandwidth to fix it. Therefore, I really think we should start a
discussion on how to deprecate/get rid of it.

That being said, this is simple enough a case that I certainly can have
a quick look.

  --- a/xen/common/sched_credit.c
  +++ b/xen/common/sched_credit.c
  @@ -26,6 +26,23 @@
   
   
   /*
  + * Locking:
  + * - Scheduler-lock (a.k.a. runqueue lock):
  + *  + is per-runqueue, and there is one runqueue per-cpu;
  + *  + serializes all runqueue manipulation operations;
  + * - Private data lock (a.k.a. private scheduler lock):
  + *  + serializes accesses to the scheduler global state (weight,
  + *credit, balance_credit, etc);
  + *  + serializes updates to the domains' scheduling parameters.
  + *
  + * Ordering is private lock always comes first:
  + *  + if we need both locks, we must acquire the private
  + *scheduler lock for first;
  + *  + if we already own a runqueue lock, we must never acquire
  + *the private scheduler lock.
  + */
 
 And this is Credit1 specific?
 
It is similar (although with some differences) in Credit2, but there is
already a comment there, similar to this, stating it. RTDS, as of now,
only use _one_ lock, so not much to say about ordering. :-D

 Regardless of that, even if that's just reflecting current state, isn't
 acquiring a private lock around a generic lock backwards?
 
It sounds like so, I agree, when describing it in English. But then,
looking at the code, things make sense, IMO.

 Finally, as said in different contexts earlier, I think unconditionally
 acquiring locks in dumping routines isn't the best practice. At least
 in non-debug builds I think these should be try-locks only, skipping
 the dumping when a lock is busy.
 
That makes sense. It also looks, if not completely orthogonal, something
that can be done in a follow-up patch wrt this series. I.e., we first
move the locking logic where it belongs, and then we rework it toward
using _trylock(), does this make sense?

Regards,
Dario


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [RFC PATCH] xen-netback: making the bandwidth limiter runtime settable

2015-03-17 Thread Wei Liu

On Fri, Mar 13, 2015 at 01:51:05PM +0100, Imre Palik wrote:
 From: Palik, Imre im...@amazon.de
 
 With the current netback, the bandwidth limiter's parameters are only
 settable during vif setup time.  This patch register a watch on them, and
 thus makes them runtime changeable.
 
 When the watch fires, the timer is reset.  The timer's mutex is used for
 fencing the change.
 

I think this is a valid idea.  Just that this commit message is not
complete. It doesn't describe everything this patch does.

 Cc: Anthony Liguori aligu...@amazon.com
 Signed-off-by: Imre Palik im...@amazon.de
 ---
[...]
   queue-rx_queue_max = XENVIF_RX_QUEUE_BYTES;
 diff --git a/drivers/net/xen-netback/netback.c 
 b/drivers/net/xen-netback/netback.c
 index cab9f52..bcc1880 100644
 --- a/drivers/net/xen-netback/netback.c
 +++ b/drivers/net/xen-netback/netback.c
 @@ -641,7 +641,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
   queue-remaining_credit = min(max_credit, max_burst);
  }
  
 -static void tx_credit_callback(unsigned long data)
 +void xenvif_tx_credit_callback(unsigned long data)

Please keep this function static.

And say in the commit message you change tx_credit_callback to a better
name.

  {
   struct xenvif_queue *queue = (struct xenvif_queue *)data;
   tx_add_credit(queue);
 @@ -1170,7 +1170,7 @@ static bool tx_credit_exceeded(struct xenvif_queue 
 *queue, unsigned size)
   queue-credit_timeout.data =
   (unsigned long)queue;
   queue-credit_timeout.function =
 - tx_credit_callback;
 + xenvif_tx_credit_callback;
   mod_timer(queue-credit_timeout,
 next_credit);
[...]
 @@ -594,13 +597,9 @@ static void xen_net_read_rate(struct xenbus_device *dev,
   unsigned long b, u;
   char *ratestr;
  
 - /* Default to unlimited bandwidth. */
 - *bytes = ~0UL;
 - *usec = 0;
 -
   ratestr = xenbus_read(XBT_NIL, dev-nodename, rate, NULL);
   if (IS_ERR(ratestr))
 - return;
 + goto reset;
  
   s = ratestr;
   b = simple_strtoul(s, e, 10);
 @@ -612,15 +611,21 @@ static void xen_net_read_rate(struct xenbus_device *dev,
   if ((s == e) || (*e != '\0'))
   goto fail;
  
 + kfree(ratestr);
 +
   *bytes = b;
   *usec = u;
  
 - kfree(ratestr);
   return;
  
 - fail:
 +fail:
   pr_warn(Failed to parse network rate limit. Traffic unlimited.\n);
   kfree(ratestr);
 +
 +reset:
 + /* Default to unlimited bandwidth. */
 + *bytes = ~0UL;
 + *usec = 0;
  }
  

Any reason you modify this function? It is still doing the exact same
thing, right?

  static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
 @@ -645,6 +650,59 @@ static int xen_net_read_mac(struct xenbus_device *dev, 
 u8 mac[])
   return 0;
  }
  
 +static void xen_net_rate_changed(struct xenbus_watch *watch,
 + const char **vec, unsigned int len)
 +{
 + struct xenvif *vif = container_of(watch, struct xenvif, credit_watch);
 + struct xenbus_device *dev = xenvif_to_xenbus_device(vif);
 + unsigned long   credit_bytes;
 + unsigned long   credit_usec;
 + unsigned int queue_index;
 +
 + xen_net_read_rate(dev, credit_bytes, credit_usec);
 + for (queue_index = 0; queue_index  vif-num_queues; queue_index++) {
 + struct xenvif_queue *queue = vif-queues[queue_index];
 +
 + queue-credit_bytes = credit_bytes;
 + queue-credit_usec = credit_usec;
 + if (!mod_timer_pending(queue-credit_timeout, jiffies) 
 + queue-remaining_credit  queue-credit_bytes) {
 + queue-remaining_credit = queue-credit_bytes;
 + }
 + }
 +}
 +
 +static int xen_register_watchers(struct xenbus_device *dev, struct xenvif 
 *vif)
 +{
 + int err = 0;
 + char *node;
 + unsigned maxlen = strlen(dev-nodename) + sizeof(/rate);
 +
 + node = kmalloc(maxlen, GFP_KERNEL);
 + if (!node)
 + return -ENOMEM;
 + sprintf(node, %s/rate, dev-nodename);

Please use snprintf. (Though I can see using sprintf is fine here but I
want the code to be a bit future proof.)

 + vif-credit_watch.node = node;
 + vif-credit_watch.callback = xen_net_rate_changed;
 + err = register_xenbus_watch(vif-credit_watch);
 + if (err) {
 + pr_err(Failed to set watcher %s\n, vif-credit_watch.node);
 + kfree(node);
 + vif-credit_watch.node = 0;
 + vif-credit_watch.callback = 0;

Please use NULL here.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [RFC PATCH] xen-netback: making the bandwidth limiter runtime settable

2015-03-17 Thread Ian Campbell

On Fri, 2015-03-13 at 13:51 +0100, Imre Palik wrote:
[...]
 diff --git a/drivers/net/xen-netback/interface.c 
 b/drivers/net/xen-netback/interface.c
 index 3aa8648..34d8038 100644
 --- a/drivers/net/xen-netback/interface.c
 +++ b/drivers/net/xen-netback/interface.c
 @@ -463,6 +463,7 @@ int xenvif_init_queue(struct xenvif_queue *queue)
   queue-credit_bytes = queue-remaining_credit = ~0UL;
   queue-credit_usec  = 0UL;
   init_timer(queue-credit_timeout);
 + queue-credit_timeout.function = xenvif_tx_credit_callback;
   queue-credit_window_start = get_jiffies_64();
  
   queue-rx_queue_max = XENVIF_RX_QUEUE_BYTES;
 diff --git a/drivers/net/xen-netback/netback.c 
 b/drivers/net/xen-netback/netback.c
 index cab9f52..bcc1880 100644
 --- a/drivers/net/xen-netback/netback.c
 +++ b/drivers/net/xen-netback/netback.c
[...]
 @@ -1170,7 +1170,7 @@ static bool tx_credit_exceeded(struct xenvif_queue 
 *queue, unsigned size)
   queue-credit_timeout.data =
   (unsigned long)queue;
   queue-credit_timeout.function =
 - tx_credit_callback;
 + xenvif_tx_credit_callback;

Are this init and the one in xenvif_init_queue really both needed?

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 12:10, george.dun...@eu.citrix.com wrote:
 On 03/16/2015 08:30 PM, Meng Xu wrote:
 2015-03-16 13:05 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com:
 This change also takes the chance to add a scratch
 cpumask, to avoid having to create one more
 cpumask_var_t on the stack of the dumping routine.
 
 Actually, I have a question about the strength of this design. When we
 have a machine with many cpus, we will end up with allocating a
 cpumask for each cpu. Is this better than having a cpumask_var_t on
 the stack of the dumping routine, since the dumping routine is not in
 the hot path?
 
 The reason for taking this off the stack is that the hypervisor stack is
 a fairly limited resource -- IIRC it's only 8k (for each cpu).  If the
 call stack gets too deep, the hypervisor will triple-fault.  Keeping
 really large variables like cpumasks off the stack is key to making sure
 we don't get close to that.

Actually here you talk about cpumask_t-s on the stack.
cpumask_var_t-s aren't a problem stack size wise, but are an
issue due to the need to dynamically allocate them (and the
potential failure thereof) when the hypervisor was built for a
large enough number of CPUs.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 12:32, george.dun...@eu.citrix.com wrote:
 On 03/17/2015 11:25 AM, Jan Beulich wrote:
 On 17.03.15 at 12:05, george.dun...@eu.citrix.com wrote:
 On 03/17/2015 10:54 AM, Jan Beulich wrote:
 Finally, as said in different contexts earlier, I think unconditionally
 acquiring locks in dumping routines isn't the best practice. At least
 in non-debug builds I think these should be try-locks only, skipping
 the dumping when a lock is busy.

 You mean so that we don't block the console if there turns out to be a
 deadlock?
 
 For example. And also to not unduly get in the way of an otherwise
 extremely busy system.
 
 I don't understand this last argument.  If you're using the debug keys,
 you want to know about the state of the system.  I would much rather my
 system ran 25% slower for the 5 seconds the debug key was dumping
 information, and have a complete snapshot of the system, than for it to
 only run 10% slower and to have half the information missing.  The
 upshot of missing information would likely be that I have to press the
 debug key 3-4 times in a row, meaning I'd be running 10% slower for 20
 seconds rather than 25% slower for 5 seconds.

Yes, I understand this, and in many cases this is the perspective to
take. Yet I've been in the situation where suggesting the use of
debug keys to learn something about a (partially) live locked system
would have had the risk of causing further corruption to it, and
hence a more careful state dumping approach would have been
desirable.

 All in all, I don't think the performance of the debug keys should be a
 major concern.  The only thing I'd be worried about is making the system
 as diagnosable as possible if things have already gone pear-shaped
 (e.g., if there's a deadlock).

It's not their performance that's of concern, but the effect they
may have on the performance (or even correctness - see how
many process_pending_softirqs() calls we had to sprinkle around
over the years) of other code.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCHv1] xen/balloon: disable memory hotplug in PV guests

2015-03-17 Thread Daniel Kiper

On Mon, Mar 16, 2015 at 11:31:49AM +0100, Juergen Gross wrote:
 On 03/16/2015 11:03 AM, Daniel Kiper wrote:
 On Mon, Mar 16, 2015 at 06:35:04AM +0100, Juergen Gross wrote:
 On 03/11/2015 04:40 PM, Boris Ostrovsky wrote:
 On 03/11/2015 10:42 AM, David Vrabel wrote:
 On 10/03/15 13:35, Boris Ostrovsky wrote:
 On 03/10/2015 07:40 AM, David Vrabel wrote:
 On 09/03/15 14:10, David Vrabel wrote:
 Memory hotplug doesn't work with PV guests because:
 
 a) The p2m cannot be expanded to cover the new sections.
 Broken by 054954eb051f35e74b75a566a96fe756015352c8 (xen: switch to
 linear virtual mapped sparse p2m list).
 
 This one would be non-trivial to fix.  We'd need a sparse set of
 vm_area's for the p2m or similar.
 
 b) add_memory() builds page tables for the new sections which
 means
the new pages must have valid p2m entries (or a BUG occurs).
 After some more testing this appears to be broken by:
 
 25b884a83d487fd62c3de7ac1ab5549979188482 (x86/xen: set regions above
 the
 end of RAM as 1:1) included 3.16.
 
 This one can be trivially fixed by setting the new sections in the p2m
 to INVALID_P2M_ENTRY before calling add_memory().
 Have you tried 3.17? As I said yesterday, it worked for me (with 4.4
 Xen).
 No.  But there are three bugs that prevent it from working in 3.16+ so
 I'm really not sure how you had a working in a 3.17 PV guest.
 
 This is what I have:
 
 [build@build-mk2 linux-boris]$ ssh root@tst008 cat
 /mnt/lab/bootstrap-x86_64/test_small.xm
 extra=console=hvc0 debug earlyprintk=xen 
 kernel=/mnt/lab/bootstrap-x86_64/vmlinuz
 ramdisk=/mnt/lab/bootstrap-x86_64/initramfs.cpio.gz
 memory=1024
 maxmem = 4096
 vcpus=1
 maxvcpus=3
 name=bootstrap-x86_64
 on_crash=preserve
 vif = [ 'mac=00:0F:4B:00:00:68, bridge=switch' ]
 vnc=1
 vnclisten=0.0.0.0
 disk=['phy:/dev/guests/bootstrap-x86_64,xvda,w']
 [build@build-mk2 linux-boris]$ ssh root@tst008 xl create
 /mnt/lab/bootstrap-x86_64/test_small.xm
 Parsing config from /mnt/lab/bootstrap-x86_64/test_small.xm
 [build@build-mk2 linux-boris]$ ssh root@tst008 xl list |grep
 bootstrap-x86_64
 bootstrap-x86_64 2  1024 1 -b   5.4
 [build@build-mk2 linux-boris]$ ssh root@g-pvops uname -r
 3.17.0upstream
 [build@build-mk2 linux-boris]$ ssh root@g-pvops dmesg|grep paravirtualized
 [0.00] Booting paravirtualized kernel on Xen
 [build@build-mk2 linux-boris]$ ssh root@g-pvops grep MemTotal /proc/meminfo
 MemTotal: 968036 kB
 [build@build-mk2 linux-boris]$ ssh root@tst008 xl mem-set
 bootstrap-x86_64 2048
 [build@build-mk2 linux-boris]$ ssh root@tst008 xl list |grep
 bootstrap-x86_64
 bootstrap-x86_64 2  2048 1 -b   5.7
 [build@build-mk2 linux-boris]$ ssh root@g-pvops grep MemTotal /proc/meminfo
 MemTotal:2016612 kB
 [build@build-mk2 linux-boris]$
 
 
 
 Regardless, it definitely doesn't work now because of the linear p2m
 change.  What do you want to do about this?
 
 Since backing out p2m changes is not an option I guess your patch is the
 only short-term alternative.
 
 But this still looks like a regression so perhaps Juergen can take a
 look to see how it can be fixed.
 
 Hmm, the p2m list is allocated for the maximum memory size of the domain
 which is obtained from the hypervisor. In case of Dom0 it is read via
 XENMEM_maximum_reservation, for a domU it is based on the E820 memory
 map read via XENMEM_memory_map.
 
 I just tested it with a 4.0-rc1 domU kernel with 512MB initial memory
 and 4GB of maxmem. The E820 map looked like this:
 
 [0.00] Xen: [mem 0x-0x0009] usable
 [0.00] Xen: [mem 0x000a-0x000f] reserved
 [0.00] Xen: [mem 0x0010-0x] usable
 
 So the complete 4GB were included, like they should. The resulting p2m
 list is allocated in the needed size:
 
 [0.00] p2m virtual area at c900, size is 80
 
 So what is your problem here? Can you post the E820 map and the p2m map
 info for your failing domain, please?
 
 If you use memory hotplug then maxmem is not a limit from guest kernel
 point of view (host still must allow that operation but it is another
 not related issue). The problem is that p2m must be dynamically expendable
 to support it. Earlier implementation supported that thing and memory
 hotplug worked without any issue.

 Okay, now I get it.

 The problem with the earlier p2m implementation was that it was
 expendable to support only up to 512GB of RAM. So we need some way to
 tell the kernel how much virtual memory it should reserve for the p2m
 list if memory hotplug is enabled. We could:

 a) use a configurable maximum (e.g. for 512GB RAM as today)

 b) use the maximum of RAM the machine the domain is started on can ever
have (what about migration then?)

Memory hotplug for Xen is planned, so, this would not work.

 c) use a kernel parameter specifying the maximum memory size to support

 d) a

Re: [Xen-devel] PVH DomU panics on boot on Xen 4.5.0 whereas it was fine on 4.4.1

2015-03-17 Thread Konrad Rzeszutek Wilk

On Mon, Mar 16, 2015 at 11:08:50PM +, Ian Murray wrote:
 On 16/03/15 14:12, Konrad Rzeszutek Wilk wrote:
  On Sun, Mar 15, 2015 at 09:34:16PM +, Ian Murray wrote:
  Hi,
 
  I have a domU guest that booted fine under Xen 4.4.1 with pvh=1 but now 
  fails to boot with it under Xen 4.5.0. Removing pvh=1, i.e. booting it as 
  traditional PV results in it booting fine.
 
  The only odd thing is that I had to compile with debug=y on Config.mk to 
  avoid a compiler warning that was causing compilation to fail outright. I 
  will create another mail about that. DomU is Ubuntu 14.10 and Dom0 is 
  12.04.5
 
  there were some incompatible changes in xen 4.5 in regards to
  PVH which were then updated in Linux 3.19 (or was it 3.18?)
 
  I would recommend you rev up to the latest version of Linux.
 
 I tried the mainline support kernel for Ubuntu (GNU/Linux
 3.19.1-031901-generic x86_64) and it booted fine.
 
 Thanks for the assistance and happy to assist if anyone wants to treat
 it as a regression.

Nah, it is labelled 'expermintal' for that exact reason - as we did
realized we made a mistake in Xen 4.4 that we ended up fixing
in Xen 4.5 - and the fixed it in Linux.

Sorry thought that it was not widely mentioned and it made your
day a bit sad. Was there an specific webpage you looked first for
help? (Asking so I can at least edit it to mention this).
 
 
 
 
  Here is the boot output when booting with pvh=1
 
  [0.00] Initializing cgroup subsys cpuset
  [0.00] Initializing cgroup subsys cpu
  [0.00] Initializing cgroup subsys cpuacct
  [0.00] Linux version 3.16.0-31-generic (buildd@batsu) (gcc version 
  4.9.1 (Ubuntu 4.9.1-16ubuntu6) ) #41-Ubuntu SMP Tue Feb 10 15:24:04 UTC 
  2015 (Ubuntu 3.16.0-31.41-generic 3.16.7-ckt5)
  [0.00] Command line: 
  root=UUID=edfcef2a-dcf1-4c77-ad69-22456606702e ro  nomodeset 
  xen-fbfront.video=16,1024,768
  [0.00] KERNEL supported cpus:
  [0.00]   Intel GenuineIntel
  [0.00]   AMD AuthenticAMD
  [0.00]   Centaur CentaurHauls
  [0.00] ACPI in unprivileged domain disabled
  [0.00] e820: BIOS-provided physical RAM map:
  [0.00] Xen: [mem 0x-0x3fff] usable
  [0.00] NX (Execute Disable) protection: active
  [0.00] DMI not present or invalid.
  [0.00] AGP: No AGP bridge found
  [0.00] e820: last_pfn = 0x4 max_arch_pfn = 0x4
  [0.00] Scanning 1 areas for low memory corruption
  [0.00] init_memory_mapping: [mem 0x-0x000f]
  [0.00] init_memory_mapping: [mem 0x3fe0-0x3fff]
  [0.00] init_memory_mapping: [mem 0x3c00-0x3fdf]
  [0.00] init_memory_mapping: [mem 0x0010-0x3bff]
  [0.00] RAMDISK: [mem 0x023f6000-0x0589]
  [0.00] NUMA turned off
  [0.00] Faking a node at [mem 0x-0x3fff]
  [0.00] Initmem setup node 0 [mem 0x-0x3fff]
  [0.00]   NODE_DATA [mem 0x3fffb000-0x3fff]
  [0.00] Zone ranges:
  [0.00]   DMA  [mem 0x1000-0x00ff]
  [0.00]   DMA32[mem 0x0100-0x]
  [0.00]   Normal   empty
  [0.00] Movable zone start for each node
  [0.00] Early memory node ranges
  [0.00]   node   0: [mem 0x1000-0x0009]
  [0.00]   node   0: [mem 0x0010-0x3fff]
  [0.00] SFI: Simple Firmware Interface v0.81 
  http://simplefirmware.org
  [0.00] smpboot: Allowing 2 CPUs, 0 hotplug CPUs
  [0.00] PM: Registered nosave memory: [mem 0x000a-0x000f]
  [0.00] e820: [mem 0x4000-0x] available for PCI devices
  [0.00] Booting paravirtualized kernel with PVH extensions on Xen
  [0.00] Xen version: 4.5.0
  [0.00] setup_percpu: NR_CPUS:256 nr_cpumask_bits:256 nr_cpu_ids:2 
  nr_node_ids:1
  [0.00] PERCPU: Embedded 28 pages/cpu @88003fc0 s83328 
  r8192 d23168 u1048576
  [0.00] Built 1 zonelists in Node order, mobility grouping on.  
  Total pages: 257930
  [0.00] Policy zone: DMA32
  [0.00] Kernel command line: 
  root=UUID=edfcef2a-dcf1-4c77-ad69-22456606702e ro  nomodeset 
  xen-fbfront.video=16,1024,768
  [0.00] PID hash table entries: 4096 (order: 3, 32768 bytes)
  [0.00] xsave: enabled xstate_bv 0x3, cntxt size 0x240
  [0.00] AGP: Checking aperture...
  [0.00] AGP: No AGP bridge found
  [0.00] Memory: 958828K/1048188K available (7743K kernel code, 
  1197K rwdata, 3612K rodata, 1360K init, 1308K bss, 89360K reserved)
  [0.00] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=2, Nodes=1
  [0.00] Hierarchical RCU implementation.
  [0.00] RCU dyntick-idle grace-period acceleration is enabled.
  [0.00] RCU restricting CPUs from NR_CPUS=256 to nr_cpu_ids=2.
  [0.00] Offload RCU

Re: [Xen-devel] OpenStack - Libvirt+Xen CI overview

2015-03-17 Thread George Dunlap

On Tue, Mar 10, 2015 at 12:03 PM, Bob Ball bob.b...@citrix.com wrote:
 For the last few weeks Anthony and I have been working on creating a CI 
 environment to run against all OpenStack jobs.  We're now in a position where 
 we can share the current status, overview of how it works and next steps.  We 
 actively want to support involvement in this effort from others with an 
 interest in libvirt+Xen's openstack integration.

 The CI we have set up is follow the recommendations made by the OpenStack 
 official infrastructure maintainers, and reproduces a notable portion of the 
 official OpenStack CI environment to run these tests.  Namely this setup is 
 using:
 - Puppet to deploy the master node
 - Zuul to watch for code changes uploaded to review.openstack.org
 - Jenkins job builder to create Jenkins job definitions from a YAML file
 - Nodepool to automatically create single-use virtual machines in the 
 Rackspace public cloud
 - Devstack-gate to run Tempest tests in serial

 More information on Zuul, JJB, Nodepool and devstack-gate is available 
 through http://ci.openstack.org

 The current status is that we have a zuul instance monitoring for jobs and 
 adding them to the queue of jobs to be run at 
 http://zuul.openstack.xenproject.org/

 In the background Nodepool provisions virtual machines into a pool of nodes 
 ready to be used.  All ready nodes are automatically added to Jenkins 
 (https://jenkins.openstack.xenproject.org/), and then Zuul+Jenkins will 
 trigger a particular job on a node when one is available.

 Logs are then uploaded to Rackspace's Cloud Files with sample logs for a 
 passing job at 
 http://logs.openstack.xenproject.org/52/162352/3/silent/dsvm-tempest-xen/da3ff30/index.html

 I'd like to organise a meeting to walk through the various components of the 
 CI with those who are interested, so this is an initial call to find out who 
 is interested in finding out more!

I'd be interested.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)

2015-03-17 Thread Dario Faggioli

On Tue, 2015-03-17 at 11:05 +, George Dunlap wrote:
 On 03/17/2015 10:54 AM, Jan Beulich wrote:

  Finally, as said in different contexts earlier, I think unconditionally
  acquiring locks in dumping routines isn't the best practice. At least
  in non-debug builds I think these should be try-locks only, skipping
  the dumping when a lock is busy.
 
 You mean so that we don't block the console if there turns out to be a
 deadlock?
 
 That makes some sense; but on a busy system that would mean a
 non-negligible chance that any give keystroke would be missing
 information about some cpu or other, which would be pretty frustrating
 for someone trying to figure out the state of their system.
 
Mmm... that's a very good point, actually, which I did not think about.

 Would it make sense to have a version of spin_trylock for use in this
 kind of situation that waits  retries a reasonable number of times
 before giving up?
 
Probably. After all, if one comes to the point of sending a debug-key,
he most likely want to see the system status at (almost) any cost...

Regards,
Dario


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v2 2/4] xen: sched: make counters for vCPU sleep and wakeup generic

2015-03-17 Thread George Dunlap

On 03/17/2015 10:48 AM, Dario Faggioli wrote:
 On Mon, 2015-03-16 at 17:18 +, George Dunlap wrote:
 On 02/27/2015 04:51 PM, Dario Faggioli wrote:
 
 
 diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
 index ad0a5d4..2b852cc 100644
 --- a/xen/common/sched_credit2.c
 +++ b/xen/common/sched_credit2.c
 @@ -931,6 +931,7 @@ csched2_vcpu_sleep(const struct scheduler *ops, struct 
 vcpu *vc)
  struct csched2_vcpu * const svc = CSCHED2_VCPU(vc);
  
  BUG_ON( is_idle_vcpu(vc) );
 +SCHED_STAT_CRANK(vcpu_sleep);
  
  if ( per_cpu(schedule_data, vc-processor).curr == vc )
  cpu_raise_softirq(vc-processor, SCHEDULE_SOFTIRQ);
 @@ -956,19 +957,22 @@ csched2_vcpu_wake(const struct scheduler *ops, struct 
 vcpu *vc)
  
  BUG_ON( is_idle_vcpu(vc) );
  
 -/* Make sure svc priority mod happens before runq check */
  if ( unlikely(per_cpu(schedule_data, vc-processor).curr == vc) )
  {
 +SCHED_STAT_CRANK(vcpu_wake_running);
  goto out;
  }
 -
  if ( unlikely(__vcpu_on_runq(svc)) )

 Does this make the 'if' butt right up against the '{'?  Is that bad?

 Other than that:

 Acked-by: George Dunlap george.dun...@eu.citrix.com

 As far as I can see, the series is in:
 
 http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=8b0e94da0e23221a6d7ea19bfbd24a407db44de8
 
 and whoever committed it (Jan, probably?), took care of leaving that
 blank line in place, which I'm fine with, of course. :-)

Sweet. :-)

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 12:14, dario.faggi...@citrix.com wrote:
 On Tue, 2015-03-17 at 10:54 +, Jan Beulich wrote:
 Finally, as said in different contexts earlier, I think unconditionally
 acquiring locks in dumping routines isn't the best practice. At least
 in non-debug builds I think these should be try-locks only, skipping
 the dumping when a lock is busy.
 
 That makes sense. It also looks, if not completely orthogonal, something
 that can be done in a follow-up patch wrt this series. I.e., we first
 move the locking logic where it belongs, and then we rework it toward
 using _trylock(), does this make sense?

Sure, the order doesn't matter all that much. And the keyhandler
issue is certainly affecting code elsewhere too, so getting this
improved would presumably not be part of a scheduler specific
patch series.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)

2015-03-17 Thread George Dunlap

On 03/17/2015 11:25 AM, Jan Beulich wrote:
 On 17.03.15 at 12:05, george.dun...@eu.citrix.com wrote:
 On 03/17/2015 10:54 AM, Jan Beulich wrote:
 Finally, as said in different contexts earlier, I think unconditionally
 acquiring locks in dumping routines isn't the best practice. At least
 in non-debug builds I think these should be try-locks only, skipping
 the dumping when a lock is busy.

 You mean so that we don't block the console if there turns out to be a
 deadlock?
 
 For example. And also to not unduly get in the way of an otherwise
 extremely busy system.

I don't understand this last argument.  If you're using the debug keys,
you want to know about the state of the system.  I would much rather my
system ran 25% slower for the 5 seconds the debug key was dumping
information, and have a complete snapshot of the system, than for it to
only run 10% slower and to have half the information missing.  The
upshot of missing information would likely be that I have to press the
debug key 3-4 times in a row, meaning I'd be running 10% slower for 20
seconds rather than 25% slower for 5 seconds.

And in any case, the effect of being able to *successfully* grab the
private lock is going to have a much larger impact on the system.

All in all, I don't think the performance of the debug keys should be a
major concern.  The only thing I'd be worried about is making the system
as diagnosable as possible if things have already gone pear-shaped
(e.g., if there's a deadlock).

 Yes, that might be a possible compromise. I could also imagine
 another debug key allowing to alter the behavior, i.e. for when
 one absolutely wants the information and doesn't care about
 the state of the system.

Possible, but it seems like a lot of complication for what it buys you.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI

2015-03-17 Thread Manish Jaggi



On Tuesday 17 March 2015 12:58 PM, Jan Beulich wrote:

On 17.03.15 at 06:26, mja...@caviumnetworks.com wrote:

In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a
hypercall to inform xen that a new pci device has been added.
If we were to inform xen about a new pci bus that is added there are  2 ways
a) Issue the hypercall from drivers/pci/probe.c
b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue
PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that
segment number (s_bdf), it will return an error
SEG_NO_NOT_FOUND. After that the linux xen code could issue the
PHYSDEVOP_pci_host_bridge_add hypercall.

I think (b) can be done with minimal code changes. What do you think ?

I'm pretty sure (a) would even be refused by the maintainers, unless
there already is a notification being sent. As to (b) - kernel code could
keep track of which segment/bus pairs it informed Xen about, and
hence wouldn't even need to wait for an error to be returned from
the device-add request (which in your proposal would need to be re-
issued after the host-bridge-add).

Have a query on the CFG space address to be passed as hypercall parameter.
The of_pci_get_host_bridge_resource only parses the ranges property and 
not reg.
reg property has the CFG space address, which is usually stored in 
private pci host controller driver structures.


so pci_dev 's parent pci_bus would not have that info.
One way is to add a method in struct pci_ops but not sure it will be 
accepted or not.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)

2015-03-17 Thread George Dunlap

On 03/17/2015 11:43 AM, Jan Beulich wrote:
 On 17.03.15 at 12:32, george.dun...@eu.citrix.com wrote:
 On 03/17/2015 11:25 AM, Jan Beulich wrote:
 On 17.03.15 at 12:05, george.dun...@eu.citrix.com wrote:
 On 03/17/2015 10:54 AM, Jan Beulich wrote:
 Finally, as said in different contexts earlier, I think unconditionally
 acquiring locks in dumping routines isn't the best practice. At least
 in non-debug builds I think these should be try-locks only, skipping
 the dumping when a lock is busy.

 You mean so that we don't block the console if there turns out to be a
 deadlock?

 For example. And also to not unduly get in the way of an otherwise
 extremely busy system.

 I don't understand this last argument.  If you're using the debug keys,
 you want to know about the state of the system.  I would much rather my
 system ran 25% slower for the 5 seconds the debug key was dumping
 information, and have a complete snapshot of the system, than for it to
 only run 10% slower and to have half the information missing.  The
 upshot of missing information would likely be that I have to press the
 debug key 3-4 times in a row, meaning I'd be running 10% slower for 20
 seconds rather than 25% slower for 5 seconds.
 
 Yes, I understand this, and in many cases this is the perspective to
 take. Yet I've been in the situation where suggesting the use of
 debug keys to learn something about a (partially) live locked system
 would have had the risk of causing further corruption to it, and
 hence a more careful state dumping approach would have been
 desirable.
 
 All in all, I don't think the performance of the debug keys should be a
 major concern.  The only thing I'd be worried about is making the system
 as diagnosable as possible if things have already gone pear-shaped
 (e.g., if there's a deadlock).
 
 It's not their performance that's of concern, but the effect they
 may have on the performance (or even correctness - see how
 many process_pending_softirqs() calls we had to sprinkle around
 over the years) of other code.

So it sounds like maybe we're actually on the same page, but are using
words slightly differently.  :-)  It sounds like we agree that the
ability to tread carefully on a system which may be having trouble, in
order not to make it worse, is important.  For instance, not wedging the
serial console behind a deadlocked lock, and not further corrupting a
system that had gotten itself wedged in livelock.  Those are things I
would classify under correctness and/or diagnosis.

When I say performance is not a concern, I mean it does not concern
me that someone's web page loads 25% slower for the five seconds it
takes to dump the information.  If delaying other parts of the system
causes the system to get wedged or crash, that's obviously a problem.

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 13:06, mja...@caviumnetworks.com wrote:

 On Tuesday 17 March 2015 12:58 PM, Jan Beulich wrote:
 On 17.03.15 at 06:26, mja...@caviumnetworks.com wrote:
 In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a
 hypercall to inform xen that a new pci device has been added.
 If we were to inform xen about a new pci bus that is added there are  2 ways
 a) Issue the hypercall from drivers/pci/probe.c
 b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue
 PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that
 segment number (s_bdf), it will return an error
 SEG_NO_NOT_FOUND. After that the linux xen code could issue the
 PHYSDEVOP_pci_host_bridge_add hypercall.

 I think (b) can be done with minimal code changes. What do you think ?
 I'm pretty sure (a) would even be refused by the maintainers, unless
 there already is a notification being sent. As to (b) - kernel code could
 keep track of which segment/bus pairs it informed Xen about, and
 hence wouldn't even need to wait for an error to be returned from
 the device-add request (which in your proposal would need to be re-
 issued after the host-bridge-add).
 Have a query on the CFG space address to be passed as hypercall parameter.
 The of_pci_get_host_bridge_resource only parses the ranges property and 
 not reg.
 reg property has the CFG space address, which is usually stored in 
 private pci host controller driver structures.
 
 so pci_dev 's parent pci_bus would not have that info.
 One way is to add a method in struct pci_ops but not sure it will be 
 accepted or not.

I'm afraid I don't understand what you're trying to tell me.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 12:05, george.dun...@eu.citrix.com wrote:
 On 03/17/2015 10:54 AM, Jan Beulich wrote:
 Finally, as said in different contexts earlier, I think unconditionally
 acquiring locks in dumping routines isn't the best practice. At least
 in non-debug builds I think these should be try-locks only, skipping
 the dumping when a lock is busy.
 
 You mean so that we don't block the console if there turns out to be a
 deadlock?

For example. And also to not unduly get in the way of an otherwise
extremely busy system.

 That makes some sense; but on a busy system that would mean a
 non-negligible chance that any give keystroke would be missing
 information about some cpu or other, which would be pretty frustrating
 for someone trying to figure out the state of their system.

Right - ideally there would be an indication of the skipped state
left in the log.

 Would it make sense to have a version of spin_trylock for use in this
 kind of situation that waits  retries a reasonable number of times
 before giving up?

Yes, that might be a possible compromise. I could also imagine
another debug key allowing to alter the behavior, i.e. for when
one absolutely wants the information and doesn't care about
the state of the system.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v7] sndif: add ABI for Para-virtual sound

2015-03-17 Thread Ian Campbell

On Thu, 2015-03-12 at 18:14 +, Lars Kurth wrote:
 Hi,I nearly missed this. Please make sure you forward stuff and change
 the headline if you want me to look into things. Otherwise I may miss
 it.

Sure, I'll try and remember.

FYI before Ian J went away he mentioned that he had raised some
questions/issues (either on this or a previous version) which had not
yet been answered (or maybe not answered to his satisfaction, I'm not
sure) but that if those were addressed he would take a look with a view
to acking the interface for inclusion in xen.git.

(I've not looked in the threads for it, so I don't know the exact
state).

 From my perspective, this exactly the kind of scenario why we created
 the embedded / automotive subproject, with an option to store code in
 repos owned by the project. 
 
 Given that the primary use-case of these drivers is embedded /
 automotive, my suggestion would be to
 1.a) Use a repo in the embedded / automotive pv driver subproject to
 host the spec - but use a file system structure that matches the xen
 tree
 1.b) I would assume there would be one back-end and several front-ends
 for these drivers and some would eventually appear in trees owned by
 the embedded / automotive pv driver subproject
 
 In this case, the maintainer responsibility would fall to members of
 the embedded / automotive pv driver subproject. Once there are several
 implementations, and enough people with skills to review we can
 re-visit where the spec and drivers live. 
 
 We can have a discussion about criteria of when to move, but I don't
 think that makes a lot of sense. I think the concerns that need to be
 addressed are:
 2.a) Enough skills to review the code / protocols from different
 stake-holders - this should happen with time, once the spec and code
 are there. And of course once the embedded / automotive pv driver
 subproject graduates, that will also give extra weight to its
 maintainers in the wider community
 2.b) Of course if there was a strong case that PV sound drivers are
 extremely useful for core data centre use-cases, I would probably
 suggest another approach
 
 Maybe 2.b) needs to be checked with Intel folks - there may be some
 sound requirement for XenGT
 
 Would this work as a way forward?

I think the main things which is missing is some decision as to the the
point at which we would consider the ABI for a PV protocol fixed, i.e.
to be maintained in a backwards compatible manner from then on. 

That's of particular importance when one end of the pair is implemented
in external projects (e.g. OS driver frontends). If the interface is not
declared stable then changes would be allowed which would invalidate
those drivers.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 18/18] x86: add multiboot2 protocol support for EFI platforms

2015-03-17 Thread Daniel Kiper

On Tue, Mar 17, 2015 at 10:32:01AM +, Jan Beulich wrote:
  On 30.01.15 at 18:54, daniel.ki...@oracle.com wrote:
  @@ -94,6 +111,17 @@ ENTRY(start)
   gdt_boot_descr:
   .word   6*8-1
   .long   sym_phys(trampoline_gdt)
  +.long   0 /* Needed for 64-bit lgdt */
  +
  +cs32_switch_addr:
  +.long   sym_phys(cs32_switch)
  +.long   BOOT_CS32
  +
  +efi_st:
  +.quad   0
  +
  +efi_ih:
  +.quad   0
 
   .Lbad_cpu_msg: .asciz ERR: Not a 64-bit CPU!
   .Lbad_ldr_msg: .asciz ERR: Not a Multiboot bootloader!
  @@ -124,6 +152,133 @@ print_err:
   .Lhalt: hlt
   jmp .Lhalt
 
  +.code64
  +
  +__efi64_start:
  +cld
  +
  +/* Bootloaders may set multiboot[12].mem_lower to a nonzero value 
  */
  +xor %edx,%edx
  +
  +/* Check for Multiboot2 bootloader */
  +cmp $MULTIBOOT2_BOOTLOADER_MAGIC,%eax
  +je  efi_multiboot2_proto
  +
  +jmp not_multiboot
  +
  +efi_multiboot2_proto:

 jne not_multiboot (and efi_multiboot2_proto dropped altogether)

[...]

Jan, thanks for your comments. Now I am working on relocatable early Xen code.
I hope that I will finish that this week and start tests on this crazy
UEFI platforms. Then I get back to this patch series and replay for your
and other guys comments.

Daniel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] OpenStack - Libvirt+Xen CI overview

2015-03-17 Thread Dario Faggioli

On Mon, 2015-03-16 at 14:00 -0400, Konrad Rzeszutek Wilk wrote:
 On Tue, Mar 10, 2015 at 12:03:13PM +, Bob Ball wrote:

  I'd like to organise a meeting to walk through the various components of 
  the CI with those who are interested, so this is an initial call to find 
  out who is interested in finding out more!
  
 
 I am always interested in it.

I know next to nothing about OpenStack, and would be interested in
attending something like this to improve that. :-)

Regards,
Dario


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v7] sndif: add ABI for Para-virtual sound

2015-03-17 Thread Stefano Stabellini

On Tue, 17 Mar 2015, Ian Campbell wrote:
 On Thu, 2015-03-12 at 18:14 +, Lars Kurth wrote:
  Hi,I nearly missed this. Please make sure you forward stuff and change
  the headline if you want me to look into things. Otherwise I may miss
  it.
 
 Sure, I'll try and remember.
 
 FYI before Ian J went away he mentioned that he had raised some
 questions/issues (either on this or a previous version) which had not
 yet been answered (or maybe not answered to his satisfaction, I'm not
 sure) but that if those were addressed he would take a look with a view
 to acking the interface for inclusion in xen.git.
 
 (I've not looked in the threads for it, so I don't know the exact
 state).
 
  From my perspective, this exactly the kind of scenario why we created
  the embedded / automotive subproject, with an option to store code in
  repos owned by the project. 
  
  Given that the primary use-case of these drivers is embedded /
  automotive, my suggestion would be to
  1.a) Use a repo in the embedded / automotive pv driver subproject to
  host the spec - but use a file system structure that matches the xen
  tree
  1.b) I would assume there would be one back-end and several front-ends
  for these drivers and some would eventually appear in trees owned by
  the embedded / automotive pv driver subproject
  
  In this case, the maintainer responsibility would fall to members of
  the embedded / automotive pv driver subproject. Once there are several
  implementations, and enough people with skills to review we can
  re-visit where the spec and drivers live. 
  
  We can have a discussion about criteria of when to move, but I don't
  think that makes a lot of sense. I think the concerns that need to be
  addressed are:
  2.a) Enough skills to review the code / protocols from different
  stake-holders - this should happen with time, once the spec and code
  are there. And of course once the embedded / automotive pv driver
  subproject graduates, that will also give extra weight to its
  maintainers in the wider community
  2.b) Of course if there was a strong case that PV sound drivers are
  extremely useful for core data centre use-cases, I would probably
  suggest another approach
  
  Maybe 2.b) needs to be checked with Intel folks - there may be some
  sound requirement for XenGT
  
  Would this work as a way forward?
 
 I think the main things which is missing is some decision as to the the
 point at which we would consider the ABI for a PV protocol fixed, i.e.
 to be maintained in a backwards compatible manner from then on. 
 
 That's of particular importance when one end of the pair is implemented
 in external projects (e.g. OS driver frontends). If the interface is not
 declared stable then changes would be allowed which would invalidate
 those drivers.

I think that you are right. Declaring the interface stable or unstable
is far more important than where the code or the spec lives.

If we formally specified within the spec that the ABI is not maintained
for backward compatibility, the bar for acceptance in xen-unstable would
be far lower. Maybe the spec could even be accepted as is if nobody has
any comments?

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI

2015-03-17 Thread Konrad Rzeszutek Wilk

On Tue, Mar 17, 2015 at 10:56:48AM +0530, Manish Jaggi wrote:
 
 On Friday 27 February 2015 10:20 PM, Ian Campbell wrote:
 On Fri, 2015-02-27 at 16:35 +, Jan Beulich wrote:
 On 27.02.15 at 16:24, ian.campb...@citrix.com wrote:
 On Fri, 2015-02-27 at 14:54 +, Stefano Stabellini wrote:
 MMCFG is a Linux config option, not to be confused with
 PHYSDEVOP_pci_mmcfg_reserved that is a Xen hypercall interface.  I don't
 think that the way Linux (or FreeBSD) call PHYSDEVOP_pci_mmcfg_reserved
 is relevant.
 My (possibly flawed) understanding was that pci_mmcfg_reserved was
 intended to propagate the result of dom0 parsing some firmware table or
 other to the hypevisor.
 That's not flawed at all.
 I think that's a first in this thread ;-)
 
 In Linux dom0 we call it walking pci_mmcfg_list, which looking at
 arch/x86/pci/mmconfig-shared.c pci_parse_mcfg is populated by walking
 over a struct acpi_table_mcfg (there also appears to be a bunch of
 processor family derived entries, which I guess are quirks of some
 sort).
 Right - this parses ACPI tables (plus applies some knowledge about
 certain specific systems/chipsets/CPUs) and verifies that the space
 needed for the MMCFG region is properly reserved either in E820 or
 in the ACPI specified resources (only if so Linux decides to use
 MMCFG and consequently also tells Xen that it may use it).
 Thanks.
 
 So I think what I wrote in 1424948710.14641.25.ca...@citrix.com
 applies as is to Device Tree based ARM devices, including the need for
 the PHYSDEVOP_pci_host_bridge_add call.
 
 On ACPI based devices we will have the MCFG table, and things follow
 much as for x86:
 
* Xen should parse MCFG to discover the PCI host-bridges
* Dom0 should do likewise and call PHYSDEVOP_pci_mmcfg_reserved in
  the same way as Xen/x86 does.
 
 The SBSA, an ARM standard for servers, mandates various things which
 we can rely on here because ACPI on ARM requires an SBSA compliant
 system. So things like odd quirks in PCI controllers or magic setup are
 spec'd out of our zone of caring (into the firmware I suppose), hence
 there is nothing like the DT_DEVICE_START stuff to register specific
 drivers etc.
 
 The PHYSDEVOP_pci_host_bridge_add call is not AFAICT needed on ACPI ARM
 systems (any more than it is on x86). We can decide whether to omit it
 from dom0 or ignore it from Xen later on.
 
 (Manish, this is FYI, I don't expect you to implement ACPI support!)
 
 In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a
 hypercall to inform xen that a new pci device has been added.
 If we were to inform xen about a new pci bus that is added there are  2 ways
 a) Issue the hypercall from drivers/pci/probe.c
 b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue
 PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that
 segment number (s_bdf), it will return an error
 SEG_NO_NOT_FOUND. After that the linux xen code could issue the
 PHYSDEVOP_pci_host_bridge_add hypercall.

Couldn't the code figure out from 'struct pci_dev' whether the device
is a bridge or an PCI device? And then do the proper hypercall?

Interesting thing you _might_ hit (that I did) was that if you use
'bus=reassign' which re-assigns the bus numbers during scan - Xen
gets very very confused. As in, the bus devices that Xen sees vs the
ones Linux sees are different.

Whether you will encounter this depends on whether the bridge
devices and pci devices end up having an differnet bus number
from what Xen scanned, and from what Linux has determined.

(As in, Linux has found a bridge device with more PCI devices -so
it repograms the bridge which moves all of the other PCI devices 
below it by X number).

The reason I am bringing it up - it sounds like Xen will have no clue
about some devices - and be told about it by Linux  - if some reason
it has the same bus number as some that Xen already scanned - gah!
 
 I think (b) can be done with minimal code changes. What do you think ?

Less code == better.
 
 Ian.
 
 
 ___
 Xen-devel mailing list
 Xen-devel@lists.xen.org
 http://lists.xen.org/xen-devel
 
 
 ___
 Xen-devel mailing list
 Xen-devel@lists.xen.org
 http://lists.xen.org/xen-devel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v7] sndif: add ABI for Para-virtual sound

2015-03-17 Thread Ian Campbell

On Tue, 2015-03-17 at 13:05 +, Lars Kurth wrote:
  On 17 Mar 2015, at 11:40, Ian Campbell ian.campb...@citrix.com wrote:
  
  On Thu, 2015-03-12 at 18:14 +, Lars Kurth wrote:
  Hi,I nearly missed this. Please make sure you forward stuff and change
  the headline if you want me to look into things. Otherwise I may miss
  it.
  
  Sure, I'll try and remember.
  
  FYI before Ian J went away he mentioned that he had raised some
  questions/issues (either on this or a previous version) which had not
  yet been answered (or maybe not answered to his satisfaction, I'm not
  sure) but that if those were addressed he would take a look with a view
  to acking the interface for inclusion in xen.git.
 
 OK. So this means there are some concrete lose ends, which need to be 
 followed up on. I also remember that there was a discussion on how we should 
 specify protocols, which does not appear to have fully concluded either. 
 
  
  Would this work as a way forward?
  
  I think the main things which is missing is some decision as to the the
  point at which we would consider the ABI for a PV protocol fixed, i.e.
  to be maintained in a backwards compatible manner from then on. 
 
 What do we do with new APIs in such situations?

We review then carefully and hope we get them right. We manage to get
this right at least some of the time because many of us are familiar
with the issues WRT e.g. memory management hypercalls.

This is what I was getting at with people are naturally a bit cautious
about creating new ABIs, which must be maintained long term, for types
of device with which they are not really familiar. in my initial mail.
The which they are not really familiar is pretty key.

It's also (normally) not too hard to add a new hypercall fixing a
shortcoming in an existing one while retaining backwards compat,
compared with doing that for an I/O protocol (see: netchannel2).

In the I/O case adding extensions also is reasonably well understood and
something we manage, but fixing a core issue is much harder (see: the
non-uniformity of the blk protocol over different architectures, or the
ring space wastage due to various power of two requirements, neither of
which can realistically be properly fixed).

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH RFC V2 3/5] libxl: add pvusb API

2015-03-17 Thread Juergen Gross


Hi Chunyan,

I've found another problem while trying to write a qemu based pvUSB
backend.

On 01/19/2015 09:28 AM, Chunyan Liu wrote:

Add pvusb APIs, including:
  - attach/detach (create/destroy) virtual usb controller.
  - attach/detach usb device
  - list assignable usb devices in host
  - some other helper functions

Signed-off-by: Chunyan Liu cy...@suse.com
Signed-off-by: Simon Cao caobosi...@gmail.com
---


...


diff --git a/tools/libxl/libxl_usb.c b/tools/libxl/libxl_usb.c
new file mode 100644
index 000..830a846
--- /dev/null
+++ b/tools/libxl/libxl_usb.c


...


+/* xenstore usb data */
+static int libxl__device_usb_add_xenstore(libxl__gc *gc, uint32_t domid,
+  libxl_device_usb *usb)
+{
+libxl_ctx *ctx = CTX;
+char *be_path;
+int rc;
+libxl_domain_config d_config;
+libxl_device_usb usb_saved;
+libxl__domain_userdata_lock *lock = NULL;
+
+libxl_domain_config_init(d_config);
+libxl_device_usb_init(usb_saved);
+libxl_device_usb_copy(CTX, usb_saved, usb);
+
+be_path = libxl__sprintf(gc, %s/backend/vusb/%d/%d,
+libxl__xs_get_dompath(gc, 0), domid, usb-ctrl);
+if (libxl__wait_for_backend(gc, be_path, 4)  0) {


Don't do this! That's the reason I had to change my backend driver in
order to support assignment of a usb device via config file. Normally
the backend will witch to state 4 only after the frontend is started.

You can just remove waiting for the backend here. The backend has to
check all ports when it is changing is state to 4 (connected).


+rc = ERROR_FAIL;
+goto out;
+}
+
+lock = libxl__lock_domain_userdata(gc, domid);
+if (!lock) {
+rc = ERROR_LOCK_FAIL;
+goto out;
+}
+
+rc = libxl__get_domain_configuration(gc, domid, d_config);
+if (rc) goto out;
+
+DEVICE_ADD(usb, usbs, domid, usb_saved, COMPARE_USB, d_config);
+
+rc = libxl__set_domain_configuration(gc, domid, d_config);
+if (rc) goto out;
+
+be_path = libxl__sprintf(gc, %s/port/%d, be_path, usb-port);
+LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, Adding new usb device to xenstore);
+if (libxl__xs_write_checked(gc, XBT_NULL, be_path, usb-intf)) {
+rc = ERROR_FAIL;
+goto out;
+}
+
+rc = 0;
+
+out:
+if (lock) libxl__unlock_domain_userdata(lock);
+libxl_device_usb_dispose(usb_saved);
+libxl_domain_config_dispose(d_config);
+return rc;
+
+}
+
+static int libxl__device_usb_remove_xenstore(libxl__gc *gc, uint32_t domid,
+ libxl_device_usb *usb)
+{
+libxl_ctx *ctx = CTX;
+char *be_path;
+
+be_path = libxl__sprintf(gc, %s/backend/vusb/%d/%d,
+libxl__xs_get_dompath(gc, 0), domid, usb-ctrl);
+if (libxl__wait_for_backend(gc, be_path, 4)  0)


Remove this one, too.

Juergen

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH v3 21/24] tools/(lib)xl: Add partial device tree support for ARM

2015-03-17 Thread Julien Grall

Hi Ian,

Sorry for the late answer.

On 23/02/15 17:22, Ian Campbell wrote:
 On Mon, 2015-02-23 at 17:06 +, Julien Grall wrote:
 On 23/02/15 11:46, Ian Campbell wrote:
 On Tue, 2015-01-13 at 14:25 +, Julien Grall wrote:
 Let the user to pass additional nodes to the guest device tree. For this
 purpose, everything in the node /passthrough from the partial device tree 
 will
 be copied into the guest device tree.

 The node /aliases will be also copied to allow the user to define aliases
 which can be used by the guest kernel.

 A simple partial device tree will look like:

 /dts-v1/;

 / {
 #address-cells = 2;
 #size-cells = 2;

 Are these mandatory/required as implied below, or only the ones inside
 the passthrough node (which is what I would expect).

 It's to make DTC quiet.
 
 Maybe add /* Keep DTC happy */ to both lines?
 


 passthrough {
 compatible = simple-bus;
 ranges;
 #address-cells = 2;
 #size-cells = 2;

 /* List of your nodes */
 }
 };

 Note that:
 * The interrupt-parent proporties will be added by the toolstack in

 properties

 the root node
 * The properties compatible, ranges, #address-cells and #size-cells
 in /passthrough are mandatory.

 Does ranges need to be the empty form? I think ranges = stuff stuff
 would be illegal?

 It's not illegal as long as you correctly use it in the inner reg.
 
 OK. This could be explained in some more complete documentaiton I think.
 (It's a doc day on Wednesday ;-))
 

 Also, I admit that the ranges is confusing to read.


 Signed-off-by: Julien Grall julien.gr...@linaro.org
 Cc: Ian Jackson ian.jack...@eu.citrix.com
 Cc: Wei Liu wei.l...@citrix.com

 ---
 Changes in v3:
 - Patch added
 ---
  docs/man/xl.cfg.pod.5   |   7 ++
  tools/libxl/libxl_arm.c | 253 
 
  tools/libxl/libxl_types.idl |   1 +
  tools/libxl/xl_cmdimpl.c|   1 +
  4 files changed, 262 insertions(+)

 diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
 index e2f91fc..225b782 100644
 --- a/docs/man/xl.cfg.pod.5
 +++ b/docs/man/xl.cfg.pod.5
 @@ -398,6 +398,13 @@ not emulated.
  Specify that this domain is a driver domain. This enables certain
  features needed in order to run a driver domain.
  
 +=item Bdevice_tree=PATH
 +
 +Specify a partial device tree (compiled via the Device Tree Compiler).
 +Everything under the node /passthrough will be copied into the guest
 +device tree. For convenience, the node /aliases is also copied to allow
 +the user to defined aliases which can be used by the guest kernel.
 +
  =back
  
  =head2 Devices
 diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c
 index 53177eb..619458b 100644
 --- a/tools/libxl/libxl_arm.c
 +++ b/tools/libxl/libxl_arm.c
 @@ -540,6 +540,238 @@ out:
  }
  }
  
 +static bool check_overrun(uint64_t a, uint64_t b, uint32_t max)
 +{
 +return ((a + b)  UINT_MAX || (a + b)  max);

 Both halves here will fail if e.g. a == UINT64_MAX-1 and b == 2, so e..g
 a+b = UINT_MAX and  max.

 Oops right.

 To avoid this you should check that a and b are both less than some
 fraction of UINT64_MAX before the other checks, which would ensure the
 overflow can't happen, perhaps even UINT32_MAX would be acceptable for
 this use, depending on the input types involved.

 max is an uint32_t so a and b should be inferior to UINT32_MAX.
 
 by inferior to do you mean less than? Or something to do with type
 promotion/demotion rules?

I meant less than.


 What about

 a  UINT_MAX  b  UINT_MAX  (a + b)  UINT_MAX
 
 Isn't that inverted from the sense which the function name requires?
 
 Given the complexity in reasoning about this I think a series of
 individual if and return statements which check each precondition one at
 a time and return failure if necessary wuold be clearer to read and
 reason about than trying to encode it all in one expression.

Given that we will mark the option unsafe. I'm thinking to drop this
check and some others. This would make the code less complex and avoid
to check on half of the FDT.

 
 diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
 index 1214d2e..5651110 100644
 --- a/tools/libxl/libxl_types.idl
 +++ b/tools/libxl/libxl_types.idl
 @@ -399,6 +399,7 @@ libxl_domain_build_info = Struct(domain_build_info,[
  (kernel,   string),
  (cmdline,  string),
  (ramdisk,  string),
 +(device_tree,  string),

 Needs a #define LIBXL_HAVE... in libxl.h

 Hmmm why? This will be set to empty when libxl_domain_build_info is
 initialized.
 
 So that applications which use libxl can take advantage of the new
 feature when built against versions of the library which support it,
 without breaking when built against versions which do not.

Hmmm right. I will add the LIBXL_HAVE_PARTIAL_DEVICE_TREE.

Regards,

-- 
Julien Grall

___

Re: [Xen-devel] [PATCH] flask/policy: fix static device labeling examples

2015-03-17 Thread Julien Grall

(CC Ian and Jan)

Hi,

Is there any blocker to push this patch? It's useful for using XSM with
passthrough.

Regards,

On 11/03/15 14:59, Daniel De Graaf wrote:
 The definitions of static device labels must be placed at the end of the
 policy.conf before passing it to checkpolicy; the existing examples
 (which are commented out) are in the wrong location.  Create a new file
 for device contexts which will place them in the proper location.
 
 This also removes some directions about using the xen policy type in
 checkpolicy which is no longer needed.
 
 Reported-by: Julien Grall julien.gr...@linaro.org
 Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov
 ---
  docs/misc/xsm-flask.txt  | 31 +++
  tools/flask/policy/Makefile  |  3 ++-
  tools/flask/policy/policy/device_contexts| 32 +++
  tools/flask/policy/policy/modules/xen/xen.te | 38 
 +++-
  4 files changed, 41 insertions(+), 63 deletions(-)
  create mode 100644 tools/flask/policy/policy/device_contexts
 
 diff --git a/docs/misc/xsm-flask.txt b/docs/misc/xsm-flask.txt
 index ab05913..e169937 100644
 --- a/docs/misc/xsm-flask.txt
 +++ b/docs/misc/xsm-flask.txt
 @@ -335,33 +335,8 @@ memory, or even changing certain BIOS settings). Dynamic 
 labeling requires that
  the domain performing the labeling be trusted to label all the devices in the
  system properly.
  
 -To enable static device labeling, a checkpolicy = 2.0.20 and libsepol 
 =2.0.39
 -are required. The policy Makefile (tools/flask/policy/Makefile) must also be
 -changed as follows:
 -
 -
 -#
 -# Build a binary policy locally
 -#
 -$(POLVER): policy.conf
 -@echo Compiling $(NAME) $(POLVER)
 -   $(QUIET) $(CHECKPOLICY) $^ -o $@(Comment out this line)
 -# Uncomment line below to enable policies for devices
 -#$(QUIET) $(CHECKPOLICY) -t Xen $^ -o $@   (Uncomment this line)
 -
 -
 -#
 -# Install a binary policy
 -#
 -$(LOADPATH): policy.conf
 -@echo Compiling and installing $(NAME) $(LOADPATH)
 -   $(QUIET) $(CHECKPOLICY) $^ -o $@(Comment out this line)
 -# Uncomment line below to enable policies for devices
 -#$(QUIET) $(CHECKPOLICY) -t Xen $^ -o $@   (Uncomment this line)
 -
 -
 -IRQs, PCI devices, I/O memory and ports can all be labeled.  There are
 -commented out lines in xen.te policy for examples on how to label devices.
 +IRQs, PCI devices, I/O memory and x86 IO ports can all have labels defined.
 +There are examples commented out in 
 tools/flask/policy/policy/device_contexts.
  
  Device Labeling
  ---
 @@ -378,7 +353,7 @@ lspci output is..
  Region 2: I/O ports at ecc0 [size=32]
  Kernel modules: e1000e
  
 -The labeling can be done with these commands
 +The labeling can be done with these lines in device_contexts:
  
  pirqcon 33 system_u:object_r:nicP_t
  iomemcon 0xfebe0-0xfebff system_u:object_r:nicP_t
 diff --git a/tools/flask/policy/Makefile b/tools/flask/policy/Makefile
 index 58d9ce1..e564396 100644
 --- a/tools/flask/policy/Makefile
 +++ b/tools/flask/policy/Makefile
 @@ -56,6 +56,7 @@ MLSSUPPORT := $(POLDIR)/mls
  USERS := $(POLDIR)/users
  CONSTRAINTS := $(POLDIR)/constraints
  ISID_DEFS := $(POLDIR)/initial_sids
 +DEV_OCONS := $(POLDIR)/device_contexts
  
  # config file paths
  GLOBALTUN := $(POLDIR)/global_tunables
 @@ -98,7 +99,7 @@ POLICY_SECTIONS += $(M4SUPPORT) $(MLSSUPPORT)
  POLICY_SECTIONS += $(ALL_INTERFACES)
  POLICY_SECTIONS += $(GLOBALTUN)
  POLICY_SECTIONS += $(ALL_MODULES)
 -POLICY_SECTIONS += $(USERS) $(CONSTRAINTS) $(ISID_DEFS)
 +POLICY_SECTIONS += $(USERS) $(CONSTRAINTS) $(ISID_DEFS) $(DEV_OCONS)
  
  all: $(POLICY_FILENAME)
  
 diff --git a/tools/flask/policy/policy/device_contexts 
 b/tools/flask/policy/policy/device_contexts
 new file mode 100644
 index 000..c2de7e7
 --- /dev/null
 +++ b/tools/flask/policy/policy/device_contexts
 @@ -0,0 +1,32 @@
 +###
 +#
 +# Label devices for delegation
 +#
 +# The PCI, IRQ, memory, and I/O port ranges are hardware-specific.
 +#
 +###
 +
 +# label e1000e nic
 +#pirqcon 33 system_u:object_r:nic_dev_t
 +#pirqcon 55 system_u:object_r:nic_dev_t
 +#iomemcon 0xfebe0-0xfebff system_u:object_r:nic_dev_t
 +#iomemcon 0xfebd9 system_u:object_r:nic_dev_t
 +#ioportcon 0xecc0-0xecdf system_u:object_r:nic_dev_t
 +#pcidevicecon 0xc800 system_u:object_r:nic_dev_t
 +
 +# label e100 nic
 +#pirqcon 16 system_u:object_r:nic_dev_t
 +#iomemcon 0xfe5df system_u:object_r:nic_dev_t
 +#iomemcon 0xfe5e0-0xfe5ff system_u:object_r:nic_dev_t
 +#iomemcon 0xc2000-0xc200f system_u:object_r:nic_dev_t
 +#ioportcon 0xccc0-0xcd00 system_u:object_r:nic_dev_t
 +
 +# label usb 1d.0-2 1d.7
 +#pirqcon 23 system_u:object_r:nic_dev_t

Re: [Xen-devel] [PATCH v2 2/4] xen/arm: Add GSER region to ThunderX platform mapping

2015-03-17 Thread Vijay Kilari

Hi Ian,

On Thu, Mar 5, 2015 at 10:40 PM, Ian Campbell ian.campb...@citrix.com wrote:
 On Thu, 2015-03-05 at 16:46 +, Ian Campbell wrote:
 On Wed, 2015-03-04 at 11:36 +0530, vijay.kil...@gmail.com wrote:
  From: Vijaya Kumar K vijaya.ku...@caviumnetworks.com
 
  Add GSER region to thunderx platfrom specific mappings.
  This region is not mentioned in DT. This is required by
  PCI driver to detect and configure pci devices attached.
 
  In future we can remove this mapping, if pci driver
  in Dom does not require this.

 How do we know what the PCI driver in dom0 needs? I don't think we can,
 so we can in effect never remove this specific mapping, which is a
 shame.

 Unless you have some scheme in mind which would allow us to do so?

 IMHO by far the best solution would be to add this device to the DTB so
 that it is correctly mapped. I'm not quite sure what that will look like
 since thne mainline DTB doesn't have the PCI node at all.

 Looking at a more recent DTB which I have access to it seems like
 0x87e09000 is correctly covered by a ranges entry on the PCI
 controller node.

Where did you find recent DTB?.  AFAIK, this region does not fall
under any pci controller range.


 So I think all which is needed is a) to use this updated DTB and b) my
 series xen: arm: Parse PCI DT nodes' ranges and interrupt-map from
 last October which, as it happens, I've been working on bringing up to
 date yesterday and today (one more thing to clean up before I repost).

Because it is not covered under any PCI ranges, your patch series
still does not help.
Infact, this is common region for SERDES configuration so cannot bind
to any particular pci controller range.

Regards
Vijay

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH] flask/policy: fix static device labeling examples

2015-03-17 Thread Jan Beulich

 On 17.03.15 at 14:03, julien.gr...@linaro.org wrote:
 (CC Ian and Jan)

This is mostly about tools stuff:

  docs/misc/xsm-flask.txt  | 31 +++
  tools/flask/policy/Makefile  |  3 ++-
  tools/flask/policy/policy/device_contexts| 32 +++
  tools/flask/policy/policy/modules/xen/xen.te | 38 
 +++-
  4 files changed, 41 insertions(+), 63 deletions(-)

Hence I don't see why you ping me about it.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH RFC V2 4/6] xen: Support for VMCALL mem_events

2015-03-17 Thread Razvan Cojocaru

On 07/11/2014 08:23 PM, Andrew Cooper wrote:
 On 11/07/14 16:43, Razvan Cojocaru wrote:
 Added support for VMCALL events (the memory introspection library
 will have the guest trigger VMCALLs, which will then be sent along
 via the mem_event mechanism).

 Changes since V1:
  - Added a #define and an comment explaining a previous magic
constant.
  - Had MEM_EVENT_REASON_VMCALL explicitly not honour
HVMPME_onchangeonly.

 Signed-off-by: Razvan Cojocaru rcojoc...@bitdefender.com
 ---
  xen/arch/x86/hvm/hvm.c  |9 +
  xen/arch/x86/hvm/vmx/vmx.c  |   18 +-
  xen/include/asm-x86/hvm/hvm.h   |1 +
  xen/include/public/hvm/params.h |4 +++-
  xen/include/public/mem_event.h  |5 +
  5 files changed, 35 insertions(+), 2 deletions(-)

 diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
 index 89a0382..6e86d7c 100644
 --- a/xen/arch/x86/hvm/hvm.c
 +++ b/xen/arch/x86/hvm/hvm.c
 @@ -5564,6 +5564,7 @@ long do_hvm_op(unsigned long op, 
 XEN_GUEST_HANDLE_PARAM(void) arg)
  case HVM_PARAM_MEMORY_EVENT_INT3:
  case HVM_PARAM_MEMORY_EVENT_SINGLE_STEP:
  case HVM_PARAM_MEMORY_EVENT_MSR:
 +case HVM_PARAM_MEMORY_EVENT_VMCALL:
  if ( d == current-domain )
  {
  rc = -EPERM;
 @@ -6199,6 +6200,14 @@ void hvm_memory_event_msr(unsigned long msr, unsigned 
 long value)
 value, ~value, 1, msr);
  }
  
 +void hvm_memory_event_vmcall(unsigned long rip, unsigned long eax)
 +{
 +hvm_memory_event_traps(current-domain-arch.hvm_domain
 + .params[HVM_PARAM_MEMORY_EVENT_VMCALL],
 +   MEM_EVENT_REASON_VMCALL,
 +   rip, ~rip, 1, eax);
 +}
 +
  int hvm_memory_event_int3(unsigned long gla) 
  {
  uint32_t pfec = PFEC_page_present;
 diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
 index 2caa04a..6c63225 100644
 --- a/xen/arch/x86/hvm/vmx/vmx.c
 +++ b/xen/arch/x86/hvm/vmx/vmx.c
 @@ -2879,8 +2879,24 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
  case EXIT_REASON_VMCALL:
  {
  int rc;
 +unsigned long eax = regs-eax;
 +
  HVMTRACE_1D(VMMCALL, regs-eax);
 -rc = hvm_do_hypercall(regs);
 +
 +/* Don't send a VMCALL mem_event unless something
 + * caused the guests's eax register to contain the
 + * VMCALL_EVENT_REQUEST constant. */
 +if ( regs-eax != VMCALL_EVENT_REQUEST )
 +{
 +rc = hvm_do_hypercall(regs);
 +}
 +else
 +{
 +hvm_memory_event_vmcall(guest_cpu_user_regs()-eip, eax);
 +update_guest_eip();
 +break;
 +}
 
 Thinking more about this, it is really a hypercall pretending not to
 be.  It would be better to introduce a real HVMOP_send_mem_event.
 
 From the point of view of your in-guest agent, it would be a vmcall with
 rax = 34 (hvmop) rdi = $N (send_mem_event subop) rsi = data or pointer
 to struct containing data, depending on how exactly you implement the
 hypercall.
 
 You would have the bonus of being able to detect errors, e.g. -ENOENT
 for mem_event not active, get SVM support for free, and not need magic
 numbers, or vendor specific terms like vmcall finding their way into
 the Xen public API.

Actually, this only seems to be the case where mode == 8 in
hvm_do_hypercall() (xen/arch/x86/hvm/hvm.c):

4987  : hvm_hypercall64_table)[eax](rdi, rsi, rdx,
r10, r8, r9);

Otherwise (and this seems to be the case with my Xen build), ebx seems
to be used for the subop:

5033 regs-_eax = hvm_hypercall32_table[eax](ebx, ecx, edx, esi,
edi, ebp);

So, ebx needs to be $N (send_mem_event subop), not rdi. Is this intended
(rdi in one case and ebx in the other)?


Thanks,
Razvan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping

2015-03-17 Thread Dario Faggioli

On Mon, 2015-03-16 at 16:30 -0400, Meng Xu wrote:
 Hi Dario,
 
Hey,

 2015-03-16 13:05 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com:

 
  This change also takes the chance to add a scratch
  cpumask, to avoid having to create one more
  cpumask_var_t on the stack of the dumping routine.
 
 Actually, I have a question about the strength of this design. When we
 have a machine with many cpus, we will end up with allocating a
 cpumask for each cpu. 

Just FTR, what we will end up allocating is:
 - an array of *pointers* to cpumasks with as many elements as the 
   number of pCPUs,
 - a cpumask *only* for the pCPUs subjected to an instance of the RTDS 
   scheduler.

So, for instance, if you have 64 pCPUs, but are using the RTDS scheduler
only in a cpupool with 2 pCPUs, you'll have an array of 64 pointers to
cpumask_t, but only 2 actual cpumasks.

 Is this better than having a cpumask_var_t on
 the stack of the dumping routine, since the dumping routine is not in
 the hot path?
 
George and Jan replied to this already, I think. Allow me to add just a
few words:
  Such scratch area can be used to kill most of the
  cpumasks_var_t local variables in other functions
  in the file, but that is *NOT* done in this chage.
 
This is the point, actually! As said here, this is not only for the sake
of the dumping routine. In fact, ideally, someone will, in the near
future, go throughout the whole file and kill most of the cpumask_t
local variables, and most of the cpumask dynamic allocations, in favour
of using this scratch area.

  @@ -409,6 +423,10 @@ rt_init(struct scheduler *ops)
   if ( prv == NULL )
   return -ENOMEM;
 
  +_cpumask_scratch = xmalloc_array(cpumask_var_t, nr_cpu_ids);
 
 Is it better to use xzalloc_array?
 
Why? IMO, not really. I'm only free()-ing (in rt_free_pdata()) the
elements of the array that have been previously successfully allocated
(in rt_alloc_pdata()), so I don't think there is any special requirement
for all the elements to be NULL right away.

  +if ( _cpumask_scratch == NULL )
  +return -ENOMEM;
  +
   spin_lock_init(prv-lock);
   INIT_LIST_HEAD(prv-sdom);
   INIT_LIST_HEAD(prv-runq);
  @@ -426,6 +444,7 @@ rt_deinit(const struct scheduler *ops)
   {
   struct rt_private *prv = rt_priv(ops);
 
  +xfree(_cpumask_scratch);
   xfree(prv);
   }
 
  @@ -443,6 +462,9 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu)
   per_cpu(schedule_data, cpu).schedule_lock = prv-lock;
   spin_unlock_irqrestore(prv-lock, flags);
 
  +if ( !alloc_cpumask_var(_cpumask_scratch[cpu]) )
 
 Is it better to use zalloc_cpumask_var() here?
 
Nope. It's a scratch area, after all, so one really should not assume it
to be in a specific state (e.g., no bits set as you're suggesting) when
using it.

Thanks and Regards,
Dario


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 1/6] x86: detect and initialize Intel CAT feature

2015-03-17 Thread Konrad Rzeszutek Wilk

On Tue, Mar 17, 2015 at 04:11:33PM +0800, Chao Peng wrote:
 On Fri, Mar 13, 2015 at 09:40:13AM -0400, Konrad Rzeszutek Wilk wrote:
  On Fri, Mar 13, 2015 at 06:13:20PM +0800, Chao Peng wrote:
   Detect Intel Cache Allocation Technology(CAT) feature and store the
   cpuid information for later use. Currently only L3 cache allocation is
   supported. The L3 CAT features may vary among sockets so per-socket
   feature information is stored. The initialization can happen either at
   boot time or when CPU(s) is hot plugged after booting.
   
   Signed-off-by: Chao Peng chao.p.p...@linux.intel.com
   ---
docs/misc/xen-command-line.markdown |  15 +++-
xen/arch/x86/psr.c  | 151 
   +---
xen/include/asm-x86/cpufeature.h|   1 +
3 files changed, 155 insertions(+), 12 deletions(-)
   
   +cat_cpu_init(smp_processor_id());
  
  Do 'if (!cat_cpu_init(..)).`'
  
  as the CPU might not support this.
  
  At which point you should also free the cat_socket_info and
  not register the cpu notifier.
 
 Even the booting CPU does not support this, other CPUs may still support
 this. Generally the feature is a per-socket feature. So break here is
 not the intention.

Oooh, and you did mention that in the git commit description and I dived
right in the code - without looking there - sorry for that noise!

Thought I am curious - what if all the sockets don't support and the
user does try enable it on the command line (user error)? Shouldn't we
then figure out that all of the CPUs don't support and xfree
cat_socket_info and not register the CPU notifier?
 
 Except this, all other comments will be addressed by the next version.

thank you!
 Thanks for your time.
 
 Chao
  
   +register_cpu_notifier(cpu_nfb);
   +}
   +

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping

2015-03-17 Thread Dario Faggioli

On Mon, 2015-03-16 at 19:05 +, George Dunlap wrote:
 On 03/16/2015 05:05 PM, Dario Faggioli wrote:

  @@ -218,7 +224,6 @@ __q_elem(struct list_head *elem)
   static void
   rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc)
   {
  -char cpustr[1024];
   cpumask_t *cpupool_mask;
   
   ASSERT(svc != NULL);
  @@ -229,10 +234,22 @@ rt_dump_vcpu(const struct scheduler *ops, const 
  struct rt_vcpu *svc)
   return;
   }
   
  -cpumask_scnprintf(cpustr, sizeof(cpustr), 
  svc-vcpu-cpu_hard_affinity);
  +cpupool_mask = cpupool_scheduler_cpumask(svc-vcpu-domain-cpupool);
  +/*
  + * We can't just use 'cpumask_scratch' because the dumping can
  + * happen from a pCPU outside of this scheduler's cpupool, and
  + * hence it's not right to use the pCPU's scratch mask (which
  + * may even not exist!). On the other hand, it is safe to use
  + * svc-vcpu-processor's own scratch space, since we own the
  + * runqueue lock.
 
 Since we *hold* the lock.
 
Right, thanks.

  + */
  +cpumask_and(_cpumask_scratch[svc-vcpu-processor], cpupool_mask,
  +svc-vcpu-cpu_hard_affinity);
  +cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch),
  +  _cpumask_scratch[svc-vcpu-processor]);
 
 Just a suggestion, would it be worth making a local variable to avoid
 typing this long thing twice?  

It probably would.

 Then you could also put the comment about
 using the svc-vcpu-processor's scratch space above the place where you
 set the local variable, while avoiding breaking up the logic of the
 cpumask operations.
 
I like this, will do.

Regards,
Dario


signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

1 2 >

1 - 100 of 151 matches

Mail list logo