[Xen-devel] [PATCH v19 08/14] x86/VPMU: Save VPMU state for PV guests during context switch
Save VPMU state during context switch for both HVM and PV(H) guests. A subsequent patch (x86/VPMU: NMI-based VPMU support) will make it possible for vpmu_switch_to() to call vmx_vmcs_try_enter()-vcpu_pause() which needs is_running to be correctly set/cleared. To prepare for that, call context_saved() before vpmu_switch_to() is executed. (Note that while this change could have been dalayed until that later patch, the changes are harmless to existing code and so we do it here) Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com --- Changes in v19: * Adjusted for new vpmu_switch_to/from interface xen/arch/x86/domain.c | 22 ++ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index f19087e..c7f8210 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -1533,17 +1533,14 @@ void context_switch(struct vcpu *prev, struct vcpu *next) } if ( prev != next ) -_update_runstate_area(prev); - -if ( is_hvm_vcpu(prev) ) { -if (prev != next) -vpmu_switch_from(prev); - -if ( !list_empty(prev-arch.hvm_vcpu.tm_list) ) -pt_save_timer(prev); +_update_runstate_area(prev); +vpmu_switch_from(prev); } +if ( is_hvm_vcpu(prev) !list_empty(prev-arch.hvm_vcpu.tm_list) ) +pt_save_timer(prev); + local_irq_disable(); set_current(next); @@ -1581,15 +1578,16 @@ void context_switch(struct vcpu *prev, struct vcpu *next) !is_hardware_domain(next-domain)); } -if (is_hvm_vcpu(next) (prev != next) ) -/* Must be done with interrupts enabled */ -vpmu_switch_to(next); - context_saved(prev); if ( prev != next ) +{ _update_runstate_area(next); +/* Must be done with interrupts enabled */ +vpmu_switch_to(next); +} + /* Ensure that the vcpu has an up-to-date time base. */ update_vcpu_system_time(next); -- 1.8.1.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v19 10/14] x86/VPMU: Add support for PMU register handling on PV guests
Intercept accesses to PMU MSRs and process them in VPMU module. If vpmu ops for VCPU are not initialized (which is the case, for example, for PV guests that are not VPMU-enlightened) access to MSRs will return failure. Dump VPMU state for all domains (HVM and PV) when requested. Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com Acked-by: Jan Beulich jbeul...@suse.com Acked-by: Kevin Tian kevin.t...@intel.com Reviewed-by: Dietmar Hahn dietmar.h...@ts.fujitsu.com Tested-by: Dietmar Hahn dietmar.h...@ts.fujitsu.com --- xen/arch/x86/domain.c | 3 +-- xen/arch/x86/hvm/vmx/vpmu_core2.c | 49 +++-- xen/arch/x86/hvm/vpmu.c | 3 +++ xen/arch/x86/traps.c | 51 +-- 4 files changed, 95 insertions(+), 11 deletions(-) diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index c7f8210..a48d824 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -2065,8 +2065,7 @@ void arch_dump_vcpu_info(struct vcpu *v) { paging_dump_vcpu_info(v); -if ( is_hvm_vcpu(v) ) -vpmu_dump(v); +vpmu_dump(v); } void domain_cpuid( diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c index d10e3e7..66d7bc0 100644 --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c +++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c @@ -27,6 +27,7 @@ #include asm/regs.h #include asm/types.h #include asm/apic.h +#include asm/traps.h #include asm/msr.h #include asm/msr-index.h #include asm/hvm/support.h @@ -299,12 +300,18 @@ static inline void __core2_vpmu_save(struct vcpu *v) rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); for ( i = 0; i arch_pmc_cnt; i++ ) rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter); + +if ( !has_hvm_container_vcpu(v) ) +rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt-global_status); } static int core2_vpmu_save(struct vcpu *v) { struct vpmu_struct *vpmu = vcpu_vpmu(v); +if ( !has_hvm_container_vcpu(v) ) +wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) ) return 0; @@ -342,6 +349,13 @@ static inline void __core2_vpmu_load(struct vcpu *v) wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt-fixed_ctrl); wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt-ds_area); wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt-pebs_enable); + +if ( !has_hvm_container_vcpu(v) ) +{ +wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt-global_ovf_ctrl); +core2_vpmu_cxt-global_ovf_ctrl = 0; +wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt-global_ctrl); +} } static void core2_vpmu_load(struct vcpu *v) @@ -442,7 +456,6 @@ static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index) static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported) { -u64 global_ctrl; int i, tmp; int type = -1, index = -1; struct vcpu *v = current; @@ -486,7 +499,12 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, switch ( msr ) { case MSR_CORE_PERF_GLOBAL_OVF_CTRL: +if ( msr_content ~(0xC000 | + (((1ULL fixed_pmc_cnt) - 1) 32) | + ((1ULL arch_pmc_cnt) - 1)) ) +return 1; core2_vpmu_cxt-global_status = ~msr_content; +wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); return 0; case MSR_CORE_PERF_GLOBAL_STATUS: gdprintk(XENLOG_INFO, Can not write readonly MSR: @@ -514,14 +532,18 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, gdprintk(XENLOG_WARNING, Guest setting of DTS is ignored.\n); return 0; case MSR_CORE_PERF_GLOBAL_CTRL: -global_ctrl = msr_content; +core2_vpmu_cxt-global_ctrl = msr_content; break; case MSR_CORE_PERF_FIXED_CTR_CTRL: if ( msr_content ( ~((1ull (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1)) ) return 1; -vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); +if ( has_hvm_container_vcpu(v) ) +vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, + core2_vpmu_cxt-global_ctrl); +else +rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt-global_ctrl); *enabled_cntrs = ~(((1ULL fixed_pmc_cnt) - 1) 32); if ( msr_content != 0 ) { @@ -546,7 +568,11 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, if ( msr_content (~((1ull 32) - 1)) ) return 1; -vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); +if ( has_hvm_container_vcpu(v) ) +vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, +
Re: [Xen-devel] Upstream QEMU based stubdom and rump kernel
On Tue, Mar 17, 2015 at 02:29:07PM +, Wei Liu wrote: I've now successfully built QEMU upstream with rump kernel. However to make it fully functional as a stubdom, there are some missing pieces to be added in. 1. The ability to access QMP socket (a unix socket) from Dom0. That will be used to issue command to QEMU. The QMP socket does not needs to be a unix socket. It can be any of those (from qemu --help): Character device options: -chardev null,id=id[,mux=on|off] -chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds] [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off] (tcp) -chardev socket,id=id,path=path[,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off] (unix) -chardev udp,id=id[,host=host],port=port[,localaddr=localaddr] [,localport=localport][,ipv4][,ipv6][,mux=on|off] -chardev msmouse,id=id[,mux=on|off] -chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]] [,mux=on|off] -chardev ringbuf,id=id[,size=size] -chardev file,id=id,path=path[,mux=on|off] -chardev pipe,id=id,path=path[,mux=on|off] -chardev pty,id=id[,mux=on|off] -chardev stdio,id=id[,mux=on|off][,signal=on|off] -chardev serial,id=id,path=path[,mux=on|off] -chardev tty,id=id,path=path[,mux=on|off] -chardev parallel,id=id,path=path[,mux=on|off] -chardev parport,id=id,path=path[,mux=on|off] -chardev spicevmc,id=id,name=name[,debug=debug] -chardev spiceport,id=id,name=name[,debug=debug] 2. The ability to access files in Dom0. That will be used to write to / read from QEMU state file. To save a QEMU state (write), we do use a filename. But I guest we could expand the QMP command (xen-save-devices-state) to use something else, if it's easier. To restore, we provide a file descriptor from libxl to QEMU, with the fd on the file that contain the state we want to restore. But there are a few other way to load a state (from qemu.git/docs/migration.txt): - tcp migration: do the migration using tcp sockets - unix migration: do the migration using unix sockets - exec migration: do the migration using the stdin/stdout through a process. - fd migration: do the migration using an file descriptor that is passed to QEMU. QEMU doesn't care how this file descriptor is opened. -- Anthony PERARD ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] Upstream QEMU based stubdom and rump kernel
On Tue, Mar 17, 2015 at 02:54:09PM +, Ian Campbell wrote: On Tue, 2015-03-17 at 14:29 +, Wei Liu wrote: 2. The ability to access files in Dom0. That will be used to write to / read from QEMU state file. This requirement is not as broad as you make it sound. Yes. You're right. All which is really required is the ability to slurp in or write out a blob of bytes to a service running in a control domain, not actual This is more accurate. ability to read/write files in dom0 (which would need careful security consideration!). For the old qemu-traditional stubdom for example this is implemented as a pair of console devices (one r/o for restore + one w/o for save) which are setup by the toolstack at start of day and pre-plumbed into two temporary files. Unfortunately I don't think that hack in mini-os is upstreamable in rump kernel. Wei. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v19 09/14] x86/VPMU: When handling MSR accesses, leave fault injection to callers
With this patch return value of 1 of vpmu_do_msr() will now indicate whether an error was encountered during MSR processing (instead of stating that the access was to a VPMU register). As part of this patch we also check for validity of certain MSR accesses right when we determine which register is being written, as opposed to postponing this until later. Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com Acked-by: Kevin Tian kevin.t...@intel.com Reviewed-by: Dietmar Hahn dietmar.h...@ts.fujitsu.com Tested-by: Dietmar Hahn dietmar.h...@ts.fujitsu.com --- xen/arch/x86/hvm/svm/svm.c| 6 ++- xen/arch/x86/hvm/svm/vpmu.c | 6 +-- xen/arch/x86/hvm/vmx/vmx.c| 24 +--- xen/arch/x86/hvm/vmx/vpmu_core2.c | 82 ++- 4 files changed, 55 insertions(+), 63 deletions(-) diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index e523d12..4fe36e9 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1709,7 +1709,8 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content) case MSR_AMD_FAM15H_EVNTSEL3: case MSR_AMD_FAM15H_EVNTSEL4: case MSR_AMD_FAM15H_EVNTSEL5: -vpmu_do_rdmsr(msr, msr_content); +if ( vpmu_do_rdmsr(msr, msr_content) ) +goto gpf; break; case MSR_AMD64_DR0_ADDRESS_MASK: @@ -1860,7 +1861,8 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content) case MSR_AMD_FAM15H_EVNTSEL3: case MSR_AMD_FAM15H_EVNTSEL4: case MSR_AMD_FAM15H_EVNTSEL5: -vpmu_do_wrmsr(msr, msr_content, 0); +if ( vpmu_do_wrmsr(msr, msr_content, 0) ) +goto gpf; break; case MSR_IA32_MCx_MISC(4): /* Threshold register */ diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c index 58a0dc4..474d0db 100644 --- a/xen/arch/x86/hvm/svm/vpmu.c +++ b/xen/arch/x86/hvm/svm/vpmu.c @@ -305,7 +305,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, is_pmu_enabled(msr_content) !vpmu_is_set(vpmu, VPMU_RUNNING) ) { if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) -return 1; +return 0; vpmu_set(vpmu, VPMU_RUNNING); if ( has_hvm_container_vcpu(v) is_msr_bitmap_on(vpmu) ) @@ -335,7 +335,7 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, /* Write to hw counters */ wrmsrl(msr, msr_content); -return 1; +return 0; } static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) @@ -353,7 +353,7 @@ static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) rdmsrl(msr, *msr_content); -return 1; +return 0; } static void amd_vpmu_destroy(struct vcpu *v) diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 83b740a..206e50d 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2127,12 +2127,17 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content) *msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL | MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL; /* Perhaps vpmu will change some bits. */ +/* FALLTHROUGH */ +case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7): +case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3): +case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2: +case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL: +case MSR_IA32_PEBS_ENABLE: +case MSR_IA32_DS_AREA: if ( vpmu_do_rdmsr(msr, msr_content) ) -goto done; +goto gp_fault; break; default: -if ( vpmu_do_rdmsr(msr, msr_content) ) -break; if ( passive_domain_do_rdmsr(msr, msr_content) ) goto done; switch ( long_mode_do_msr_read(msr, msr_content) ) @@ -2308,7 +2313,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content) if ( msr_content ~supported ) { /* Perhaps some other bits are supported in vpmu. */ -if ( !vpmu_do_wrmsr(msr, msr_content, supported) ) +if ( vpmu_do_wrmsr(msr, msr_content, supported) ) break; } if ( msr_content IA32_DEBUGCTLMSR_LBR ) @@ -2336,9 +2341,16 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content) if ( !nvmx_msr_write_intercept(msr, msr_content) ) goto gp_fault; break; +case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7): +case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(7): +case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2: +case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL: +case MSR_IA32_PEBS_ENABLE: +case MSR_IA32_DS_AREA: + if ( vpmu_do_wrmsr(msr, msr_content, 0) ) +goto gp_fault; +break; default: -if ( vpmu_do_wrmsr(msr, msr_content, 0) ) -return
[Xen-devel] [PATCH v19 01/14] x86/VPMU: VPMU should not exist when vpmu_initialise() is called
We don't need to try to destroy it since it can't be already allocated at the time we try to initialize it. Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com Suggested-by: Andrew Cooper andrew.coop...@citrix.com --- Changes in v19: * Removed unnecesary test for VPMU_CONTEXT_ALLOCATED in svm/vpmu.c xen/arch/x86/hvm/svm/vpmu.c | 3 --- xen/arch/x86/hvm/vpmu.c | 5 + 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c index 64dc167..6764070 100644 --- a/xen/arch/x86/hvm/svm/vpmu.c +++ b/xen/arch/x86/hvm/svm/vpmu.c @@ -359,9 +359,6 @@ static int amd_vpmu_initialise(struct vcpu *v) struct vpmu_struct *vpmu = vcpu_vpmu(v); uint8_t family = current_cpu_data.x86; -if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) -return 0; - if ( counters == NULL ) { switch ( family ) diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c index 0e6b6c0..c3273ee 100644 --- a/xen/arch/x86/hvm/vpmu.c +++ b/xen/arch/x86/hvm/vpmu.c @@ -236,10 +236,7 @@ void vpmu_initialise(struct vcpu *v) if ( is_pvh_vcpu(v) ) return; -if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) -vpmu_destroy(v); -vpmu_clear(vpmu); -vpmu-context = NULL; +ASSERT(!vpmu-flags !vpmu-context); switch ( vendor ) { -- 1.8.1.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v19 12/14] x86/VPMU: Merge vpmu_rdmsr and vpmu_wrmsr
The two routines share most of their logic. Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com --- Changes in v19: * const-ified arch_vpmu_ops in vpmu_do_wrmsr * non-changes: - kept 'current' as a non-initializer to avoid unnecessary initialization in the (common) non-VPMU case - kept 'nop' label since there are multiple dissimilar cases that can cause a non-emulation of VPMU access xen/arch/x86/hvm/vpmu.c| 76 +- xen/include/asm-x86/hvm/vpmu.h | 14 ++-- 2 files changed, 42 insertions(+), 48 deletions(-) diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c index c287d8b..beed956 100644 --- a/xen/arch/x86/hvm/vpmu.c +++ b/xen/arch/x86/hvm/vpmu.c @@ -103,63 +103,47 @@ void vpmu_lvtpc_update(uint32_t val) apic_write(APIC_LVTPC, vpmu-hw_lapic_lvtpc); } -int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported) +int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, +uint64_t supported, bool_t is_write) { -struct vcpu *curr = current; +struct vcpu *curr; struct vpmu_struct *vpmu; +const struct arch_vpmu_ops *ops; +int ret = 0; if ( vpmu_mode == XENPMU_MODE_OFF ) -return 0; +goto nop; +curr = current; vpmu = vcpu_vpmu(curr); -if ( vpmu-arch_vpmu_ops vpmu-arch_vpmu_ops-do_wrmsr ) -{ -int ret = vpmu-arch_vpmu_ops-do_wrmsr(msr, msr_content, supported); - -/* - * We may have received a PMU interrupt during WRMSR handling - * and since do_wrmsr may load VPMU context we should save - * (and unload) it again. - */ -if ( !is_hvm_vcpu(curr) vpmu-xenpmu_data - (vpmu-xenpmu_data-pmu.pmu_flags PMU_CACHED) ) -{ -vpmu_set(vpmu, VPMU_CONTEXT_SAVE); -vpmu-arch_vpmu_ops-arch_vpmu_save(curr); -vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); -} -return ret; -} - -return 0; -} - -int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ -struct vcpu *curr = current; -struct vpmu_struct *vpmu; +ops = vpmu-arch_vpmu_ops; +if ( !ops ) +goto nop; + +if ( is_write ops-do_wrmsr ) +ret = ops-do_wrmsr(msr, *msr_content, supported); +else if ( !is_write ops-do_rdmsr ) +ret = ops-do_rdmsr(msr, msr_content); +else +goto nop; -if ( vpmu_mode == XENPMU_MODE_OFF ) +/* + * We may have received a PMU interrupt while handling MSR access + * and since do_wr/rdmsr may load VPMU context we should save + * (and unload) it again. + */ +if ( !is_hvm_vcpu(curr) + vpmu-xenpmu_data (vpmu-xenpmu_data-pmu.pmu_flags PMU_CACHED) ) { -*msr_content = 0; -return 0; +vpmu_set(vpmu, VPMU_CONTEXT_SAVE); +ops-arch_vpmu_save(curr); +vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); } -vpmu = vcpu_vpmu(curr); -if ( vpmu-arch_vpmu_ops vpmu-arch_vpmu_ops-do_rdmsr ) -{ -int ret = vpmu-arch_vpmu_ops-do_rdmsr(msr, msr_content); +return ret; -if ( !is_hvm_vcpu(curr) vpmu-xenpmu_data - (vpmu-xenpmu_data-pmu.pmu_flags PMU_CACHED) ) -{ -vpmu_set(vpmu, VPMU_CONTEXT_SAVE); -vpmu-arch_vpmu_ops-arch_vpmu_save(curr); -vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); -} -return ret; -} -else + nop: +if ( !is_write ) *msr_content = 0; return 0; diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h index 642a4b7..63851a7 100644 --- a/xen/include/asm-x86/hvm/vpmu.h +++ b/xen/include/asm-x86/hvm/vpmu.h @@ -99,8 +99,8 @@ static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu, } void vpmu_lvtpc_update(uint32_t val); -int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported); -int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content); +int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, +uint64_t supported, bool_t is_write); void vpmu_do_interrupt(struct cpu_user_regs *regs); void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); @@ -110,6 +110,16 @@ void vpmu_save(struct vcpu *v); void vpmu_load(struct vcpu *v); void vpmu_dump(struct vcpu *v); +static inline int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, +uint64_t supported) +{ +return vpmu_do_msr(msr, msr_content, supported, 1); +} +static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ +return vpmu_do_msr(msr, msr_content, 0, 0); +} + extern int acquire_pmu_ownership(int pmu_ownership); extern void release_pmu_ownership(int pmu_ownership); -- 1.8.1.4 ___ Xen-devel mailing
[Xen-devel] [PATCH v19 13/14] x86/VPMU: Add privileged PMU mode
Add support for privileged PMU mode (XENPMU_MODE_ALL) which allows privileged domain (dom0) profile both itself (and the hypervisor) and the guests. While this mode is on profiling in guests is disabled. Signed-off-by: Boris Ostrovsky boris.ostrov...@oracle.com --- Changes in v19: * Slightly different mode changing logic in xenpmu_op() since we no longer allow mode changes while VPMUs are active xen/arch/x86/hvm/vpmu.c | 34 +- xen/arch/x86/traps.c | 13 + xen/include/public/pmu.h | 3 +++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c index beed956..71c5063 100644 --- a/xen/arch/x86/hvm/vpmu.c +++ b/xen/arch/x86/hvm/vpmu.c @@ -111,7 +111,9 @@ int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, const struct arch_vpmu_ops *ops; int ret = 0; -if ( vpmu_mode == XENPMU_MODE_OFF ) +if ( (vpmu_mode == XENPMU_MODE_OFF) || + ((vpmu_mode XENPMU_MODE_ALL) + !is_hardware_domain(current-domain)) ) goto nop; curr = current; @@ -166,8 +168,12 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs) struct vcpu *sampled = current, *sampling; struct vpmu_struct *vpmu; -/* dom0 will handle interrupt for special domains (e.g. idle domain) */ -if ( sampled-domain-domain_id = DOMID_FIRST_RESERVED ) +/* + * dom0 will handle interrupt for special domains (e.g. idle domain) or, + * in XENPMU_MODE_ALL, for everyone. + */ +if ( (vpmu_mode XENPMU_MODE_ALL) || + (sampled-domain-domain_id = DOMID_FIRST_RESERVED) ) { sampling = choose_hwdom_vcpu(); if ( !sampling ) @@ -177,17 +183,18 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs) sampling = sampled; vpmu = vcpu_vpmu(sampling); -if ( !is_hvm_vcpu(sampling) ) +if ( !is_hvm_vcpu(sampling) || (vpmu_mode XENPMU_MODE_ALL) ) { /* PV(H) guest */ const struct cpu_user_regs *cur_regs; uint64_t *flags = vpmu-xenpmu_data-pmu.pmu_flags; -uint32_t domid = DOMID_SELF; +uint32_t domid; if ( !vpmu-xenpmu_data ) return; if ( is_pvh_vcpu(sampling) + !(vpmu_mode XENPMU_MODE_ALL) !vpmu-arch_vpmu_ops-do_interrupt(regs) ) return; @@ -204,6 +211,11 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs) else *flags = PMU_SAMPLE_PV; +if ( sampled == sampling ) +domid = DOMID_SELF; +else +domid = sampled-domain-domain_id; + /* Store appropriate registers in xenpmu_data */ /* FIXME: 32-bit PVH should go here as well */ if ( is_pv_32bit_vcpu(sampling) ) @@ -232,7 +244,8 @@ void vpmu_do_interrupt(struct cpu_user_regs *regs) if ( (vpmu_mode XENPMU_MODE_SELF) ) cur_regs = guest_cpu_user_regs(); -else if ( !guest_mode(regs) is_hardware_domain(sampling-domain) ) +else if ( !guest_mode(regs) + is_hardware_domain(sampling-domain) ) { cur_regs = regs; domid = DOMID_XEN; @@ -508,7 +521,8 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params) struct page_info *page; uint64_t gfn = params-val; -if ( vpmu_mode == XENPMU_MODE_OFF ) +if ( (vpmu_mode == XENPMU_MODE_OFF) || + ((vpmu_mode XENPMU_MODE_ALL) !is_hardware_domain(d)) ) return -EINVAL; if ( (params-vcpu = d-max_vcpus) || (d-vcpu == NULL) || @@ -627,12 +641,14 @@ long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) { case XENPMU_mode_set: { -if ( (pmu_params.val ~(XENPMU_MODE_SELF | XENPMU_MODE_HV)) || +if ( (pmu_params.val + ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) || (hweight64(pmu_params.val) 1) ) return -EINVAL; /* 32-bit dom0 can only sample itself. */ -if ( is_pv_32bit_vcpu(current) (pmu_params.val XENPMU_MODE_HV) ) +if ( is_pv_32bit_vcpu(current) + (pmu_params.val (XENPMU_MODE_HV | XENPMU_MODE_ALL)) ) return -EINVAL; spin_lock(vpmu_lock); diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 1eb7bb4..8a40deb 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -2653,6 +2653,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5: if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ) { +if ( (vpmu_mode XENPMU_MODE_ALL) + !is_hardware_domain(v-domain) ) +break; + if ( vpmu_do_wrmsr(regs-ecx, msr_content, 0) ) goto fail; } @@ -2776,6
Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI
On Tuesday 17 March 2015 06:01 PM, Jan Beulich wrote: On 17.03.15 at 13:06, mja...@caviumnetworks.com wrote: On Tuesday 17 March 2015 12:58 PM, Jan Beulich wrote: On 17.03.15 at 06:26, mja...@caviumnetworks.com wrote: In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a hypercall to inform xen that a new pci device has been added. If we were to inform xen about a new pci bus that is added there are 2 ways a) Issue the hypercall from drivers/pci/probe.c b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that segment number (s_bdf), it will return an error SEG_NO_NOT_FOUND. After that the linux xen code could issue the PHYSDEVOP_pci_host_bridge_add hypercall. I think (b) can be done with minimal code changes. What do you think ? I'm pretty sure (a) would even be refused by the maintainers, unless there already is a notification being sent. As to (b) - kernel code could keep track of which segment/bus pairs it informed Xen about, and hence wouldn't even need to wait for an error to be returned from the device-add request (which in your proposal would need to be re- issued after the host-bridge-add). Have a query on the CFG space address to be passed as hypercall parameter. The of_pci_get_host_bridge_resource only parses the ranges property and not reg. reg property has the CFG space address, which is usually stored in private pci host controller driver structures. so pci_dev 's parent pci_bus would not have that info. One way is to add a method in struct pci_ops but not sure it will be accepted or not. I'm afraid I don't understand what you're trying to tell me. Hi Jan, I missed this during initial discussion and found out while coding that CFG Space address of a pci host is stored in the reg property and the of_pci code dos not store reg in the resources only ranges are stored. So the pci_bus which is the rootbus created in the probe function of the pcie controller driver will have ranges values in resources but reg property value (CFG space address) in the private data. So from drivers/xen/pci.c we can find out the root bus (pci_bus) from the pci_dev (via BUS_NOTIFY) but cannot get the CFG space address. Now there are 2 ways a) Add a pci_ops to return the CFG space address b) Let the pci host controller driver invoke a function xen_invoke_hypercall () providing bus number and cfg_space address. xen_invoke_hypercall would be implemented in drivers/xen/pci.c and would issue PHYSDEVOP_pci_host_bridge_add hypercall Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v6 07/30] PCI: Pass PCI domain number combined with root bus number
On Tuesday 17 March 2015 07:35 PM, Ian Campbell wrote: On Tue, 2015-03-17 at 10:45 +0530, Manish Jaggi wrote: On Monday 09 March 2015 08:04 AM, Yijing Wang wrote: Now we could pass PCI domain combined with bus number in u32 argu. Because in arm/arm64, PCI domain number is assigned by pci_bus_assign_domain_nr(). So we leave pci_scan_root_bus() and pci_create_root_bus() in arm/arm64 unchanged. A new function pci_host_assign_domain_nr() will be introduced for arm/arm64 to assign domain number in later patch. Hi, I think these changes might not be required. We have made very few changes in the xen-pcifront to support PCI passthrough in arm64. As per xen architecture for a domU only a single pci virtual bus is created and all passthrough devices are attached to it. I guess you are only talking about the changes to xen-pcifront.c? Otherwise you are ignoring the dom0 case which is exposed to the real set of PCI root complexes and anyway I'm not sure how not needed for Xen domU translates into not required, since it is clearly required for other systems. Strictly speaking the Xen pciif protocol does support multiple buses, it's just that the tools, and perhaps kernels, have not yet felt any need to actually make use of that. There doesn't seem to be any harm in updating pcifront to follow this generic API change. ok. One side question, the function pci_host_assign_domain_nr() which would be introduced in later patch, does it appear to be doing the same binding which we are trying to implement via a pci_host_bridge add hypercall. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2 3/5] xen: print online pCPUs and free pCPUs when dumping
On 03/17/2015 04:33 PM, Dario Faggioli wrote: e.g., with `xl debug-key r', like this: (XEN) Online Cpus: 0-15 (XEN) Free Cpus: 8-15 Also, for each cpupool, print the set of pCPUs it contains, like this: (XEN) Cpupool 0: (XEN) Cpus: 0-7 (XEN) Scheduler: SMP Credit Scheduler (credit) Signed-off-by: Dario Faggioli dario.faggi...@citrix.com Acked-by: Juergen Gross jgr...@suse.com Cc: Juergen Gross jgr...@suse.com Cc: George Dunlap george.dun...@eu.citrix.com Cc: Jan Beulich jbeul...@suse.com Cc: Keir Fraser k...@xen.org --- Changes from v1: * _print_cpumap() becomes print_cpumap() (i.e., the leading '_' was not particularly useful in this case), as suggested during review * changed the output such as (1) we only print the maps, not the number of elements, and (2) we avoid printing the free cpus map when empty * improved the changelog --- I'm not including any Reviewed-by / Acked-by tag, since the patch changed. --- xen/common/cpupool.c | 12 1 file changed, 12 insertions(+) diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c index cd6aab9..812a2f9 100644 --- a/xen/common/cpupool.c +++ b/xen/common/cpupool.c @@ -17,6 +17,7 @@ #include xen/percpu.h #include xen/sched.h #include xen/sched-if.h +#include xen/keyhandler.h #include xen/cpu.h #define for_each_cpupool(ptr)\ @@ -658,6 +659,12 @@ int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op) return ret; } +static void print_cpumap(const char *str, const cpumask_t *map) +{ +cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), map); +printk(%s: %s\n, str, keyhandler_scratch); +} + void dump_runq(unsigned char key) { unsigned longflags; @@ -671,12 +678,17 @@ void dump_runq(unsigned char key) sched_smt_power_savings? enabled:disabled); printk(NOW=0x%08X%08X\n, (u32)(now32), (u32)now); +print_cpumap(Online Cpus, cpu_online_map); +if ( cpumask_weight(cpupool_free_cpus) ) +print_cpumap(Free Cpus, cpupool_free_cpus); + printk(Idle cpupool:\n); schedule_dump(NULL); for_each_cpupool(c) { printk(Cpupool %d:\n, (*c)-cpupool_id); +print_cpumap(Cpus, (*c)-cpu_valid); schedule_dump(*c); } ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v2] Build: Fix stubdom vtpm build failure
Typedefs are duplicated in stubdom/vtpmmgr/tcg.h and supported compilers do not cope with current staging branch. Signed-off-by: Olaf Hering o...@aepfle.de Signed-off-by: Quan Xu quan...@intel.com --- stubdom/vtpmmgr/common_types.h |9 + stubdom/vtpmmgr/tcg.h |7 +-- stubdom/vtpmmgr/tpm2.c | 10 +- stubdom/vtpmmgr/tpm2.h |6 +++--- stubdom/vtpmmgr/tpm2_types.h | 43 +++- stubdom/vtpmmgr/vtpmmgr.h |4 ++-- 6 files changed, 36 insertions(+), 43 deletions(-) create mode 100644 stubdom/vtpmmgr/common_types.h diff --git a/stubdom/vtpmmgr/common_types.h b/stubdom/vtpmmgr/common_types.h new file mode 100644 index 000..7321bb6 --- /dev/null +++ b/stubdom/vtpmmgr/common_types.h @@ -0,0 +1,9 @@ +#ifndef VTPM_COMMON_TYPES +#define VTPM_COMMON_TYPES 1 +typedef unsigned char BYTE; +typedef unsigned char BOOL; +typedef unsigned char UINT8; +typedef uint16_t UINT16; +typedef uint32_t UINT32; +typedef uint64_t UINT64; +#endif diff --git a/stubdom/vtpmmgr/tcg.h b/stubdom/vtpmmgr/tcg.h index 7321ec6..813ce57 100644 --- a/stubdom/vtpmmgr/tcg.h +++ b/stubdom/vtpmmgr/tcg.h @@ -39,6 +39,7 @@ #include stdlib.h #include stdint.h +#include common_types.h // CONSTANTS * @@ -401,12 +402,6 @@ // *** TYPEDEFS * -typedef unsigned char BYTE; -typedef unsigned char BOOL; -typedef uint16_t UINT16; -typedef uint32_t UINT32; -typedef uint64_t UINT64; - typedef UINT32 TPM_RESULT; typedef UINT32 TPM_PCRINDEX; typedef UINT32 TPM_DIRINDEX; diff --git a/stubdom/vtpmmgr/tpm2.c b/stubdom/vtpmmgr/tpm2.c index 1903e27..c9f1016 100644 --- a/stubdom/vtpmmgr/tpm2.c +++ b/stubdom/vtpmmgr/tpm2.c @@ -167,7 +167,7 @@ egress: TPM_RC TPM2_Load(TPMI_DH_OBJECT parentHandle, TPM2B_PRIVATE *inPrivate, /* in */ TPM2B_PUBLIC *inPublic, /* in */ - TPM_HANDLE *objectHandle, /* out */ + TPM2_HANDLE *objectHandle, /* out */ TPM2B_NAME *name /* out */) { TPM_BEGIN(TPM_ST_SESSIONS, TPM_CC_Load); @@ -185,7 +185,7 @@ TPM_RC TPM2_Load(TPMI_DH_OBJECT parentHandle, if (objectHandle != NULL) { ptr = unpack_TPM_HANDLE(ptr, objectHandle); } else { -TPM_HANDLE tmp; +TPM2_HANDLE tmp; ptr = unpack_TPM_HANDLE(ptr, tmp); } @@ -248,7 +248,7 @@ egress: TPM_RC TPM2_CreatePrimary(TPMI_RH_HIERARCHY primaryHandle, TPM2_Create_Params_in *in, - TPM_HANDLE *objHandle, + TPM2_HANDLE *objHandle, TPM2_Create_Params_out *out) { UINT32 param_size; @@ -281,7 +281,7 @@ TPM_RC TPM2_CreatePrimary(TPMI_RH_HIERARCHY primaryHandle, if (objHandle != NULL) ptr = unpack_TPM_HANDLE(ptr, objHandle); else { -TPM_HANDLE handle; +TPM2_HANDLE handle; ptr = unpack_TPM_HANDLE(ptr, handle); } ptr = unpack_UINT32(ptr, param_size); @@ -302,7 +302,7 @@ egress: return status; } -TPM_RC TPM2_HierachyChangeAuth(TPMI_RH_HIERARCHY_AUTH authHandle, TPM2B_AUTH *newAuth) +TPM_RC TPM2_HierachyChangeAuth(TPM2I_RH_HIERARCHY_AUTH authHandle, TPM2B_AUTH *newAuth) { TPM_BEGIN(TPM_ST_SESSIONS, TPM_CC_HierarchyChangeAuth); ptr = pack_UINT32(ptr, authHandle); diff --git a/stubdom/vtpmmgr/tpm2.h b/stubdom/vtpmmgr/tpm2.h index 9f597ee..9e01286 100644 --- a/stubdom/vtpmmgr/tpm2.h +++ b/stubdom/vtpmmgr/tpm2.h @@ -57,7 +57,7 @@ TPM_RC TPM2_PCR_Read(TPML_PCR_SELECTION pcrSelectionIn, TPM_RC TPM2_Load(TPMI_DH_OBJECT parentHandle, TPM2B_PRIVATE *inPrivate, TPM2B_PUBLIC *inPublic, - TPM_HANDLE *objectHandle, + TPM2_HANDLE *objectHandle, TPM2B_NAME *name); TPM_RC TPM2_Create(TPMI_DH_OBJECT parentHandle, @@ -66,10 +66,10 @@ TPM_RC TPM2_Create(TPMI_DH_OBJECT parentHandle, TPM_RC TPM2_CreatePrimary(TPMI_RH_HIERARCHY primaryHandle, TPM2_Create_Params_in *objHandle, - TPM_HANDLE *in, + TPM2_HANDLE *in, TPM2_Create_Params_out *out); -TPM_RC TPM2_HierachyChangeAuth(TPMI_RH_HIERARCHY_AUTH authHandle, +TPM_RC TPM2_HierachyChangeAuth(TPM2I_RH_HIERARCHY_AUTH authHandle, TPM2B_AUTH *newAuth); TPM_RC TPM2_RSA_ENCRYPT(TPMI_DH_OBJECT keyHandle, diff --git a/stubdom/vtpmmgr/tpm2_types.h b/stubdom/vtpmmgr/tpm2_types.h index ac2830d..a07d8f3 100644 --- a/stubdom/vtpmmgr/tpm2_types.h +++ b/stubdom/vtpmmgr/tpm2_types.h @@ -3,6 +3,7 @@ #include stdlib.h #include stdint.h +#include common_types.h // implementation.h // Table 212 -- Logic Values @@ -82,14 +83,6 @@ #defineMAX_ECC_KEY_BITS 256 #defineMAX_ECC_KEY_BYTES((MAX_ECC_KEY_BITS + 7) /
Re: [Xen-devel] Any work on sharing of large multi-page segments?
On 17.03.15 at 01:46, andreww...@gmail.com wrote: On 3/16/15, Jan Beulich jbeul...@suse.com wrote: So where do you expect the major performance / scalability improvement to be gained? Internally to Xen, each page will need to be tracked separately anyway, as what appears physically contiguous in the granting guest may (and likely will) not be contiguous in machine memory (i.e. from Xen's perspective). Furthermore the public interface is currently written such that grant lengths must be less than 64k. I.e. already at the very simple first steps you'd be faced with implementing bigger length counterparts of (almost) all existing interfaces. Since I'm looking to grant OpenGL buffers that can be many megabytes in size, I would think there would be a fair bit of overhead if the backend domain had to make a hypercall to map every single page of a buffer. I guess what I could do would be to add a hypercall that maps a contiguous group of grant entries (contiguous in the grant table, not necessarily contiguous in memory). And how would that be significantly different from the batching that's already built into the grant table hypercall? Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [OSSTEST Nested PATCH 6/6] Compose the main recipe of test-nested test job.
From: longtao.pang longtaox.p...@intel.com --- sg-run-job | 11 +++ 1 file changed, 11 insertions(+) diff --git a/sg-run-job b/sg-run-job index 94d091b..ababebe 100755 --- a/sg-run-job +++ b/sg-run-job @@ -297,6 +297,17 @@ proc run-job/test-pair {} { #run-ts . remus-failover ts-remus-check src_host dst_host + debian } +proc need-hosts/test-nested {} {return host} +proc run-job/test-nested {} { +run-ts . = ts-debian-hvm-install + host + nested +run-ts . = ts-nested-setup + host + nested +run-ts . = ts-xen-install nested_l1 +run-ts . = ts-host-reboot nested_l1 +run-ts . = ts-debian-hvm-install nested_l1 nested2 +run-ts . = ts-guest-stop nested_l1 nested2 +run-ts . = ts-guest-destroy host nested +} + proc test-guest-migr {g} { if {[catch { run-ts . = ts-migrate-support-check + host $g }]} return -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [OSSTEST Nested PATCH 5/6] Add test job for nest test case
From: longtao.pang longtaox.p...@intel.com This patch adds creation of the nested test job, when job creation procedure is invoked. 'NESTED_OS_IMAGE' is the name of 'Debian ISO Images', which defined in standalone.config. --- make-flight | 20 1 file changed, 20 insertions(+) diff --git a/make-flight b/make-flight index 63b14f5..fb35ac4 100755 --- a/make-flight +++ b/make-flight @@ -204,6 +204,25 @@ do_hvm_win7_x64_tests () { all_hostflags=$most_hostflags,hvm } +do_hvm_debian_nested_tests () { + if [ $xenarch != amd64 ]; then +return + fi + if [ $dom0arch != amd64 ]; then +return + fi + + job_create_test test-$xenarch$kern-$dom0arch-nested test-nested xl \ + $xenarch $dom0arch \ +nested_image=$NESTED_OS_IMAGE \ +nested2_image=$NESTED_OS_IMAGE \ +bios=seabios \ +kernbuildjob=build-amd64-pvops \ +kernkind=pvops \ +device_model_version=qemu-xen \ +all_hostflags=$most_hostflags,hvm +} + do_hvm_debian_test_one () { testname=$1 bios=$2 @@ -401,6 +420,7 @@ test_matrix_do_one () { done fi + do_hvm_debian_nested_tests do_passthrough_tests } -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [OSSTEST Nested PATCH 4/6] Add new ts-nested-setup script to custmize nested test configuration before the testing.
From: longtao.pang longtaox.p...@intel.com 1. In this script, make some appropriate runvars which selecthost would recognise. 2. Prepare the configurations for installing L2 guest VM. 3. Create a lv disk in L0 and hot-attach it to L1, need to restart L1 to make the block disk to be recognized by L1 system, then using this disk to create a VG that used for installing L2. --- ts-nested-setup | 54 ++ 1 file changed, 54 insertions(+) create mode 100755 ts-nested-setup diff --git a/ts-nested-setup b/ts-nested-setup new file mode 100755 index 000..69dc407 --- /dev/null +++ b/ts-nested-setup @@ -0,0 +1,54 @@ +#!/usr/bin/perl -w +# This is part of osstest, an automated testing framework for Xen. +# Copyright (C) 2015 Intel Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +use strict qw(vars); +use DBI; +use Osstest; +use Osstest::Debian; +use Osstest::TestSupport; + +tsreadconfig(); +our ($ident_l0,$nested_host) = @ARGV; +our ($l0,$l1) = ts_get_host_guest(@ARGV); + +guest_check_ip($l1); +target_cmd_root($l1, mkdir -p /home/osstest/.ssh cp /root/.ssh/authorized_keys /home/osstest/.ssh/); +my $url = $c{WebspaceUrl}.$c{WebspaceCommon}.$l0-{Name}._.'overlay.tar'; +target_cmd_root($l1, END); +wget -O overlay.tar $url +tar -xf overlay.tar -C / +rm overlay.tar -f +update-rc.d osstest-confirm-booted start 99 2 . +END + +store_runvar('nested_l1',$nested_host); +store_runvar(${nested_host}_ip,$l1-{Ip}); +store_runvar('multi_reboot_time',5); + +my $l2_disk_mb = 2; +my $l2_lv_name = 'nestedl2'; +my $vgname = $l0-{Name}; +$vgname .= .$c{TestHostDomain} if ($l0-{Suite} =~ m/lenny/); +target_cmd_root($l0, END); +lvremove -f /dev/${vgname}/${l2_lv_name} ||: +lvcreate -L ${l2_disk_mb}M -n $l2_lv_name $vgname +dd if=/dev/zero of=/dev/${vgname}/${l2_lv_name} count=10 +xl block-attach $l1-{Name} /dev/${vgname}/${l2_lv_name},raw,sdb,rw +END +target_install_packages_norec($l1, qw(lvm2 rsync genisoimage)); +target_reboot($l1); +target_cmd_root($l1, pvcreate /dev/xvdb vgcreate ${l2_lv_name}_vg /dev/xvdb); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [OSSTEST Nested PATCH 2/6] Add and expose some testsupport APIs
From: longtao.pang longtaox.p...@intel.com 1. Designate vif model to 'e1000', otherwise, with default device model, the L1 eth0 interface disappear, hence xenbridge cannot work. Maybe this limitation can be removed later after some fix it. For now, we have to accomodate to it. 2. Since reboot L1 guest VM will take more time to boot up, we increase multi-times for reboot-confirm-booted if test nested job, and the multi value is stored as a runvar in 'ts-nested-setup' script. Added another function 'guest_editconfig_cd' and expose it, this function bascically changes guest boot device sequence, alter its on_reboot behavior to restart and enabled nestedhvm feature. 3. In L2 installation context, its host (L1) IP address is not queried from DNS, but after running ts-nested-setup + host + nested, L1 IP is stored in runvar. --- Osstest/TestSupport.pm | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/Osstest/TestSupport.pm b/Osstest/TestSupport.pm index e1ebcb7..57c7167 100644 --- a/Osstest/TestSupport.pm +++ b/Osstest/TestSupport.pm @@ -111,6 +111,7 @@ BEGIN { iso_gen_flags_basic iso_copy_content_from_image guest_editconfig_nocd + guest_editconfig_cd ); %EXPORT_TAGS = ( ); @@ -819,6 +820,10 @@ sub selecthost ($) { logm(Host $name is in HostGroup $ho-{Properties}{HostGroup}); } +if ( $r{${name}_ip} ) { +$setprop-(IpAddr, $r{${name}_ip}); +} + # Next, we use the config file's general properites as defaults foreach my $k (keys %c) { next unless $k =~ m/^HostProp_([A-Z].*)$/; @@ -990,8 +995,9 @@ sub common_toolstack ($) { sub host_reboot ($) { my ($ho) = @_; +my $wait_time = $r{multi_reboot_time} ? $r{multi_reboot_time}*40 : 40; target_reboot($ho); -poll_loop(40,2, 'reboot-confirm-booted', sub { +poll_loop($wait_time,2, 'reboot-confirm-booted', sub { my $output; if (!eval { $output= target_cmd_output($ho, END, 40); @@ -1541,7 +1547,7 @@ sub prepareguest_part_xencfg ($) { my $xencfg= END; name= '$gho-{Name}' memory = ${ram_mb} -vif = [ 'type=ioemu,mac=$gho-{Ether}' ] +vif = [ 'type=ioemu,model=e1000,mac=$gho-{Ether}' ] # on_poweroff = '$onpoweroff' on_reboot = '$onreboot' @@ -2113,4 +2119,15 @@ sub guest_editconfig_nocd ($$) { }); } +sub guest_editconfig_cd ($) { +my ($gho) = @_; +guest_editconfig($gho-{Host}, $gho, sub { +if (m/^\s*boot\s*= '\s*d\s*c\s*'/) { +s/dc/cd/; +} +s/^on_reboot.*/on_reboot='restart'/; +s/#nestedhvm/nestedhvm/; +}); +} + 1; -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [OSSTEST Nested PATCH 0/6] Introduction of netsted HVM test job
This patch set adds nested HVM test case for osstest. In this test case, a Xen hypervisor (L1) runs on top of another Xen hypervisor (L0). Upon L1 hypervisor, we will then create a nested guest (L2), and test if the Linux guest can then be installed and run well. About nested Xen virtualization, refer to http://wiki.xenproject.org/wiki/Nested_Virtualization_in_Xen. Test steps 0. To run osstest in standalone mode, write a config file in '~/.xen-osstest/config', and then create a standalone.config file to define 'TREE_LINUX', 'REVISION_LINUX' and 'NESTED_OS_IMAGE' which will be used for nested test. The directry path of 'Debian Images' could be difined in '~/.xen-osstest/config'. 1. run 'build-amd64' job and then 'build-amd64-pvops', to prepare xen installation tarball and hvm guest kernel. 2. run 'test-amd64-amd6-nested' job, it does following: a. invoke test step of 'ts-debain-hvm-install' to install a normal HVM guest b. invoke test step of 'ts-nested-setup' to make some appropriate runvars which selecthost would recognise and prepare the configurations for installing L2 guest VM. c. invoke test step of 'ts-xen-install' to install xen on the normal guest, alter it into a L1 hypervisor d. invoke test step of 'ts-debain-hvm-install' again, but take the L1 hypervisor as host, install the L2 guest on it e. invoke test step of 'ts-guest-stop', stop L2 guest. f. invoke test step of 'ts-guest-destroy' to destroy L1 guest. This patch set reuses 'ts-debian-hvm-install' for both L1 installation and L2 installation, define 'nested' and 'nested2' as L1 and L2's hostname, define 'nested_l1 as L1's host ident. It also reuses 'ts-xen-install' with 'nested' input param to differentiate L1 Xen installation from L0 Xen installation. This patch series has been tested on test machines of amd64 arch, debian-7.6.0-amd64 as guests OS, with hvm domain0 of Linux kernel 3.18.5, in standalone mode. longtao.pang (6): parsing grub which has 'submenu' primitive Add and expose some testsupport APIs Changes on test step of debain hvm guest install in hvm. Add new ts-nested-setup script to custmize nested test configuration before the testing. Add test job for nest test case Compose the main recipe of test-nested test job. Osstest/Debian.pm | 52 Osstest/TestSupport.pm | 21 +++-- make-flight| 20 sg-run-job | 11 +++ ts-debian-hvm-install | 14 +++--- ts-nested-setup| 54 ++ 6 files changed, 147 insertions(+), 25 deletions(-) create mode 100755 ts-nested-setup ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [OSSTEST Nested PATCH 3/6] Changes on test step of debain hvm guest install in hvm.
From: longtao.pang longtaox.p...@intel.com 1. Increase disk size to accomodate to nested test requirment. 2. Since 'Debain-xxx-.iso' image will be stored there, therefore needs more disk capacity, increase root partition size in preseed generation. 3. In L1 installation context, assign more memory to it; since it acts as a nested hypervisor anyway. 4. In hvm guest configuration file, add '#nestedhvm=1', which will later be uncommented by guest_editconfig_cd() after xen installed in L1, and about to boot into a nested xen environment. --- ts-debian-hvm-install | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/ts-debian-hvm-install b/ts-debian-hvm-install index 449b96c..f334e30 100755 --- a/ts-debian-hvm-install +++ b/ts-debian-hvm-install @@ -36,7 +36,7 @@ our $ho= selecthost($whhost); # guest memory size will be set based on host free memory, see below our $ram_mb; -our $disk_mb= 1; +our $disk_mb= 15000; our $guesthost= $gn.guest.osstest; our $gho; @@ -60,7 +60,7 @@ d-i partman-auto/expert_recipe string \\ use_filesystem{ } filesystem{ vfat } \\ mountpoint{ /boot/efi } \\ . \\ -5000 50 5000 ext4 \\ +1 50 1 ext4 \\ method{ format } format{ } \\ use_filesystem{ } filesystem{ ext4 } \\ mountpoint{ / } \\ @@ -152,6 +152,7 @@ sub prep () { more_prepareguest_hvm($ho,$gho, $ram_mb, $disk_mb, OnReboot = 'preserve', Bios = $r{bios}, + ExtraConfig = '#nestedhvm=1', PostImageHook = sub { my $cmds = iso_copy_content_from_image($gho, $newiso); $cmds .= prepare_initrd($initrddir,$newiso,$preseed_file_path); @@ -173,6 +174,8 @@ my $ram_minslop = 100; my $ram_lots = 5000; if ($host_freemem_mb $ram_lots * 2 + $ram_minslop) { $ram_mb = $ram_lots; +} elsif ($gn eq 'nested') { +$ram_mb = 3072; } else { $ram_mb = 768; } @@ -189,7 +192,12 @@ if ($stage2) { guest_destroy($gho); } -guest_editconfig_nocd($gho,$emptyiso); +if ($gn eq 'nested') { +guest_editconfig_cd($gho); +} else { +guest_editconfig_nocd($gho,$emptyiso); +} + guest_create($gho); guest_await_dhcp_tcp($gho,300); guest_check_up($gho); -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [OSSTEST Nested PATCH 1/6] parsing grub which has 'submenu' primitive
From: longtao.pang longtaox.p...@intel.com From a hvm kernel build from Linux stable Kernel tree, the auto generated grub2 menu will have 'submenu' primitive, upon the 'menuentry' items. Xen boot entries will be grouped into a submenu. This patch adds capability to support such grub formats. --- Osstest/Debian.pm | 52 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/Osstest/Debian.pm b/Osstest/Debian.pm index e3e1c90..9fdacd7 100644 --- a/Osstest/Debian.pm +++ b/Osstest/Debian.pm @@ -1,5 +1,6 @@ # This is part of osstest, an automated testing framework for Xen. # Copyright (C) 2009-2013 Citrix Inc. +# Copyright (C) 2014-2015 Intel Inc. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -362,26 +363,34 @@ sub setupboot_grub2 ($$$) { my $count= 0; my $entry; +my $submenu; while ($f) { next if m/^\s*\#/ || !m/\S/; if (m/^\s*\}\s*$/) { -die unless $entry; +die unless $entry || $submenu; + if(!defined $entry defined $submenu){ + logm(Met end of a submenu starting from . + $submenu-{StartLine}. . + Our want kern is $want_kernver); + $submenu=undef; + next; + } my (@missing) = -grep { !defined $entry-{$_} } - (defined $xenhopt -? qw(Title Hv KernDom0 KernVer) -: qw(Title Hv KernOnly KernVer)); - if (@missing) { - logm((skipping entry at $entry-{StartLine};. - no @missing)); - } elsif (defined $want_kernver -$entry-{KernVer} ne $want_kernver) { - logm((skipping entry at $entry-{StartLine};. - kernel $entry-{KernVer}, not $want_kernver)); - } else { - # yes! - last; - } +grep { !defined $entry-{$_} } +(defined $xenhopt + ? qw(Title Hv KernDom0 KernVer) + : qw(Title Hv KernOnly KernVer)); +if (@missing) { +logm((skipping entry at $entry-{StartLine};. + no @missing)); +} elsif (defined $want_kernver + $entry-{KernVer} ne $want_kernver) { +logm((skipping entry at $entry-{StartLine};. + kernel $entry-{KernVer}, not $want_kernver)); +} else { +# yes! +last; +} $entry= undef; next; } @@ -393,21 +402,24 @@ sub setupboot_grub2 ($$$) { $entry= { Title = $1, StartLine = $., Number = $count }; $count++; } -if (m/^\s*multiboot\s*\/(xen\-[0-9][-+.0-9a-z]*\S+)/) { + if(m/^submenu\s+[\'\](.*)[\'\].*\{\s*$/){ + $submenu={ StartLine =$.}; + } +if (m/^\s*multiboot\s*(?:\/boot)*\/(xen\S+)/) { die unless $entry; $entry-{Hv}= $1; } -if (m/^\s*multiboot\s*\/(vmlinu[xz]-(\S+))/) { + if (m/^\s*multiboot\s*(?:\/boot)*\/(vmlinu[xz]-(\S+))/) { die unless $entry; $entry-{KernOnly}= $1; $entry-{KernVer}= $2; } -if (m/^\s*module\s*\/(vmlinu[xz]-(\S+))/) { + if (m/^\s*module\s*(?:\/boot)*\/(vmlinu[xz]-(\S+))/) { die unless $entry; $entry-{KernDom0}= $1; $entry-{KernVer}= $2; } -if (m/^\s*module\s*\/(initrd\S+)/) { + if (m/^\s*module\s*(?:\/boot)*\/(initrd\S+)/) { $entry-{Initrd}= $1; } } -- 1.7.10.4 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/6] x86: add support for COS/CBM manangement
On Fri, Mar 13, 2015 at 09:53:37AM -0400, Konrad Rzeszutek Wilk wrote: +xfree(d-arch.psr_cos_ids); And d-arch.psr_cos_ids = NULL (however this depends on who calls psr_domain_init, which is unclear to me). +} + +int psr_domain_init(struct domain *d) +{ +if ( cat_socket_info ) +{ +d-arch.psr_cos_ids = xzalloc_array(unsigned int, opt_socket_num); +if ( !d-arch.psr_cos_ids ) +return -ENOMEM; +} + +return 0; +} + +void psr_domain_free(struct domain *d) So who calls this? This function is called by arch_domain_destroy() or the failure path of arch_domain_create(). In both cases the domain structure will be freed later so setting NULL is pointless. Thanks, Chao +{ +psr_free_rmid(d); +psr_free_cos(d); +} + ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI
On 17.03.15 at 06:26, mja...@caviumnetworks.com wrote: In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a hypercall to inform xen that a new pci device has been added. If we were to inform xen about a new pci bus that is added there are 2 ways a) Issue the hypercall from drivers/pci/probe.c b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that segment number (s_bdf), it will return an error SEG_NO_NOT_FOUND. After that the linux xen code could issue the PHYSDEVOP_pci_host_bridge_add hypercall. I think (b) can be done with minimal code changes. What do you think ? I'm pretty sure (a) would even be refused by the maintainers, unless there already is a notification being sent. As to (b) - kernel code could keep track of which segment/bus pairs it informed Xen about, and hence wouldn't even need to wait for an error to be returned from the device-add request (which in your proposal would need to be re- issued after the host-bridge-add). Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [v2][PATCH 2/2] libxl: introduce gfx_passthru_kind
If I remember the context correctly this is in the autodetect case, so I think shouldn't mention IGD. Something like Unable to detect graphics passthru kind, please set gfx_passthru_kind. See xl.cfg(5) for more s/gfx_passthru_kind/gfx_passthru, right? Because actually we always get 'gfx_passthru_kind' from 'gfx_passthru'. I think you have it backwards. In the case here gfx_passthru=1 has been set by the user, but gfx_passthru_kind=DEFAULT. So libxl has tried to autodetect but it has failed. So if the user wants to make progress they should set gfx_passthru_kind to whatever type of passthrough they were trying to do. Looks you're saying 'gfx_passthru_kind' shouldn't reply on 'gfx_passthru', so 'gfx_passthru_kind' can override that previous value parsed from 'gfx_passthru', right? Alternatively I suppose you could recommend removing gfx_passthru=1 (or I'm still keep that currently. changing to=0), but given they've set =1 that doesn't seem to be the most productive suggestion. So looks the whole policy should be something like this, diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 5c40e84..5518759 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -1953,8 +1953,27 @@ skip_vfb: xlu_cfg_replace_string (config, spice_streaming_video, b_info-u.hvm.spice.streaming_video, 0); xlu_cfg_get_defbool(config, nographic, b_info-u.hvm.nographic, 0); -xlu_cfg_get_defbool(config, gfx_passthru, -b_info-u.hvm.gfx_passthru, 0); +if (!xlu_cfg_get_long(config, gfx_passthru, l, 1)) { +libxl_defbool_set(b_info-u.hvm.gfx_passthru, l); +} else if (!xlu_cfg_get_string(config, gfx_passthru, buf, 0)) { +if (libxl_gfx_passthru_kind_from_string(buf, + b_info-u.hvm.gfx_passthru_kind)) { +fprintf(stderr, +ERROR: invalid value \%s\ for \gfx_passthru\\n, +buf); +exit (1); +} +libxl_defbool_set(b_info-u.hvm.gfx_passthru, true); +} +if (!xlu_cfg_get_string(config, gfx_passthru_kind, buf, 0)) { +if (libxl_gfx_passthru_kind_from_string(buf, + b_info-u.hvm.gfx_passthru_kind)) { +fprintf(stderr, +ERROR: invalid value \%s\ for \gfx_passthru_kind\\n, +buf); +exit (1); +} +} switch (xlu_cfg_get_list_as_string_list(config, serial, b_info-u.hvm.serial_list, 1)) Thanks Tiejun ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 04/10] xen/blkfront: separate ring information to an new struct
Hi Felipe, On 03/06/2015 06:30 PM, Felipe Franciosi wrote: -Original Message- From: Bob Liu [mailto:bob@oracle.com] Sent: 05 March 2015 00:47 To: Konrad Rzeszutek Wilk Cc: Roger Pau Monne; Felipe Franciosi; David Vrabel; xen-devel@lists.xen.org; linux-ker...@vger.kernel.org; ax...@fb.com; h...@infradead.org; avanzini.aria...@gmail.com; cheg...@amazon.de Subject: Re: [PATCH 04/10] xen/blkfront: separate ring information to an new struct ...snip... Meaning you weren't able to do the same test? I can if there are more details about how to set up this 5 and 10 guests environment and test pattern have been used. Just think it might be save time if somebody still have the similar environment by hand. Roger and Felipe, if you still have the environment could you please have a quick compare about feature-persistent performance with patch [PATCH v5 0/2] gnttab: Improve scaleability? I've been meaning to do that. I don't have the environment up, but it isn't too hard to put it back together. A bit swamped at the moment, but will try (very hard) to do it next week. Do you have gotten any testing result? -- Regards, -Bob ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 1/6] x86: detect and initialize Intel CAT feature
On Fri, Mar 13, 2015 at 09:40:13AM -0400, Konrad Rzeszutek Wilk wrote: On Fri, Mar 13, 2015 at 06:13:20PM +0800, Chao Peng wrote: Detect Intel Cache Allocation Technology(CAT) feature and store the cpuid information for later use. Currently only L3 cache allocation is supported. The L3 CAT features may vary among sockets so per-socket feature information is stored. The initialization can happen either at boot time or when CPU(s) is hot plugged after booting. Signed-off-by: Chao Peng chao.p.p...@linux.intel.com --- docs/misc/xen-command-line.markdown | 15 +++- xen/arch/x86/psr.c | 151 +--- xen/include/asm-x86/cpufeature.h| 1 + 3 files changed, 155 insertions(+), 12 deletions(-) +cat_cpu_init(smp_processor_id()); Do 'if (!cat_cpu_init(..)).`' as the CPU might not support this. At which point you should also free the cat_socket_info and not register the cpu notifier. Even the booting CPU does not support this, other CPUs may still support this. Generally the feature is a per-socket feature. So break here is not the intention. Except this, all other comments will be addressed by the next version. Thanks for your time. Chao +register_cpu_notifier(cpu_nfb); +} + ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.
On 16.03.15 at 18:59, konrad.w...@oracle.com wrote: Hence was wondering if it would just be easier to put this patch in (see above) - with the benfit that folks have an faster interrupt passthrough experience and then I work on another variant of this with tristate cmpxchg and -mapping atomic counter. Considering how long this issue has been pending I think we really need to get _something_ in (or revert); if this something is the patch in its most recent form, so be it (even if maybe not the simplest of all possible variants). So please submit as a proper non- RFC patch. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.
Tuesday, March 17, 2015, 9:18:32 AM, you wrote: On 16.03.15 at 18:59, konrad.w...@oracle.com wrote: Hence was wondering if it would just be easier to put this patch in (see above) - with the benfit that folks have an faster interrupt passthrough experience and then I work on another variant of this with tristate cmpxchg and -mapping atomic counter. Considering how long this issue has been pending I think we really need to get _something_ in (or revert); if this something is the patch in its most recent form, so be it (even if maybe not the simplest of all possible variants). So please submit as a proper non- RFC patch. Jan I'm still running with this first simple stopgap patch from Konrad, and it has worked fine for me since. I will see if this new one also works-for-me, somewhere today :-) -- Sander diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index ae050df..d1421b0 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -804,7 +804,18 @@ static void dpci_softirq(void) d = pirq_dpci-dom; smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */ if ( test_and_set_bit(STATE_RUN, pirq_dpci-state) ) -BUG(); +{ +unsigned long flags; + +/* Put back on the list and retry. */ +local_irq_save(flags); +list_add_tail(pirq_dpci-softirq_list, this_cpu(dpci_list)); +local_irq_restore(flags); + +raise_softirq(HVM_DPCI_SOFTIRQ); +continue; +} + /* * The one who clears STATE_SCHED MUST refcount the domain. */ ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] Process context switch handling by xen
Yes there is. Take a look at this sample code in LibVMI: https://github.com/libvmi/libvmi/blob/master/examples/event-example.c Cheers, Tamas On Mon, Mar 16, 2015 at 11:12 PM, HANNAS YAYA Issa issa.hannasy...@enseeiht.fr wrote: Hello I want to know when there is context switch between processes in guest the xen hypervisor can handle this switch. Is there any possibility to get this context switch from hypervisor? ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 1/6] x86: detect and initialize Intel CAT feature
On Mon, Mar 16, 2015 at 01:47:06PM +, Jan Beulich wrote: On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote: @@ -1112,6 +1117,12 @@ The following resources are available: total/local memory bandwidth. Follow the same options with Cache Monitoring Technology. +* Cache Alllocation Technology (Broadwell and later). Information regarding + the cache allocation. + * `cat` instructs Xen to enable/disable Cache Allocation Technology. + * `socket_num` indicates socket number. Detecte automatically at boot time +if not specified(0). Useful for CPU hot-plug case. While saying something, at least for me what is being said doesn't at all make clear what socket number here is: The number of a specific socket? The number of sockets? Yet something else? Without knowing what it means by _just_ reading this description, people won't know what to use the command line option for. I agree, the name is a little confusing. Or perhaps: max_socket? E.g. `max_socket` indicates the maximum number of available sockets for CAT feature detection. All the sockets up to max_socket will be checked for CAT feature. The value is normally detected at boot time automatically if not specified(0). While the value may need to be specified manually for CPU hot-plug scenario in which case the maximum socket number detected at boot time may be not correct. -static void __init init_psr_cmt(unsigned int rmid_max) +static void __init psr_cmt_init(unsigned int rmid_max) Is this renaming really an integral part of this patch? OK, I will move it to separate one or just keep it unchanged. +on_selected_cpus(cpumask_of(cpu), do_cat_cpu_init, info, 0); Hmm, using an IPI here seems odd. Is there a reason to can't hook this onto CPU_STARTING instead of CPU_ONLINE? Looks like CPU_STARTING is what I need, IPI then is unnecessary. +static unsigned int get_max_socket(void) +{ +unsigned int cpu, max_apicid = boot_cpu_physical_apicid; + +for_each_present_cpu(cpu) +if (max_apicid x86_cpu_to_apicid[cpu]) Coding style. +max_apicid = x86_cpu_to_apicid[cpu]; + +return apicid_to_socket(max_apicid); Since when is the socket with the highest numbered APIC ID the highest numbered socket? I think the whole function needs to act on socket numbers only. Then perhaps looping cpus and checking against cpu_to_socket(cpu) directly make sense. Thanks Jan. Chao ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 1/6] x86: detect and initialize Intel CAT feature
On 17.03.15 at 09:48, chao.p.p...@linux.intel.com wrote: On Mon, Mar 16, 2015 at 01:47:06PM +, Jan Beulich wrote: On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote: @@ -1112,6 +1117,12 @@ The following resources are available: total/local memory bandwidth. Follow the same options with Cache Monitoring Technology. +* Cache Alllocation Technology (Broadwell and later). Information regarding + the cache allocation. + * `cat` instructs Xen to enable/disable Cache Allocation Technology. + * `socket_num` indicates socket number. Detecte automatically at boot time +if not specified(0). Useful for CPU hot-plug case. While saying something, at least for me what is being said doesn't at all make clear what socket number here is: The number of a specific socket? The number of sockets? Yet something else? Without knowing what it means by _just_ reading this description, people won't know what to use the command line option for. I agree, the name is a little confusing. Or perhaps: max_socket? E.g. `max_socket` indicates the maximum number of available sockets for CAT feature detection. All the sockets up to max_socket will be checked for CAT feature. The value is normally detected at boot time automatically if not specified(0). While the value may need to be specified manually for CPU hot-plug scenario in which case the maximum socket number detected at boot time may be not correct. Thanks, yes - max_socket or (perhaps more intuitive to users) socket_count or num_sockets. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.
Additionally I think it should be considered whether the bitmap approach of interpreting -state is the right one, and we don't instead want a clean 3-state (idle, sched, run) model. Could you elaborate a bit more please? As in three different unsigned int (or bool_t) that set in what state we are in? An enum { STATE_IDLE, STATE_SCHED, STATE_RUN }. Especially if my comment above turns out to be wrong, you'd have no real need for the SCHED and RUN flags to be set at the same time. I cobbled what I believe is what you were thinking off. As you can see to preserve the existing functionality such as being able to schedule N amount of interrupt injections for the N interrupts we might get - I modified '-masked' to be an atomic counter. The end result is that we can still live-lock. Unless we: - Drop on the floor the injection of N interrupts and just deliever at max one per VMX_EXIT (and not bother with interrupts arriving when we are in the VMX handler). - Alter the softirq code slightly - to have an variant which will only iterate once over the pending softirq bits per call. (so save an copy of the bitmap on the stack when entering the softirq handler - and use that. We could also xor it against the current to catch any non-duplicate bits being set that we should deal with). Here is the compile, but not run-time tested patch. From e7d8bcd7c5d32c520554a4ad69c4716246036002 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk konrad.w...@oracle.com Date: Tue, 17 Mar 2015 13:31:52 -0400 Subject: [RFC PATCH] dpci: Switch to tristate instead of bitmap *TODO*: - Writeup. - Tests Suggested-by: Jan Beulich jbuel...@suse.com Signed-off-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com --- xen/drivers/passthrough/io.c | 140 --- xen/include/xen/hvm/irq.h| 4 +- 2 files changed, 82 insertions(+), 62 deletions(-) diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index ae050df..663e104 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -30,42 +30,28 @@ static DEFINE_PER_CPU(struct list_head, dpci_list); /* - * These two bit states help to safely schedule, deschedule, and wait until - * the softirq has finished. - * - * The semantics behind these two bits is as follow: - * - STATE_SCHED - whoever modifies it has to ref-count the domain (-dom). - * - STATE_RUN - only softirq is allowed to set and clear it. If it has - * been set hvm_dirq_assist will RUN with a saved value of the - * 'struct domain' copied from 'pirq_dpci-dom' before STATE_RUN was set. - * - * The usual states are: STATE_SCHED(set) - STATE_RUN(set) - - * STATE_SCHED(unset) - STATE_RUN(unset). - * - * However the states can also diverge such as: STATE_SCHED(set) - - * STATE_SCHED(unset) - STATE_RUN(set) - STATE_RUN(unset). That means - * the 'hvm_dirq_assist' never run and that the softirq did not do any - * ref-counting. - */ - -enum { -STATE_SCHED, -STATE_RUN -}; - -/* * This can be called multiple times, but the softirq is only raised once. - * That is until the STATE_SCHED state has been cleared. The state can be - * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'), - * or by 'pt_pirq_softirq_reset' (which will try to clear the state before + * That is until state is in init. The state can be changed by: + * the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'), + * or by 'pt_pirq_softirq_reset' (which will try to init the state before * the softirq had a chance to run). */ static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci) { unsigned long flags; -if ( test_and_set_bit(STATE_SCHED, pirq_dpci-state) ) +switch ( cmpxchg(pirq_dpci-state, STATE_INIT, STATE_SCHED) ) +{ +case STATE_RUN: +case STATE_SCHED: +/* + * The pirq_dpci-mapping has been incremented to let us know + * how many we have left to do. + */ return; +case STATE_INIT: +break; +} get_knownalive_domain(pirq_dpci-dom); @@ -85,7 +71,7 @@ static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci) */ bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci) { -if ( pirq_dpci-state ((1 STATE_RUN) | (1 STATE_SCHED)) ) +if ( pirq_dpci-state != STATE_INIT ) return 1; /* @@ -109,22 +95,22 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci) ASSERT(spin_is_locked(d-event_lock)); -switch ( cmpxchg(pirq_dpci-state, 1 STATE_SCHED, 0) ) +switch ( cmpxchg(pirq_dpci-state, STATE_SCHED, STATE_INIT) ) { -case (1 STATE_SCHED): +case STATE_SCHED: /* - * We are going to try to de-schedule the softirq before it goes in - * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'. + * We are going to try to de-schedule the softirq before it goes to + * running state. Whoever
Re: [Xen-devel] [PATCH 3/6] X86: improve psr scheduling code
On Mon, Mar 16, 2015 at 04:53:35PM +, Jan Beulich wrote: On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote: @@ -1473,11 +1471,10 @@ static void __context_switch(void) } vcpu_restore_fpu_eager(n); n-arch.ctxt_switch_to(n); - -if ( psr_cmt_enabled() n-domain-arch.psr_rmid 0 ) -psr_assoc_rmid(n-domain-arch.psr_rmid); } +psr_ctxt_switch_to(n-domain); But you now potentially do the MSR write despite it already being the needed value (e.g. when switching between an idle vCPU and another one also having RMID zero). Not really. The MSR is already updated lazily in psr_assoc_rmid(). Chao ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/6] x86: add support for COS/CBM manangement
On Tue, Mar 17, 2015 at 09:25:48AM +, Jan Beulich wrote: On 17.03.15 at 10:11, chao.p.p...@linux.intel.com wrote: On Mon, Mar 16, 2015 at 05:10:32PM +, Jan Beulich wrote: On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote: +else +{ +unsigned int cpu = cpumask_check(get_socket_cpu(socket)); Isn't this going to trigger an assertion when the socket count got specified on the command line? Yes, the assertion is needed in tools side anyway. Here the check seems unnecessary. No - if the get_socket_cpu() may return nr_cpu_ids, you need a check. Just not in the form of an assertion. Right, error code can be returned. +if( !d-arch.psr_cos_ids ) +return; Considering this check ... +for ( socket = 0; socket opt_socket_num; socket++) +{ +cos = d-arch.psr_cos_ids[socket]; +if ( cos == 0 ) +continue; + +map = cat_socket_info[socket].cos_cbm_map; +if ( map ) +map[cos].ref--; +} + +xfree(d-arch.psr_cos_ids); ... I think you want to clear the pointer here. This function is called by arch_domain_destroy() or the failure path of arch_domain_create(). In both cases the domain structure will be freed later so setting NULL is pointless. But this is not visible from this function. The alternative to not clearing the pointer here would be to do the checks of the pointer in the callers, which then would make clear whether this is really only on error and domain destruction paths (and that hence the pointer can't be double freed). Then I'd like to clear the pointer here, so that even in the future the code will not surprise somebody. @@ -222,6 +422,17 @@ static void do_cat_cpu_init(void* data) info-cbm_len = (eax 0x1f) + 1; This means cbm_len = 32. Why is cos_cbm_map[].cbm then a uint64_t? Currently the cbm_len is EAX[4:0], 64 bits cbm here is for future possible enhancement. Unless the specification explicitly says that the field width in EAX may be extended in the future, please omit such preparations for possible future extensions - it is simply going to confuse readers (as it did for me already) trying to understand why the type of the field is what it is. I will try to understand this internally, if 32 bit is enough, then it will be used. Thanks, Chao ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] osstest going offline for a bit due to database server move
On Mon, 2015-03-16 at 12:41 +, Ian Campbell wrote: We've not yet tracked down the source of the mysterious filer reboots and there was another earlier today, we've fiddled with a few things to see if we can track them down. osstest is doing stuff now, fingers crossed. There were some more reboots overnight. We've made another config change which we hope will resolve things. If not we will look at moving the controller VM to another filer tomorrow. In the meantime in an attempt to try and keep some of the more important branches flowing with the limited bandwidth between reboots I've stopped a bunch of stuff: $ cd ~/testing.git $ touch bisect.stop $ for i in linux-{2.6.39,3.4,3.10,3.16} linux-arm-xen linux-linus linux-next ovmf seabios xen-4.0-testing xen-4.1-testing qemu-mainline qemu-upstream-4.2-testing ; do touch $i.stop; done t$ touch rumpuserxen.stop $ ls *.stop bisect.stoplinux-3.10.stop linux-3.4.stop linux-linus.stop ovmf.stop qemu-upstream-4.2-testing.stop seabios.stop xen-4.1-testing.stop linux-2.6.39.stop linux-3.16.stop linux-arm-xen.stop linux-next.stop qemu-mainline.stop rumpuserxen.stopxen-4.0-testing.stop Since Jan is trying to get 4.3.x and 4.4.x out the door I've left those stable branches going but otherwise I've stopped all the kernels which aren't actually feeding other flights, and some other stuff I reckoned we could live without for now. Once we've had 24 hours of uninterrupted operation I'll remove those again. I've also killed some sg-execute-flights corresponding to the above. Bisects: 36506 -- [linux-3.4 real-bisect] 36507 -- [qemu-upstream-unstable real-bisect] 36489 -- [rumpuserxen real-bisect] Real: flight | branch | intended +---+-- 36491 | qemu-mainline | real 36495 | linux-3.16| real 36497 | linux-3.4 | real 36498 | linux-3.10| real 36500 | ovmf | real 36501 | seabios | real 36505 | linux-next| real Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2] xsm: add device tree labeling support
On Thu, 2015-03-12 at 16:42 -0400, Daniel De Graaf wrote: This adds support in the hypervisor and policy build toolchain for Xen/Flask policy version 30, which adds the ability to label ARM device tree nodes and expands the IOMEM ocontext entries to 64 bits. Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov --- tools/flask/policy/Makefile | 20 -- That bit: Acked-by: Ian Campbell ian.campb...@citrix.com ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [v2][PATCH 2/2] libxl: introduce gfx_passthru_kind
On Tue, 2015-03-17 at 15:46 +0800, Chen, Tiejun wrote: If I remember the context correctly this is in the autodetect case, so I think shouldn't mention IGD. Something like Unable to detect graphics passthru kind, please set gfx_passthru_kind. See xl.cfg(5) for more s/gfx_passthru_kind/gfx_passthru, right? Because actually we always get 'gfx_passthru_kind' from 'gfx_passthru'. I think you have it backwards. In the case here gfx_passthru=1 has been set by the user, but gfx_passthru_kind=DEFAULT. So libxl has tried to autodetect but it has failed. So if the user wants to make progress they should set gfx_passthru_kind to whatever type of passthrough they were trying to do. Looks you're saying 'gfx_passthru_kind' shouldn't reply on 'gfx_passthru', so 'gfx_passthru_kind' can override that previous value parsed from 'gfx_passthru', right? I think perhaps the confusion arises because libxl and xl differ here. I was commenting on some libxl code, which has two fields, whereas you are talking about the xl cfg level, which only has one, which can be parsed in two ways. So I think I've probably been adding lots of confusion here, sorry. I think the _libxl_ message needs to be just Unable to detect graphics passthru kind. i.e. it can't/shouldn't reference anything to do with xl config options etc (which would make no sense if libvirt was being used). That's not very user friendly though, so you may want to consider adding a new specific error code for this case and returning it here, such that xl or libvirt can then give a more comprehensible error message. So looks the whole policy should be something like this, diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 5c40e84..5518759 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -1953,8 +1953,27 @@ skip_vfb: xlu_cfg_replace_string (config, spice_streaming_video, b_info-u.hvm.spice.streaming_video, 0); xlu_cfg_get_defbool(config, nographic, b_info-u.hvm.nographic, 0); -xlu_cfg_get_defbool(config, gfx_passthru, -b_info-u.hvm.gfx_passthru, 0); +if (!xlu_cfg_get_long(config, gfx_passthru, l, 1)) { +libxl_defbool_set(b_info-u.hvm.gfx_passthru, l); +} else if (!xlu_cfg_get_string(config, gfx_passthru, buf, 0)) { +if (libxl_gfx_passthru_kind_from_string(buf, + b_info-u.hvm.gfx_passthru_kind)) { +fprintf(stderr, +ERROR: invalid value \%s\ for \gfx_passthru\\n, +buf); +exit (1); +} +libxl_defbool_set(b_info-u.hvm.gfx_passthru, true); +} Up to here is fine. +if (!xlu_cfg_get_string(config, gfx_passthru_kind, buf, 0)) { +if (libxl_gfx_passthru_kind_from_string(buf, + b_info-u.hvm.gfx_passthru_kind)) { +fprintf(stderr, +ERROR: invalid value \%s\ for \gfx_passthru_kind\\n, +buf); +exit (1); +} +} I don't think you need this bit. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [linux-3.10 test] 36498: trouble: preparing/queued
flight 36498 linux-3.10 running [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/36498/ Failures and problems with tests :-( Tests which did not succeed and are blocking, including tests which could not be run: build-amd64-libvirt none executed queued build-i386-rumpuserxen none executed queued build-amd64-rumpuserxen none executed queued build-armhf-libvirt none executed queued build-i386-pvops 2 hosts-allocate running [st=running!] build-i386-libvirt none executed queued test-amd64-amd64-rumpuserxen-amd64none executed queued build-armhf 2 hosts-allocate running [st=running!] test-amd64-amd64-xl-pvh-intelnone executed queued test-amd64-i386-rumpuserxen-i386none executed queued test-amd64-i386-libvirt none executed queued build-amd64-pvops 2 hosts-allocate running [st=running!] test-amd64-amd64-libvirtnone executed queued test-amd64-amd64-xl-pvh-amd none executed queued test-amd64-amd64-xl-pcipt-intelnone executed queued test-amd64-i386-qemut-rhel6hvm-intelnone executed queued test-amd64-i386-xl none executed queued test-amd64-i386-rhel6hvm-amdnone executed queued test-amd64-i386-rhel6hvm-intelnone executed queued test-amd64-i386-qemut-rhel6hvm-amdnone executed queued test-amd64-i386-qemuu-rhel6hvm-amdnone executed queued test-amd64-amd64-xl-sedf-pinnone executed queued test-amd64-amd64-xl none executed queued build-amd64 2 hosts-allocate running [st=running!] build-armhf-pvops 2 hosts-allocate running [st=running!] test-amd64-amd64-xl-credit2 none executed queued test-armhf-armhf-xl-credit2 none executed queued test-armhf-armhf-libvirtnone executed queued test-amd64-i386-freebsd10-i386none executed queued test-armhf-armhf-xl-sedf-pinnone executed queued test-amd64-amd64-xl-sedfnone executed queued test-amd64-i386-qemuu-rhel6hvm-intelnone executed queued test-armhf-armhf-xl-sedfnone executed queued test-amd64-amd64-xl-multivcpunone executed queued test-armhf-armhf-xl none executed queued test-amd64-i386-xl-qemut-win7-amd64none executed queued test-amd64-i386-xl-qemut-debianhvm-amd64none executed queued build-i3862 hosts-allocate running [st=running!] test-armhf-armhf-xl-midway none executed queued test-armhf-armhf-xl-multivcpunone executed queued test-amd64-i386-xl-qemut-winxpsp3none executed queued test-amd64-i386-xl-qemuu-winxpsp3-vcpus1none executed queued test-amd64-i386-freebsd10-amd64none executed queued test-amd64-i386-xl-winxpsp3-vcpus1none executed queued test-amd64-amd64-xl-qemuu-winxpsp3none executed queued test-amd64-amd64-pair none executed queued test-amd64-i386-xl-qemuu-ovmf-amd64none executed queued test-amd64-amd64-xl-qemut-debianhvm-amd64none executedqueued test-amd64-i386-xl-qemuu-debianhvm-amd64none executed queued test-amd64-i386-xl-qemuu-win7-amd64none executed queued test-amd64-i386-xl-winxpsp3 none executed queued test-amd64-amd64-xl-win7-amd64none executed queued test-amd64-i386-xl-win7-amd64none executed queued test-amd64-amd64-xl-qemuu-win7-amd64none executed queued test-amd64-amd64-xl-qemut-win7-amd64none executed queued test-amd64-amd64-xl-qemuu-debianhvm-amd64none executedqueued test-amd64-amd64-xl-qemuu-ovmf-amd64none executed queued test-amd64-i386-xl-qemut-winxpsp3-vcpus1none executed queued test-amd64-i386-pairnone executed queued test-amd64-i386-xl-qemuu-winxpsp3none executed queued test-amd64-amd64-xl-winxpsp3none executed queued test-amd64-amd64-xl-qemut-winxpsp3none executed queued version targeted for testing: linux389fb5fb0b8b812ce0e853d5eca748b08fc73289 baseline version: linuxbe67db109090b17b56eb8eb2190cd70700f107aa 961 people touched revisions under test, not listing them all
[Xen-devel] [seabios test] 36501: trouble: preparing/queued
flight 36501 seabios running [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/36501/ Failures and problems with tests :-( Tests which did not succeed and are blocking, including tests which could not be run: test-amd64-amd64-libvirtnone executed queued test-amd64-amd64-xl-winxpsp3none executed queued test-amd64-i386-libvirt none executed queued test-amd64-amd64-xl-qemuu-win7-amd64none executed queued test-amd64-amd64-xl-win7-amd64none executed queued test-amd64-i386-xl-winxpsp3 none executed queued build-amd64-libvirt none executed queued test-amd64-amd64-xl-qemuu-ovmf-amd64none executed queued test-amd64-i386-xl-winxpsp3-vcpus1none executed queued test-amd64-i386-xl-win7-amd64none executed queued test-amd64-amd64-xl-pvh-intelnone executed queued test-amd64-amd64-xl-qemuu-winxpsp3none executed queued test-amd64-amd64-xl-pcipt-intelnone executed queued test-amd64-i386-xl-qemuu-winxpsp3-vcpus1none executed queued test-amd64-i386-freebsd10-amd64none executed queued test-amd64-amd64-xl-qemuu-debianhvm-amd64none executedqueued test-amd64-amd64-xl-sedfnone executed queued test-amd64-amd64-pair none executed queued test-amd64-amd64-xl-multivcpunone executed queued test-amd64-i386-xl-qemuu-winxpsp3none executed queued test-amd64-i386-freebsd10-i386none executed queued test-amd64-i386-xl-qemut-win7-amd64none executed queued test-amd64-amd64-xl-qemut-debianhvm-amd64none executedqueued test-amd64-amd64-xl-qemut-win7-amd64none executed queued test-amd64-amd64-xl-sedf-pinnone executed queued test-amd64-i386-xl-qemuu-ovmf-amd64none executed queued test-amd64-i386-qemut-rhel6hvm-amdnone executed queued test-amd64-amd64-xl-pvh-amd none executed queued test-amd64-i386-xl-qemut-winxpsp3none executed queued test-amd64-amd64-xl-credit2 none executed queued test-amd64-i386-xl-qemut-winxpsp3-vcpus1none executed queued test-amd64-amd64-xl none executed queued test-amd64-i386-xl-qemuu-win7-amd64none executed queued test-amd64-amd64-xl-qemut-winxpsp3none executed queued test-amd64-i386-xl-qemuu-debianhvm-amd64none executed queued test-amd64-i386-rhel6hvm-amdnone executed queued test-amd64-i386-pairnone executed queued test-amd64-i386-qemuu-rhel6hvm-intelnone executed queued test-amd64-i386-qemut-rhel6hvm-intelnone executed queued test-amd64-i386-rhel6hvm-intelnone executed queued test-amd64-i386-xl-qemut-debianhvm-amd64none executed queued test-amd64-i386-qemuu-rhel6hvm-amdnone executed queued test-amd64-i386-xl none executed queued build-amd64 2 hosts-allocate running [st=running!] build-amd64-pvops 2 hosts-allocate running [st=running!] build-i386-libvirt none executed queued build-i3862 hosts-allocate running [st=running!] build-i386-pvops 2 hosts-allocate running [st=running!] version targeted for testing: seabios a1ac8861049a5ffefc26ca294293ad666954fcc8 baseline version: seabios d23eba6ea3d429ed8a4a34bae7faad20ce44d8a1 People who touched revisions under test: Gerd Hoffmann kra...@redhat.com Kevin O'Connor ke...@koconnor.net Marcel Apfelbaum marce...@redhat.com Marcel Apfelbaum mar...@redhat.com Paolo Bonzini pbonz...@redhat.com jobs: build-amd64 preparing build-i386 preparing build-amd64-libvirt queued build-i386-libvirt queued build-amd64-pvopspreparing build-i386-pvops preparing test-amd64-amd64-xl queued test-amd64-i386-xl queued test-amd64-amd64-xl-pvh-amd queued test-amd64-i386-rhel6hvm-amd queued test-amd64-i386-qemut-rhel6hvm-amd queued
Re: [Xen-devel] [PATCH v2] Build: Fix stubdom vtpm build failure
On Mon, Mar 16, Quan Xu wrote: Typedefs are duplicated in stubdom/vtpmmgr/tcg.h and supported compilers do not cope with current staging branch. This version finally compiles. Thanks! Olaf ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 4/6] x86: add scheduling support for Intel CAT
On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote: @@ -407,10 +388,75 @@ void psr_domain_free(struct domain *d) psr_free_cos(d); } +static void psr_assoc_init(struct psr_assoc *psra) +{ +unsigned int socket; +struct psr_cat_socket_info *info; + +socket = cpu_to_socket(smp_processor_id()); +psra-socket = socket; +if ( cat_socket_info socket opt_socket_num ) +{ +info = cat_socket_info + socket; +if ( info-enabled ) +psra-cos_mask = (~(~0ull (get_count_order(info-cos_max) + + 32))) (~0ull 32); ((1ull get_count_order()) - 1) 32 seems better readable to me. void psr_ctxt_switch_to(struct domain *d) { +uint64_t reg; +struct psr_assoc *psra = this_cpu(psr_assoc); + +psr_assoc_reg_read(psra, reg); This ultimately leading to psr_assoc_init() makes it really undesirable here imo: Initialization work shouldn't be done during a context switch, even is this a one time thing on each CPU. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/6] x86: add support for COS/CBM manangement
On 17.03.15 at 10:11, chao.p.p...@linux.intel.com wrote: On Mon, Mar 16, 2015 at 05:10:32PM +, Jan Beulich wrote: On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote: +else +{ +unsigned int cpu = cpumask_check(get_socket_cpu(socket)); Isn't this going to trigger an assertion when the socket count got specified on the command line? Yes, the assertion is needed in tools side anyway. Here the check seems unnecessary. No - if the get_socket_cpu() may return nr_cpu_ids, you need a check. Just not in the form of an assertion. +if( !d-arch.psr_cos_ids ) +return; Considering this check ... +for ( socket = 0; socket opt_socket_num; socket++) +{ +cos = d-arch.psr_cos_ids[socket]; +if ( cos == 0 ) +continue; + +map = cat_socket_info[socket].cos_cbm_map; +if ( map ) +map[cos].ref--; +} + +xfree(d-arch.psr_cos_ids); ... I think you want to clear the pointer here. This function is called by arch_domain_destroy() or the failure path of arch_domain_create(). In both cases the domain structure will be freed later so setting NULL is pointless. But this is not visible from this function. The alternative to not clearing the pointer here would be to do the checks of the pointer in the callers, which then would make clear whether this is really only on error and domain destruction paths (and that hence the pointer can't be double freed). @@ -222,6 +422,17 @@ static void do_cat_cpu_init(void* data) info-cbm_len = (eax 0x1f) + 1; This means cbm_len = 32. Why is cos_cbm_map[].cbm then a uint64_t? Currently the cbm_len is EAX[4:0], 64 bits cbm here is for future possible enhancement. Unless the specification explicitly says that the field width in EAX may be extended in the future, please omit such preparations for possible future extensions - it is simply going to confuse readers (as it did for me already) trying to understand why the type of the field is what it is. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 4/6] x86: add scheduling support for Intel CAT
On Tue, Mar 17, 2015 at 09:19:42AM +, Jan Beulich wrote: +psra-cos_mask = (~(~0ull (get_count_order(info-cos_max) + + 32))) (~0ull 32); ((1ull get_count_order()) - 1) 32 seems better readable to me. Indeed :) void psr_ctxt_switch_to(struct domain *d) { +uint64_t reg; +struct psr_assoc *psra = this_cpu(psr_assoc); + +psr_assoc_reg_read(psra, reg); This ultimately leading to psr_assoc_init() makes it really undesirable here imo: Initialization work shouldn't be done during a context switch, even is this a one time thing on each CPU. This is what we did in CMT patch. But now I think psr_assoc_init() can be called from cat_cpu_init() other than here. Chao ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)
On 16.03.15 at 18:05, dario.faggi...@citrix.com wrote: such as it is taken care of by the various schedulers, rather than happening in schedule.c. In fact, it is the schedulers that know better which locks are necessary for the specific dumping operations. While there, fix a few style issues (indentation, trailing whitespace, parentheses and blank line after var declarations) Signed-off-by: Dario Faggioli dario.faggi...@citrix.com Cc: George Dunlap george.dun...@eu.citrix.com Cc: Meng Xu xumengpa...@gmail.com Cc: Jan Beulich jbeul...@suse.com Cc: Keir Fraser k...@xen.org --- xen/common/sched_credit.c | 42 -- xen/common/sched_credit2.c | 40 xen/common/sched_rt.c |7 +-- xen/common/schedule.c |5 ++--- 4 files changed, 79 insertions(+), 15 deletions(-) Is it really correct that sched_sedf.c doesn't need any change here? --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -26,6 +26,23 @@ /* + * Locking: + * - Scheduler-lock (a.k.a. runqueue lock): + * + is per-runqueue, and there is one runqueue per-cpu; + * + serializes all runqueue manipulation operations; + * - Private data lock (a.k.a. private scheduler lock): + * + serializes accesses to the scheduler global state (weight, + *credit, balance_credit, etc); + * + serializes updates to the domains' scheduling parameters. + * + * Ordering is private lock always comes first: + * + if we need both locks, we must acquire the private + *scheduler lock for first; + * + if we already own a runqueue lock, we must never acquire + *the private scheduler lock. + */ And this is Credit1 specific? Regardless of that, even if that's just reflecting current state, isn't acquiring a private lock around a generic lock backwards? Finally, as said in different contexts earlier, I think unconditionally acquiring locks in dumping routines isn't the best practice. At least in non-debug builds I think these should be try-locks only, skipping the dumping when a lock is busy. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [qemu-mainline test] 36491: trouble: preparing/queued
flight 36491 qemu-mainline running [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/36491/ Failures and problems with tests :-( Tests which did not succeed and are blocking, including tests which could not be run: build-amd64-libvirt none executed queued build-amd64-pvops 2 hosts-allocate running [st=running!] build-i386-pvops 2 hosts-allocate running [st=running!] test-amd64-amd64-xl-pvh-intelnone executed queued test-armhf-armhf-xl-credit2 none executed queued test-armhf-armhf-xl-sedf-pinnone executed queued test-armhf-armhf-xl none executed queued test-armhf-armhf-libvirtnone executed queued test-armhf-armhf-xl-multivcpunone executed queued test-amd64-amd64-xl-sedfnone executed queued test-amd64-amd64-xl-pvh-amd none executed queued test-amd64-amd64-xl-pcipt-intelnone executed queued test-amd64-i386-xl none executed queued test-armhf-armhf-xl-midway none executed queued test-amd64-amd64-xl-credit2 none executed queued test-amd64-i386-qemut-rhel6hvm-amdnone executed queued test-amd64-amd64-xl-sedf-pinnone executed queued test-amd64-i386-rhel6hvm-intelnone executed queued test-amd64-i386-rhel6hvm-amdnone executed queued test-amd64-i386-libvirt none executed queued test-armhf-armhf-xl-sedfnone executed queued build-armhf-libvirt none executed queued test-amd64-amd64-xl none executed queued test-amd64-amd64-libvirtnone executed queued test-amd64-i386-qemuu-rhel6hvm-amdnone executed queued test-amd64-amd64-xl-multivcpunone executed queued test-amd64-i386-qemut-rhel6hvm-intelnone executed queued build-armhf-pvops 2 hosts-allocate running [st=running!] build-armhf 2 hosts-allocate running [st=running!] test-amd64-i386-freebsd10-amd64none executed queued test-amd64-i386-qemuu-rhel6hvm-intelnone executed queued test-amd64-amd64-xl-win7-amd64none executed queued build-i3862 hosts-allocate running [st=running!] test-amd64-i386-xl-qemut-debianhvm-amd64none executed queued test-amd64-i386-xl-win7-amd64none executed queued test-amd64-i386-freebsd10-i386none executed queued build-i386-libvirt none executed queued test-amd64-amd64-xl-winxpsp3none executed queued test-amd64-i386-xl-qemut-win7-amd64none executed queued test-amd64-i386-xl-qemuu-winxpsp3-vcpus1none executed queued test-amd64-i386-xl-qemut-winxpsp3-vcpus1none executed queued test-amd64-amd64-xl-qemuu-debianhvm-amd64none executedqueued test-amd64-amd64-xl-qemut-winxpsp3none executed queued test-amd64-i386-xl-qemuu-ovmf-amd64none executed queued test-amd64-i386-xl-qemuu-debianhvm-amd64none executed queued test-amd64-amd64-pair none executed queued test-amd64-i386-xl-qemuu-win7-amd64none executed queued test-amd64-amd64-xl-qemut-win7-amd64none executed queued test-amd64-i386-xl-qemut-winxpsp3none executed queued test-amd64-amd64-xl-qemuu-win7-amd64none executed queued test-amd64-amd64-xl-qemuu-winxpsp3none executed queued test-amd64-amd64-xl-qemut-debianhvm-amd64none executedqueued build-amd64 2 hosts-allocate running [st=running!] test-amd64-amd64-xl-qemuu-ovmf-amd64none executed queued test-amd64-i386-xl-winxpsp3-vcpus1none executed queued test-amd64-i386-xl-winxpsp3 none executed queued test-amd64-i386-pairnone executed queued test-amd64-i386-xl-qemuu-winxpsp3none executed queued version targeted for testing: qemuu17b11a1406fdc43b5022f32a6fbfcb005a353b38 baseline version: qemuu576a94d8bcaa1bb07a81d9ffd2cf76095a66ad9a People who touched revisions under test: Alberto Garcia be...@igalia.com Alex Williamson alex.william...@redhat.com Alexander Graf ag...@suse.de Alexey Kardashevskiy a...@ozlabs.ru Alistair Francis alistai...@gmail.com Alistair Francis alist...@alistair23.me Amit Shah amit.s...@redhat.com Andreas Färber afaer...@suse.de Andrew Jones drjo...@redhat.com Aneesh Kumar K.V
[Xen-devel] [ovmf test] 36500: trouble: preparing/queued
flight 36500 ovmf running [real] http://www.chiark.greenend.org.uk/~xensrcts/logs/36500/ Failures and problems with tests :-( Tests which did not succeed and are blocking, including tests which could not be run: test-amd64-amd64-xl-pvh-intelnone executed queued build-i386-libvirt none executed queued build-amd64-pvops 2 hosts-allocate running [st=running!] build-amd64 2 hosts-allocate running [st=running!] test-amd64-i386-libvirt none executed queued test-amd64-amd64-xl-pvh-amd none executed queued test-amd64-amd64-xl none executed queued test-amd64-i386-qemut-rhel6hvm-amdnone executed queued test-amd64-i386-freebsd10-i386none executed queued test-amd64-i386-rhel6hvm-amdnone executed queued test-amd64-amd64-xl-pcipt-intelnone executed queued test-amd64-i386-qemuu-rhel6hvm-amdnone executed queued build-i386-pvops 2 hosts-allocate running [st=running!] test-amd64-amd64-libvirtnone executed queued build-i3862 hosts-allocate running [st=running!] test-amd64-i386-xl none executed queued test-amd64-amd64-xl-multivcpunone executed queued test-amd64-amd64-xl-sedf-pinnone executed queued test-amd64-i386-qemut-rhel6hvm-intelnone executed queued test-amd64-i386-qemuu-rhel6hvm-intelnone executed queued test-amd64-amd64-xl-credit2 none executed queued test-amd64-i386-freebsd10-amd64none executed queued test-amd64-amd64-xl-sedfnone executed queued test-amd64-i386-rhel6hvm-intelnone executed queued test-amd64-i386-xl-qemut-win7-amd64none executed queued test-amd64-i386-xl-qemut-debianhvm-amd64none executed queued test-amd64-amd64-pair none executed queued test-amd64-i386-xl-qemuu-ovmf-amd64none executed queued build-amd64-libvirt none executed queued test-amd64-amd64-xl-qemut-win7-amd64none executed queued test-amd64-i386-xl-qemuu-debianhvm-amd64none executed queued test-amd64-i386-xl-winxpsp3 none executed queued test-amd64-i386-xl-win7-amd64none executed queued test-amd64-amd64-xl-qemuu-win7-amd64none executed queued test-amd64-i386-xl-qemuu-win7-amd64none executed queued test-amd64-i386-xl-qemut-winxpsp3-vcpus1none executed queued test-amd64-i386-xl-qemuu-winxpsp3none executed queued test-amd64-amd64-xl-winxpsp3none executed queued test-amd64-amd64-xl-qemuu-debianhvm-amd64none executedqueued test-amd64-amd64-xl-qemut-debianhvm-amd64none executedqueued test-amd64-amd64-xl-qemuu-ovmf-amd64none executed queued test-amd64-i386-xl-qemuu-winxpsp3-vcpus1none executed queued test-amd64-i386-xl-winxpsp3-vcpus1none executed queued test-amd64-i386-pairnone executed queued test-amd64-i386-xl-qemut-winxpsp3none executed queued test-amd64-amd64-xl-qemuu-winxpsp3none executed queued test-amd64-amd64-xl-win7-amd64none executed queued test-amd64-amd64-xl-qemut-winxpsp3none executed queued version targeted for testing: ovmf ba9d087b8fb91f19c9accf9541332a36889e18ed baseline version: ovmf 69a99d0b2c0d601c60a1d65bac057adb045d002b People who touched revisions under test: Ard Biesheuvel ard.biesheu...@linaro.org Chen Fan chen.fan.f...@cn.fujitsu.com David Wei david@intel.com Elvin Li elvin...@intel.com Feng Tian feng.t...@intel.com Laszlo Ersek ler...@redhat.com Liming Gao liming@intel.com Qiu Shumin shumin@intel.com Shifei Lu shifeix.a...@intel.com Shumin Qiu shumin@intel.com Star Zeng star.z...@intel.com Tapan Shah tapands...@hp.com Tim He tim...@intel.com jobs: build-amd64 preparing build-i386 preparing build-amd64-libvirt queued build-i386-libvirt queued build-amd64-pvopspreparing build-i386-pvops preparing test-amd64-amd64-xl queued test-amd64-i386-xl
Re: [Xen-devel] [PATCH v2 2/4] xen: sched: make counters for vCPU sleep and wakeup generic
On Mon, 2015-03-16 at 17:18 +, George Dunlap wrote: On 02/27/2015 04:51 PM, Dario Faggioli wrote: diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index ad0a5d4..2b852cc 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -931,6 +931,7 @@ csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) struct csched2_vcpu * const svc = CSCHED2_VCPU(vc); BUG_ON( is_idle_vcpu(vc) ); +SCHED_STAT_CRANK(vcpu_sleep); if ( per_cpu(schedule_data, vc-processor).curr == vc ) cpu_raise_softirq(vc-processor, SCHEDULE_SOFTIRQ); @@ -956,19 +957,22 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) BUG_ON( is_idle_vcpu(vc) ); -/* Make sure svc priority mod happens before runq check */ if ( unlikely(per_cpu(schedule_data, vc-processor).curr == vc) ) { +SCHED_STAT_CRANK(vcpu_wake_running); goto out; } - if ( unlikely(__vcpu_on_runq(svc)) ) Does this make the 'if' butt right up against the '{'? Is that bad? Other than that: Acked-by: George Dunlap george.dun...@eu.citrix.com As far as I can see, the series is in: http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=8b0e94da0e23221a6d7ea19bfbd24a407db44de8 and whoever committed it (Jan, probably?), took care of leaving that blank line in place, which I'm fine with, of course. :-) However, it doesn't apply cleanly at the moment, so you'll probably need to send a refresh. (Sorry for taking so long!) NP, and thanks for reviewing. I was prepared to refresh and resend, but as I said, it's been checked-in already, so thanks for that! :-D Regards, Dario signature.asc Description: This is a digitally signed message part ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/6] x86: add support for COS/CBM manangement
On Mon, Mar 16, 2015 at 05:10:32PM +, Jan Beulich wrote: On 13.03.15 at 11:13, chao.p.p...@linux.intel.com wrote: +else +{ +unsigned int cpu = cpumask_check(get_socket_cpu(socket)); Isn't this going to trigger an assertion when the socket count got specified on the command line? Yes, the assertion is needed in tools side anyway. Here the check seems unnecessary. +if( !d-arch.psr_cos_ids ) +return; Considering this check ... +for ( socket = 0; socket opt_socket_num; socket++) +{ +cos = d-arch.psr_cos_ids[socket]; +if ( cos == 0 ) +continue; + +map = cat_socket_info[socket].cos_cbm_map; +if ( map ) +map[cos].ref--; +} + +xfree(d-arch.psr_cos_ids); ... I think you want to clear the pointer here. This function is called by arch_domain_destroy() or the failure path of arch_domain_create(). In both cases the domain structure will be freed later so setting NULL is pointless. @@ -222,6 +422,17 @@ static void do_cat_cpu_init(void* data) info-cbm_len = (eax 0x1f) + 1; This means cbm_len = 32. Why is cos_cbm_map[].cbm then a uint64_t? Currently the cbm_len is EAX[4:0], 64 bits cbm here is for future possible enhancement. Chao ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2] Build: Fix stubdom vtpm build failure
Olaf, Thanks for your test. Ian, Could you help me check it again? Any comments, I will fix it soon. If no any other comments, could you help me merge it? Thanks. Quan -Original Message- From: Olaf Hering [mailto:o...@aepfle.de] Sent: Tuesday, March 17, 2015 5:20 PM To: Xu, Quan Cc: ian.campb...@citrix.com; xen-devel@lists.xen.org; dgde...@tycho.nsa.gov; andrew.coop...@citrix.com Subject: Re: [PATCH v2] Build: Fix stubdom vtpm build failure On Mon, Mar 16, Quan Xu wrote: Typedefs are duplicated in stubdom/vtpmmgr/tcg.h and supported compilers do not cope with current staging branch. This version finally compiles. Thanks! Olaf ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 18/18] x86: add multiboot2 protocol support for EFI platforms
On 30.01.15 at 18:54, daniel.ki...@oracle.com wrote: @@ -94,6 +111,17 @@ ENTRY(start) gdt_boot_descr: .word 6*8-1 .long sym_phys(trampoline_gdt) +.long 0 /* Needed for 64-bit lgdt */ + +cs32_switch_addr: +.long sym_phys(cs32_switch) +.long BOOT_CS32 + +efi_st: +.quad 0 + +efi_ih: +.quad 0 .Lbad_cpu_msg: .asciz ERR: Not a 64-bit CPU! .Lbad_ldr_msg: .asciz ERR: Not a Multiboot bootloader! @@ -124,6 +152,133 @@ print_err: .Lhalt: hlt jmp .Lhalt +.code64 + +__efi64_start: +cld + +/* Bootloaders may set multiboot[12].mem_lower to a nonzero value */ +xor %edx,%edx + +/* Check for Multiboot2 bootloader */ +cmp $MULTIBOOT2_BOOTLOADER_MAGIC,%eax +je efi_multiboot2_proto + +jmp not_multiboot + +efi_multiboot2_proto: jne not_multiboot (and efi_multiboot2_proto dropped altogether) +/* Skip Multiboot2 information fixed part */ +lea MB2_fixed_sizeof(%ebx),%ecx Let's please not add more assumptions than necessary about stuff being below 4G. + +0: +/* Get mem_lower from Multiboot2 information */ +cmpl$MULTIBOOT2_TAG_TYPE_BASIC_MEMINFO,(%ecx) +jne 1f + +mov MB2_mem_lower(%ecx),%edx +jmp 4f + +1: +/* Get EFI SystemTable address from Multiboot2 information */ +cmpl$MULTIBOOT2_TAG_TYPE_EFI64,(%ecx) +jne 2f + +lea MB2_efi64_st(%ecx),%esi +lea efi_st(%rip),%edi +movsq A simple pair of mov-s please, assuming all of this really needs to be done in assembly in the first place. Also please use .Lname labels in this assembly coded switch statement to ease reading. + +/* Do not go into real mode on EFI platform */ +movb$1,skip_realmode(%rip) + +jmp 4f + +2: +/* Get EFI ImageHandle address from Multiboot2 information */ +cmpl$MULTIBOOT2_TAG_TYPE_EFI64_IH,(%ecx) +jne 3f + +lea MB2_efi64_ih(%ecx),%esi +lea efi_ih(%rip),%edi +movsq +jmp 4f + +3: +/* Is it the end of Multiboot2 information? */ +cmpl$MULTIBOOT2_TAG_TYPE_END,(%ecx) +je run_bs + +4: Please switch the order so that 2 can fall through into 4 (and you then won't need the run_bs label, which otherwise should also becom .Lrun_bs). +/* Go to next Multiboot2 information tag */ +add MB2_tag_size(%ecx),%ecx +add $(MULTIBOOT2_TAG_ALIGN-1),%ecx +and $~(MULTIBOOT2_TAG_ALIGN-1),%ecx +jmp 0b + +run_bs: +push%rax +push%rdx + +/* Initialize BSS (no nasty surprises!) */ +lea __bss_start(%rip),%rdi +lea _end(%rip),%rcx +sub %rdi,%rcx +xor %rax,%rax +rep stosb rep stosb + +mov efi_ih(%rip),%rdi /* EFI ImageHandle */ +mov efi_st(%rip),%rsi /* EFI SystemTable */ +callefi_multiboot2 With efi_multiboot2 being a C function it really looks like much of the above doesn't need to be done in assembly. + +pop %rcx +pop %rax + +shl $10-4,%rcx /* Convert multiboot2.mem_lower to bytes/16 */ + +cli + +/* Initialise GDT */ +lgdtgdt_boot_descr(%rip) + +/* Reload code selector */ +ljmpl *cs32_switch_addr(%rip) + +.code32 + +cs32_switch: +/* Initialise basic data segments */ +mov $BOOT_DS,%edx +mov %edx,%ds +mov %edx,%es +mov %edx,%fs +mov %edx,%gs +mov %edx,%ss + +mov $sym_phys(cpu0_stack)+1024,%esp + +/* Disable paging */ +mov %cr0,%edx +and $(~X86_CR0_PG),%edx +mov %edx,%cr0 + +push%eax +push%ecx + +/* Disable Long Mode */ +mov $MSR_EFER,%ecx +rdmsr +and $(~EFER_LME),%eax +wrmsr I don't think this is really needed. + +pop %ecx +pop %eax + +/* Turn off PAE */ +mov %cr4,%edx +and $(~X86_CR4_PAE),%edx +mov %edx,%cr4 Nor this. @@ -179,7 +334,7 @@ multiboot2_proto: and $~(MULTIBOOT2_TAG_ALIGN-1),%ecx jmp 0b -trampoline_setup: +bios_platform: /* Set up trampoline segment 64k below EBDA */ This is still trampoline setup code, so it's not clear why you rename the label. If you need another named label ... @@ -195,12 +350,13 @@ trampoline_setup: * multiboot structure (if available) and use the smallest. */ cmp $0x100,%edx /* is the multiboot value too small? */ -
Re: [Xen-devel] [PATCH 1/7] xen: sched_rt: avoid ASSERT()ing on runq dump if there are no domains
On 16.03.15 at 18:04, dario.faggi...@citrix.com wrote: --- a/xen/common/sched_rt.c +++ b/xen/common/sched_rt.c @@ -264,18 +264,17 @@ rt_dump(const struct scheduler *ops) struct list_head *iter_sdom, *iter_svc, *runq, *depletedq, *iter; struct rt_private *prv = rt_priv(ops); struct rt_vcpu *svc; -cpumask_t *online; struct rt_dom *sdom; unsigned long flags; -ASSERT(!list_empty(prv-sdom)); +spin_lock_irqsave(prv-lock, flags); + +if (list_empty(prv-sdom)) Coding style. +goto out; -sdom = list_entry(prv-sdom.next, struct rt_dom, sdom_elem); -online = cpupool_scheduler_cpumask(sdom-dom-cpupool); Unrelated change (together with the variable deletion above). I'm fine for this to stay here, but it should at least be mentioned in the description so that future archeologists don't wonder about the connection to the actual issue fixed here. @@ -303,6 +302,7 @@ rt_dump(const struct scheduler *ops) } } +out: Labels should be indented by at least one space. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 1/2] VT-d: make XSA-59 workaround fully cover XeonE5/E7 v2
Note that the following Nehalem/Westmere chipsets should be included in this list: Nehalem - 0x40, 0x2c01, 0x2c41, 0x313x Westmere - 0x2c70, 0x2d81, 0xd15x -- Don Dugger Censeo Toto nos in Kansa esse decisse. - D. Gale Ph: 303/443-3786 -Original Message- From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Friday, December 19, 2014 1:41 AM To: xen-devel Cc: Dugger, Donald D; Tian, Kevin; Zhang, Yang Z Subject: [PATCH 1/2] VT-d: make XSA-59 workaround fully cover XeonE5/E7 v2 So far only the VT-d UR masking was being done for them. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/drivers/passthrough/vtd/quirks.c +++ b/xen/drivers/passthrough/vtd/quirks.c @@ -440,6 +440,9 @@ void pci_vtd_quirk(const struct pci_dev seg, bus, dev, func); break; +/* Xeon E5/E7 v2 */ +case 0x0e00: /* host bridge */ +case 0x0e01: case 0x0e04 ... 0x0e0b: /* root ports */ /* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */ case 0x3400 ... 0x3407: /* host bridges */ case 0x3408 ... 0x3411: case 0x3420 ... 0x3421: /* root ports */ ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2 2/2] sched_credit2.c: runqueue_per_core code
On Mon, 2015-03-16 at 12:56 +, Jan Beulich wrote: On 16.03.15 at 13:51, george.dun...@eu.citrix.com wrote: On 03/16/2015 12:48 PM, Jan Beulich wrote: Them returning garbage isn't what needs fixing. Instead the code here should use a different condition to check whether this is the boot CPU (e.g. looking at system_state). And that can very well be done directly in this patch. What do you suggest, then? My preferred solution would be, as said, to leverage system_state. Provided the state to look for is consistent between x86 and ARM. Would something like this make sense? diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index cfca5a7..2f2aa73 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -1936,12 +1936,8 @@ static void init_pcpu(const struct scheduler *ops, int cpu) } /* Figure out which runqueue to put it in */ -rqi = 0; - -/* Figure out which runqueue to put it in */ -/* NB: cpu 0 doesn't get a STARTING callback, so we hard-code it to runqueue 0. */ -if ( cpu == 0 ) -rqi = 0; +if ( system_state == SYS_STATE_boot ) +rqi = boot_cpu_to_socket(cpu); else rqi = cpu_to_socket(cpu); @@ -1986,9 +1982,13 @@ static void init_pcpu(const struct scheduler *ops, int cpu) static void * csched2_alloc_pdata(const struct scheduler *ops, int cpu) { -/* Check to see if the cpu is online yet */ -/* Note: cpu 0 doesn't get a STARTING callback */ -if ( cpu == 0 || cpu_to_socket(cpu) = 0 ) +/* + * Actual initialization is deferred to when the pCPU will be + * online, via a STARTING callback. The only exception is + * the boot cpu, which does not get such a notification, and + * hence needs to be taken care of here. + */ +if ( system_state == SYS_STATE_boot ) init_pcpu(ops, cpu); else printk(%s: cpu %d not online yet, deferring initializatgion\n, diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index cfca5a7..2f2aa73 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -1936,12 +1936,8 @@ static void init_pcpu(const struct scheduler *ops, int cpu) } /* Figure out which runqueue to put it in */ -rqi = 0; - -/* Figure out which runqueue to put it in */ -/* NB: cpu 0 doesn't get a STARTING callback, so we hard-code it to runqueue 0. */ -if ( cpu == 0 ) -rqi = 0; +if ( system_state == SYS_STATE_boot ) +rqi = boot_cpu_to_socket(cpu); else rqi = cpu_to_socket(cpu); @@ -1986,9 +1982,13 @@ static void init_pcpu(const struct scheduler *ops, int cpu) static void * csched2_alloc_pdata(const struct scheduler *ops, int cpu) { -/* Check to see if the cpu is online yet */ -/* Note: cpu 0 doesn't get a STARTING callback */ -if ( cpu == 0 || cpu_to_socket(cpu) = 0 ) +/* + * Actual initialization is deferred to when the pCPU will be + * online, via a STARTING callback. The only exception is + * the boot cpu, which does not get such a notification, and + * hence needs to be taken care of here. + */ +if ( system_state == SYS_STATE_boot ) init_pcpu(ops, cpu); else printk(%s: cpu %d not online yet, deferring initializatgion\n, signature.asc Description: This is a digitally signed message part ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/3] slightly reduce vm_assist code
At 15:55 + on 17 Mar (1426607705), Jan Beulich wrote: - drop an effectively unused struct pv_vcpu field (x86) - adjust VM_ASSIST() to prepend VMASST_TYPE_ Signed-off-by: Jan Beulich jbeul...@suse.com Reviewed-by: Tim Deegan t...@xen.org, though I think these would have been better as two separate patches. Cheers, Tim. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 1/3] libxl: In domain death search, start search at first domid we want
On Tue, Mar 17, 2015 at 09:30:57AM -0600, Jim Fehlig wrote: From: Ian Jackson ian.jack...@eu.citrix.com From: Ian Jackson ian.jack...@eu.citrix.com When domain_death_xswatch_callback needed a further call to xc_domain_getinfolist it would restart it with the last domain it found rather than the first one it wants. If it only wants one it will also only ask for one domain. The result would then be that it gets the previous domain again (ie, the previous one to the one it wants), which still doesn't reveal the answer to the question, and it would therefore loop again. It's completely unclear to me why I thought it was a good idea to start the xc_domain_getinfolist with the last domain previously found rather than the first one left un-confirmed. The code has been that way since it was introduced. Instead, start each xc_domain_getinfolist at the next domain whose status we need to check. We also need to move the test for !evg into the loop, we now need evg to compute the arguments to getinfolist. Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com Reported-by: Jim Fehlig jfeh...@suse.com Reviewed-by: Jim Fehlig jfeh...@suse.com Tested-by: Jim Fehlig jfeh...@suse.com Acked-by: Wei Liu wei.l...@citrix.com ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/3] libxl: Domain destroy: unlock userdata earlier
On Tue, Mar 17, 2015 at 09:30:58AM -0600, Jim Fehlig wrote: From: Ian Jackson ian.jack...@eu.citrix.com Unlock the userdata before we actually call xc_domain_destroy. This leaves open the possibility that other libxl callers will see the half-destroyed domain (with no devices, paused), but this is fine. Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com CC: Wei Liu wei.l...@citrix.com Reviewed-by: Jim Fehlig jfeh...@suse.com Tested-by: Jim Fehlig jfeh...@suse.com Acked-by: Wei Liu wei.l...@citrix.com ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 3/3] libxl: Domain destroy: fork
On Tue, Mar 17, 2015 at 09:30:59AM -0600, Jim Fehlig wrote: From: Ian Jackson ian.jack...@eu.citrix.com Call xc_domain_destroy in a subprocess. That allows us to do so asynchronously, rather than blocking the whole process calling libxl. The changes in detail: * Provide an libxl__ev_child in libxl__domain_destroy_state, and initialise it in libxl__domain_destroy. There is no possibility to `clean up' a libxl__ev_child, but there need to clean it up, as the control flow ensures that we only continue after the child has exited. * Call libxl__ev_child_fork at the right point and put the call to xc_domain_destroy and associated logging in the child. (The child opens a new xenctrl handle because we mustn't use the parent's.) * Consequently, the success return path from domain_destroy_domid_cb no longer calls dis-callback. Instead it simply returns. * We plumb the errorno value through the child's exit status, if it fits. This means we normally do the logging only in the parent. * Incidentally, we fix the bug that we were treating the return value from xc_domain_destroy as an errno value when in fact it is a return value from do_domctl (in this case, 0 or -1 setting errno). Signed-off-by: Ian Jackson ian.jack...@eu.citrix.com Reviewed-by: Jim Fehlig jfeh...@suse.com Tested-by: Jim Fehlig jfeh...@suse.com Reviewed-by: Wei Liu wei.l...@citrix.com One nit below. --- [...] +ctx-xch = xc_interface_open(ctx-lg,0,0); +if (!ctx-xch) goto badchild; + +rc = xc_domain_destroy(ctx-xch, domid); +if (rc 0) goto badchild; +_exit(0); + +badchild: +if (errno 0 errno 126) { +_exit(errno); +} else { +LOGE(ERROR, + xc_domain_destroy failed for %d (with difficult errno value %d), Indentation is wrong. Wei. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 2/2] VT-d: extend XSA-59 workaround to XeonE5 v3 (Haswell)
Note that the following Haswell chipsets should also be included in this list: Haswell - 0xc0f, 0xd00, 0xd04, 0xd08, 0xd0f, 0xa00, 0xa08, 0xa0f -- Don Dugger Censeo Toto nos in Kansa esse decisse. - D. Gale Ph: 303/443-3786 -Original Message- From: Jan Beulich [mailto:jbeul...@suse.com] Sent: Friday, December 19, 2014 1:42 AM To: xen-devel Cc: Dugger, Donald D; Tian, Kevin; Zhang, Yang Z Subject: [PATCH 2/2] VT-d: extend XSA-59 workaround to XeonE5 v3 (Haswell) Note that the datasheet lacks PCI IDs for Dev 1 Fn 0-1, so their IDs are being added based on what https://pci-ids.ucw.cz/read/PC/8086 says. Signed-off-by: Jan Beulich jbeul...@suse.com --- a/xen/drivers/passthrough/vtd/quirks.c +++ b/xen/drivers/passthrough/vtd/quirks.c @@ -431,6 +431,7 @@ void pci_vtd_quirk(const struct pci_dev * - Potential security issue if malicious guest trigger VT-d faults. */ case 0x0e28: /* Xeon-E5v2 (IvyBridge) */ +case 0x2f28: /* Xeon-E5v3 (Haswell) */ case 0x342e: /* Tylersburg chipset (Nehalem / Westmere systems) */ case 0x3728: /* Xeon C5500/C3500 (JasperForest) */ case 0x3c28: /* Sandybridge */ @@ -443,6 +444,9 @@ void pci_vtd_quirk(const struct pci_dev /* Xeon E5/E7 v2 */ case 0x0e00: /* host bridge */ case 0x0e01: case 0x0e04 ... 0x0e0b: /* root ports */ +/* Xeon E5 v3 */ +case 0x2f00: /* host bridge */ +case 0x2f01 ... 0x2f0b: /* root ports */ /* Tylersburg (EP)/Boxboro (MP) chipsets (NHM-EP/EX, WSM-EP/EX) */ case 0x3400 ... 0x3407: /* host bridges */ case 0x3408 ... 0x3411: case 0x3420 ... 0x3421: /* root ports */ ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] Running update-server-info on push
It seems some repos have this via the hooks/post-update.sample having been renamed to hooks/post-update, but a few don't. So I've done: xen@xenbits:~/git$ for i in rumpuser-xen.git mini-os.git libvirt.git ; do mv -iv $i/hooks/post-update.sample $i/hooks/post-update done `rumpuser-xen.git/hooks/post-update.sample' - `rumpuser-xen.git/hooks/post-update' `mini-os.git/hooks/post-update.sample' - `mini-os.git/hooks/post-update' `libvirt.git/hooks/post-update.sample' - `libvirt.git/hooks/post-update' xen@xenbits:~/git$ for i in rumpuser-xen.git mini-os.git libvirt.git ; do ( cd $i git update-server-info ) done xen@xenbits:~/git$ I did not investigate people/* or xenclient/*. This will explain the failure of flight 36502 which has yet to be posted. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping
This change also takes the chance to add a scratch cpumask, to avoid having to create one more cpumask_var_t on the stack of the dumping routine. Actually, I have a question about the strength of this design. When we have a machine with many cpus, we will end up with allocating a cpumask for each cpu. Is this better than having a cpumask_var_t on the stack of the dumping routine, since the dumping routine is not in the hot path? The reason for taking this off the stack is that the hypervisor stack is a fairly limited resource -- IIRC it's only 8k (for each cpu). If the call stack gets too deep, the hypervisor will triple-fault. Keeping really large variables like cpumasks off the stack is key to making sure we don't get close to that. I see. I didn't realize the fact of the limited size of hypervisor stack. That makes sense. Thank you very much for clarification! :-) Best, Meng --- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 04/10] xen/blkfront: separate ring information to an new struct
On 03/17/2015 10:52 PM, Felipe Franciosi wrote: Hi Bob, I've put the hardware back together and am sorting out the software for testing. Things are not moving as fast as I wanted due to other commitments. I'll keep this thread updated as I progress. Malcolm is OOO and I'm trying to get his patches to work on a newer Xen. Thank you! The evaluation will compare: 1) bare metal i/o (for baseline) 2) tapdisk3 (currently using grant copy, which is what scales best in my experience) 3) blkback w/ persistent grants 4) blkback w/o persistent grants (I will just comment out the handshake bits in blkback/blkfront) 5) blkback w/o persistent grants + Malcolm's grant map patches I think you need to add the patches from Christoph Egger with title [PATCH v5 0/2] gnttab: Improve scaleability here. http://lists.xen.org/archives/html/xen-devel/2015-02/msg01188.html To my knowledge, blkback (w/ or w/o persistent grants) is always faster than user space alternatives (e.g. tapdisk, qemu-qdisk) as latency is much lower. However, tapdisk with grant copy has been shown to produce (much) better aggregate throughput figures as it avoids any issues with grant (un)mapping. I'm hoping to show that (5) above scales better than (3) and (4) in a representative scenario. If it does, I will recommend that we get rid of persistent grants in favour of a better and more scalable grant (un)mapping implementation. Right, but even if 5) have better performance, we have to make sure older hypervisors with new linux kernel won't be affected after get rid of persistent grants. -- Regards, -Bob ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping
Hi Dario, 2015-03-17 10:12 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com: On Mon, 2015-03-16 at 16:30 -0400, Meng Xu wrote: Hi Dario, Hey, 2015-03-16 13:05 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com: This change also takes the chance to add a scratch cpumask, to avoid having to create one more cpumask_var_t on the stack of the dumping routine. Actually, I have a question about the strength of this design. When we have a machine with many cpus, we will end up with allocating a cpumask for each cpu. Just FTR, what we will end up allocating is: - an array of *pointers* to cpumasks with as many elements as the number of pCPUs, - a cpumask *only* for the pCPUs subjected to an instance of the RTDS scheduler. So, for instance, if you have 64 pCPUs, but are using the RTDS scheduler only in a cpupool with 2 pCPUs, you'll have an array of 64 pointers to cpumask_t, but only 2 actual cpumasks. Is this better than having a cpumask_var_t on the stack of the dumping routine, since the dumping routine is not in the hot path? George and Jan replied to this already, I think. Allow me to add just a few words: Such scratch area can be used to kill most of the cpumasks_var_t local variables in other functions in the file, but that is *NOT* done in this chage. This is the point, actually! As said here, this is not only for the sake of the dumping routine. In fact, ideally, someone will, in the near future, go throughout the whole file and kill most of the cpumask_t local variables, and most of the cpumask dynamic allocations, in favour of using this scratch area. @@ -409,6 +423,10 @@ rt_init(struct scheduler *ops) if ( prv == NULL ) return -ENOMEM; +_cpumask_scratch = xmalloc_array(cpumask_var_t, nr_cpu_ids); Is it better to use xzalloc_array? Why? IMO, not really. I'm only free()-ing (in rt_free_pdata()) the elements of the array that have been previously successfully allocated (in rt_alloc_pdata()), so I don't think there is any special requirement for all the elements to be NULL right away. OK. I see. +if ( _cpumask_scratch == NULL ) +return -ENOMEM; + spin_lock_init(prv-lock); INIT_LIST_HEAD(prv-sdom); INIT_LIST_HEAD(prv-runq); @@ -426,6 +444,7 @@ rt_deinit(const struct scheduler *ops) { struct rt_private *prv = rt_priv(ops); +xfree(_cpumask_scratch); xfree(prv); } @@ -443,6 +462,9 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) per_cpu(schedule_data, cpu).schedule_lock = prv-lock; spin_unlock_irqrestore(prv-lock, flags); +if ( !alloc_cpumask_var(_cpumask_scratch[cpu]) ) Is it better to use zalloc_cpumask_var() here? Nope. It's a scratch area, after all, so one really should not assume it to be in a specific state (e.g., no bits set as you're suggesting) when using it. I see the point. Now I got it. :-) Thanks and Regards, Thank you very much for clarification! :-) Best, Meng --- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2 5/5] xen: sched_rt: print useful affinity info when dumping
2015-03-17 11:33 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com: In fact, printing the cpupool's CPU online mask for each vCPU is just redundant, as that is the same for all the vCPUs of all the domains in the same cpupool, while hard affinity is already part of the output of dumping domains info. Instead, print the intersection between hard affinity and online CPUs, which is --in case of this scheduler-- the effective affinity always used for the vCPUs. This change also takes the chance to add a scratch cpumask area, to avoid having to either put one (more) cpumask_t on the stack, or dynamically allocate it within the dumping routine. (The former being bad because hypervisor stack size is limited, the latter because dynamic allocations can fail, if the hypervisor was built for a large enough number of CPUs.) Such scratch area can be used to kill most of the cpumasks{_var}_t local variables in other functions in the file, but that is *NOT* done in this chage. Finally, convert the file to use keyhandler scratch, instead of open coded string buffers. Signed-off-by: Dario Faggioli dario.faggi...@citrix.com Cc: George Dunlap george.dun...@eu.citrix.com Cc: Meng Xu xumengpa...@gmail.com Cc: Jan Beulich jbeul...@suse.com Cc: Keir Fraser k...@xen.org --- Changes from v1: * improved changelog; * made a local variable to point to the correct scratch mask, as suggested during review. --- Reviewed-by: Meng Xu men...@cis.upenn.edu Thanks, Best, Meng --- Meng Xu PhD Student in Computer and Information Science University of Pennsylvania ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [RFC PATCH] dpci: Put the dpci back on the list if running on another CPU.
Tuesday, March 17, 2015, 6:44:54 PM, you wrote: Additionally I think it should be considered whether the bitmap approach of interpreting -state is the right one, and we don't instead want a clean 3-state (idle, sched, run) model. Could you elaborate a bit more please? As in three different unsigned int (or bool_t) that set in what state we are in? An enum { STATE_IDLE, STATE_SCHED, STATE_RUN }. Especially if my comment above turns out to be wrong, you'd have no real need for the SCHED and RUN flags to be set at the same time. I cobbled what I believe is what you were thinking off. As you can see to preserve the existing functionality such as being able to schedule N amount of interrupt injections for the N interrupts we might get - I modified '-masked' to be an atomic counter. The end result is that we can still live-lock. Unless we: - Drop on the floor the injection of N interrupts and just deliever at max one per VMX_EXIT (and not bother with interrupts arriving when we are in the VMX handler). - Alter the softirq code slightly - to have an variant which will only iterate once over the pending softirq bits per call. (so save an copy of the bitmap on the stack when entering the softirq handler - and use that. We could also xor it against the current to catch any non-duplicate bits being set that we should deal with). Here is the compile, but not run-time tested patch. From e7d8bcd7c5d32c520554a4ad69c4716246036002 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk konrad.w...@oracle.com Date: Tue, 17 Mar 2015 13:31:52 -0400 Subject: [RFC PATCH] dpci: Switch to tristate instead of bitmap *TODO*: - Writeup. - Tests Done, and unfortunately it doesn't fly .. Some devices seem to work fine, others don't receive any interrupts shortly after boot like: 40: 3 0 0 0 xen-pirq-ioapic-level cx25821[1] Don't see any crashes or errors though, so it seems to silently lock somewhere. -- Sander Suggested-by: Jan Beulich jbuel...@suse.com Signed-off-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com --- xen/drivers/passthrough/io.c | 140 --- xen/include/xen/hvm/irq.h| 4 +- 2 files changed, 82 insertions(+), 62 deletions(-) diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index ae050df..663e104 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -30,42 +30,28 @@ static DEFINE_PER_CPU(struct list_head, dpci_list); /* - * These two bit states help to safely schedule, deschedule, and wait until - * the softirq has finished. - * - * The semantics behind these two bits is as follow: - * - STATE_SCHED - whoever modifies it has to ref-count the domain (-dom). - * - STATE_RUN - only softirq is allowed to set and clear it. If it has - * been set hvm_dirq_assist will RUN with a saved value of the - * 'struct domain' copied from 'pirq_dpci-dom' before STATE_RUN was set. - * - * The usual states are: STATE_SCHED(set) - STATE_RUN(set) - - * STATE_SCHED(unset) - STATE_RUN(unset). - * - * However the states can also diverge such as: STATE_SCHED(set) - - * STATE_SCHED(unset) - STATE_RUN(set) - STATE_RUN(unset). That means - * the 'hvm_dirq_assist' never run and that the softirq did not do any - * ref-counting. - */ - -enum { -STATE_SCHED, -STATE_RUN -}; - -/* * This can be called multiple times, but the softirq is only raised once. - * That is until the STATE_SCHED state has been cleared. The state can be - * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'), - * or by 'pt_pirq_softirq_reset' (which will try to clear the state before + * That is until state is in init. The state can be changed by: + * the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'), + * or by 'pt_pirq_softirq_reset' (which will try to init the state before * the softirq had a chance to run). */ static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci) { unsigned long flags; -if ( test_and_set_bit(STATE_SCHED, pirq_dpci-state) ) +switch ( cmpxchg(pirq_dpci-state, STATE_INIT, STATE_SCHED) ) +{ +case STATE_RUN: +case STATE_SCHED: +/* + * The pirq_dpci-mapping has been incremented to let us know + * how many we have left to do. + */ return; +case STATE_INIT: +break; +} get_knownalive_domain(pirq_dpci-dom); @@ -85,7 +71,7 @@ static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci) */ bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci) { -if ( pirq_dpci-state ((1 STATE_RUN) | (1 STATE_SCHED)) ) +if ( pirq_dpci-state != STATE_INIT ) return 1; /* @@ -109,22 +95,22 @@ static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
Re: [Xen-devel] Any work on sharing of large multi-page segments?
On 3/17/15, Jan Beulich jbeul...@suse.com wrote: And how would that be significantly different from the batching that's already built into the grant table hypercall? I guess it does do more or less what I want already. I was looking more at the inner mapping/unmapping functions, rather than the wrappers around them that implement the actual hypercalls. What would be a useful addition would be support for granting 2M pages. That would eliminate any problem with running out of grant table slots. On 3/17/15, George Dunlap george.dun...@eu.citrix.com wrote: Any deduplication code would run in as a process probably in domain 0, and may be somewhat slow; but the actual mechanism of sharing is a generic mechanism in the hypervisor which any client can use. Jan is suggesting that you might be able to use that interface to pro-actively tell Xen about the memory pages shared between your various domains. I wasn't quite sure if it's generic enough to use to implement shared segments, or if it were specific to deduplication at the hypervisor level. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH v3 0/3] Xen/FLASK policy updates for device contexts
In order to support assigning security lables to ARM device tree nodes in Xen's XSM policy, a new ocontext type is needed in the security policy. In addition to adding the new ocontext, the existing I/O memory range ocontext is expanded to 64 bits in order to support hardware with more than 44 bits of physical address space (32-bit count of 4K pages). Changes from v2: - Clean up printf format strings for 32-bit builds Changes from v1: - Use policy version 30 instead of forking the version numbers for Xen; this removes the need for v1's patch 3. - Report an error when attempting to use an I/O memory range that requires a 64-bit representation with an old policy output version that cannot support this - Fix a few incorrect references to PCIDEVICECON - Reorder patches to clarify the allowed characterset of device tree paths [PATCH 1/3] checkpolicy: Expand allowed character set in paths [PATCH 2/3] libsepol, checkpolicy: widen Xen IOMEM ocontext entries [PATCH 3/3] libsepol, checkpolicy: add device tree ocontext nodes to ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH 3/3] libsepol, checkpolicy: add device tree ocontext nodes to Xen policy
In Xen on ARM, device tree nodes identified by a path (string) need to be labeled by the security policy. Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov --- checkpolicy/policy_define.c| 55 + checkpolicy/policy_define.h| 1 + checkpolicy/policy_parse.y | 8 +++- checkpolicy/policy_scan.l | 2 + libsepol/cil/src/cil.c | 17 libsepol/cil/src/cil_binary.c | 29 + libsepol/cil/src/cil_build_ast.c | 66 ++ libsepol/cil/src/cil_build_ast.h | 2 + libsepol/cil/src/cil_copy_ast.c| 24 +++ libsepol/cil/src/cil_flavor.h | 1 + libsepol/cil/src/cil_internal.h| 10 + libsepol/cil/src/cil_post.c| 34 +++ libsepol/cil/src/cil_reset_ast.c | 10 + libsepol/cil/src/cil_resolve_ast.c | 28 + libsepol/cil/src/cil_tree.c| 13 ++ libsepol/cil/src/cil_verify.c | 24 +++ libsepol/include/sepol/policydb/policydb.h | 1 + libsepol/src/expand.c | 7 libsepol/src/policydb.c| 18 +++- libsepol/src/write.c | 14 ++- sepolgen/src/sepolgen/refparser.py | 11 + sepolgen/src/sepolgen/refpolicy.py | 9 22 files changed, 379 insertions(+), 5 deletions(-) diff --git a/checkpolicy/policy_define.c b/checkpolicy/policy_define.c index 66c1ff2..de01f6f 100644 --- a/checkpolicy/policy_define.c +++ b/checkpolicy/policy_define.c @@ -4116,6 +4116,61 @@ bad: return -1; } +int define_devicetree_context() +{ + ocontext_t *newc, *c, *l, *head; + + if (policydbp-target_platform != SEPOL_TARGET_XEN) { + yyerror(devicetreecon not supported for target); + return -1; + } + + if (pass == 1) { + free(queue_remove(id_queue)); + parse_security_context(NULL); + return 0; + } + + newc = malloc(sizeof(ocontext_t)); + if (!newc) { + yyerror(out of memory); + return -1; + } + memset(newc, 0, sizeof(ocontext_t)); + + newc-u.name = (char *)queue_remove(id_queue); + if (!newc-u.name) { + free(newc); + return -1; + } + + if (parse_security_context(newc-context[0])) { + free(newc-u.name); + free(newc); + return -1; + } + + head = policydbp-ocontexts[OCON_XEN_DEVICETREE]; + for (l = NULL, c = head; c; l = c, c = c-next) { + if (strcmp(newc-u.name, c-u.name) == 0) { + yyerror2(duplicate devicetree entry for '%s', newc-u.name); + goto bad; + } + } + + if (l) + l-next = newc; + else + policydbp-ocontexts[OCON_XEN_DEVICETREE] = newc; + + return 0; + +bad: + free(newc-u.name); + free(newc); + return -1; +} + int define_port_context(unsigned int low, unsigned int high) { ocontext_t *newc, *c, *l, *head; diff --git a/checkpolicy/policy_define.h b/checkpolicy/policy_define.h index 14d30e1..a87ced3 100644 --- a/checkpolicy/policy_define.h +++ b/checkpolicy/policy_define.h @@ -49,6 +49,7 @@ int define_pirq_context(unsigned int pirq); int define_iomem_context(uint64_t low, uint64_t high); int define_ioport_context(unsigned long low, unsigned long high); int define_pcidevice_context(unsigned long device); +int define_devicetree_context(void); int define_range_trans(int class_specified); int define_role_allow(void); int define_role_trans(int class_specified); diff --git a/checkpolicy/policy_parse.y b/checkpolicy/policy_parse.y index e3899b9..8b81f04 100644 --- a/checkpolicy/policy_parse.y +++ b/checkpolicy/policy_parse.y @@ -130,7 +130,7 @@ typedef int (* require_func_t)(int pass); %token TARGET %token SAMEUSER %token FSCON PORTCON NETIFCON NODECON -%token PIRQCON IOMEMCON IOPORTCON PCIDEVICECON +%token PIRQCON IOMEMCON IOPORTCON PCIDEVICECON DEVICETREECON %token FSUSEXATTR FSUSETASK FSUSETRANS %token GENFSCON %token U1 U2 U3 R1 R2 R3 T1 T2 T3 L1 L2 H1 H2 @@ -644,7 +644,8 @@ dev_contexts: dev_context_def dev_context_def: pirq_context_def | iomem_context_def | ioport_context_def | - pci_context_def + pci_context_def | + dtree_context_def ; pirq_context_def : PIRQCON number security_context_def {if (define_pirq_context($2)) return -1;} @@ -662,6 +663,9 @@ ioport_context_def : IOPORTCON number security_context_def pci_context_def: PCIDEVICECON number security_context_def
[Xen-devel] [PATCH 1/3] checkpolicy: Expand allowed character set in paths
In order to support paths containing spaces or other characters, allow a quoted string with these characters to be parsed as a path in addition to the existing unquoted string. Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov --- checkpolicy/policy_parse.y | 3 +++ checkpolicy/policy_scan.l | 1 + 2 files changed, 4 insertions(+) diff --git a/checkpolicy/policy_parse.y b/checkpolicy/policy_parse.y index 15c8997..e5210bd 100644 --- a/checkpolicy/policy_parse.y +++ b/checkpolicy/policy_parse.y @@ -81,6 +81,7 @@ typedef int (* require_func_t)(int pass); %type require_func require_decl_def %token PATH +%token QPATH %token FILENAME %token CLONE %token COMMON @@ -805,6 +806,8 @@ filesystem : FILESYSTEM ; path : PATH { if (insert_id(yytext,0)) return -1; } + | QPATH + { yytext[strlen(yytext) - 1] = '\0'; if (insert_id(yytext + 1,0)) return -1; } ; filename : FILENAME { yytext[strlen(yytext) - 1] = '\0'; if (insert_id(yytext + 1,0)) return -1; } diff --git a/checkpolicy/policy_scan.l b/checkpolicy/policy_scan.l index 648e1d6..6763c38 100644 --- a/checkpolicy/policy_scan.l +++ b/checkpolicy/policy_scan.l @@ -240,6 +240,7 @@ HIGH{ return(HIGH); } low | LOW{ return(LOW); } /({alnum}|[_\.\-/])* { return(PATH); } +\/[ !#-~]*\{ return(QPATH); } \({alnum}|[_\.\-\+\~\: ])+\ { return(FILENAME); } {letter}({alnum}|[_\-])*([\.]?({alnum}|[_\-]))*{ return(IDENTIFIER); } {digit}+|0x{hexval}+{ return(NUMBER); } -- 2.1.0 ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] dpci: Put the dpci back on the list if scheduled from another CPU.
On Tue, Mar 17, 2015 at 04:06:14PM +, Jan Beulich wrote: On 17.03.15 at 16:38, konrad.w...@oracle.com wrote: --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -804,7 +804,17 @@ static void dpci_softirq(void) d = pirq_dpci-dom; smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */ if ( test_and_set_bit(STATE_RUN, pirq_dpci-state) ) -BUG(); +{ +unsigned long flags; + +/* Put back on the list and retry. */ +local_irq_save(flags); +list_add_tail(pirq_dpci-softirq_list, this_cpu(dpci_list)); +local_irq_restore(flags); + +raise_softirq(HVM_DPCI_SOFTIRQ); +continue; +} As just said in another mail - unless there are convincing new arguments in favor of this (more of a hack than a real fix), I'm not going to accept it and instead consider reverting the offending commit. Iirc the latest we had come to looked quite a bit better than this one. The latest one (please see attached) would cause an dead-lock iff on the CPU we are running the softirq and an do_IRQ comes for the exact dpci we are in process of executing. Jan From 6b32dccfbe00518d3ca9cd94d19a6e007b2645d9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk konrad.w...@oracle.com Date: Tue, 17 Mar 2015 09:46:09 -0400 Subject: [PATCH] dpci: when scheduling spin until STATE_RUN or STATE_SCHED has been cleared. There is race when we clear the STATE_SCHED in the softirq - which allows the 'raise_softirq_for' (on another CPU) to schedule the dpci. Specifically this can happen whenthe other CPU receives an interrupt, calls 'raise_softirq_for', and puts the dpci on its per-cpu list (same dpci structure). There would be two 'dpci_softirq' running at the same time (on different CPUs) where on one CPU it would be executing hvm_dirq_assist (so had cleared STATE_SCHED and set STATE_RUN) and on the other CPU it is trying to call: if ( test_and_set_bit(STATE_RUN, pirq_dpci-state) ) BUG(); Since STATE_RUN is already set it would end badly. The reason we can get his with this is when an interrupt affinity is set over multiple CPUs. Potential solutions: a) Instead of the BUG() we can put the dpci back on the per-cpu list to deal with later (when the softirq are activated again). This putting the 'dpci' back on the per-cpu list is an spin until the bad condition clears. b) We could also expand the test-and-set(STATE_SCHED) in raise_softirq_for to detect for 'STATE_RUN' bit being set and schedule the dpci in a more safe manner (delay it). The dpci would stil not be scheduled when STATE_SCHED bit was set. c) This patch explores a third option - we will only schedule the dpci when the state is cleared (no STATE_SCHED and no STATE_RUN). We will spin if STATE_RUN is set (as it is in progress and will finish). If the STATE_SCHED is set (so hasn't run yet) we won't try to spin and just exit. This can cause an dead-lock if the interrupt comes when we are processing the dpci in the softirq. Interestingly the old ('tasklet') code used an a) mechanism. If the function assigned to the tasklet was running - the softirq that ran said function (hvm_dirq_assist) would be responsible for putting the tasklet back on the per-cpu list. This would allow to have an running tasklet and an 'to-be-scheduled' tasklet at the same time. This solution moves this 'to-be-scheduled' job to be done in 'raise_softirq_for' (instead of the 'softirq'). Reported-by: Sander Eikelenboom li...@eikelenboom.it Reported-by: Malcolm Crossley malcolm.cross...@citrix.com Signed-off-by: Konrad Rzeszutek Wilk konrad.w...@oracle.com --- xen/drivers/passthrough/io.c | 28 +--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index ae050df..9c30ebb 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -63,10 +63,32 @@ enum { static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci) { unsigned long flags; +unsigned long old; -if ( test_and_set_bit(STATE_SCHED, pirq_dpci-state) ) -return; - +/* + * This cmpxchg spins until the state is zero (unused). + */ +for ( ;; ) +{ +old = cmpxchg(pirq_dpci-state, 0, 1 STATE_SCHED); +switch ( old ) +{ +case (1 STATE_SCHED): +/* + * Whenever STATE_SCHED is set we MUST not schedule it. + */ +return; +case (1 STATE_RUN) | (1 STATE_SCHED): +case (1 STATE_RUN): +/* Getting close to finish. Spin. */ +continue; +} +/* + * If the 'state' is 0 (not in use) we can schedule it. + */ +if ( old == 0 ) +break; +} get_knownalive_domain(pirq_dpci-dom); local_irq_save(flags); -- 2.1.0
[Xen-devel] [PATCH 2/3] libsepol, checkpolicy: widen Xen IOMEM ocontext entries
This expands IOMEMCON device context entries to 64 bits. This change is required to support static I/O memory range labeling for systems with over 16TB of physical address space. The policy version number change is shared with the next patch. While this makes no changes to SELinux policy, a new SELinux policy compatibility entry was added in order to avoid breaking compilation of an SELinux policy without explicitly specifying the policy version. Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov --- checkpolicy/policy_define.c| 11 +- checkpolicy/policy_define.h| 2 +- checkpolicy/policy_parse.y | 9 ++-- libsepol/cil/src/cil_build_ast.c | 32 ++--- libsepol/cil/src/cil_build_ast.h | 1 + libsepol/cil/src/cil_internal.h| 4 ++-- libsepol/cil/src/cil_policy.c | 3 ++- libsepol/cil/src/cil_tree.c| 3 ++- libsepol/include/sepol/policydb/policydb.h | 7 --- libsepol/src/policydb.c| 33 +- libsepol/src/write.c | 32 ++--- policycoreutils/hll/pp/pp.c| 4 ++-- 12 files changed, 109 insertions(+), 32 deletions(-) diff --git a/checkpolicy/policy_define.c b/checkpolicy/policy_define.c index a6c5d65..66c1ff2 100644 --- a/checkpolicy/policy_define.c +++ b/checkpolicy/policy_define.c @@ -39,6 +39,7 @@ #include arpa/inet.h #include stdlib.h #include limits.h +#include inttypes.h #include sepol/policydb/expand.h #include sepol/policydb/policydb.h @@ -3932,7 +3933,7 @@ bad: return -1; } -int define_iomem_context(unsigned long low, unsigned long high) +int define_iomem_context(uint64_t low, uint64_t high) { ocontext_t *newc, *c, *l, *head; char *id; @@ -3960,7 +3961,7 @@ int define_iomem_context(unsigned long low, unsigned long high) newc-u.iomem.high_iomem = high; if (low high) { - yyerror2(low memory 0x%lx exceeds high memory 0x%lx, low, high); + yyerror2(low memory 0x%PRIx64 exceeds high memory 0x%PRIx64, low, high); free(newc); return -1; } @@ -3972,13 +3973,13 @@ int define_iomem_context(unsigned long low, unsigned long high) head = policydbp-ocontexts[OCON_XEN_IOMEM]; for (l = NULL, c = head; c; l = c, c = c-next) { - uint32_t low2, high2; + uint64_t low2, high2; low2 = c-u.iomem.low_iomem; high2 = c-u.iomem.high_iomem; if (low = high2 low2 = high) { - yyerror2(iomemcon entry for 0x%lx-0x%lx overlaps with - earlier entry 0x%x-0x%x, low, high, + yyerror2(iomemcon entry for 0x%PRIx64-0x%PRIx64 overlaps with + earlier entry 0x%PRIx64-0x%PRIx64, low, high, low2, high2); goto bad; } diff --git a/checkpolicy/policy_define.h b/checkpolicy/policy_define.h index 4ef0f4f..14d30e1 100644 --- a/checkpolicy/policy_define.h +++ b/checkpolicy/policy_define.h @@ -46,7 +46,7 @@ int define_permissive(void); int define_polcap(void); int define_port_context(unsigned int low, unsigned int high); int define_pirq_context(unsigned int pirq); -int define_iomem_context(unsigned long low, unsigned long high); +int define_iomem_context(uint64_t low, uint64_t high); int define_ioport_context(unsigned long low, unsigned long high); int define_pcidevice_context(unsigned long device); int define_range_trans(int class_specified); diff --git a/checkpolicy/policy_parse.y b/checkpolicy/policy_parse.y index e5210bd..e3899b9 100644 --- a/checkpolicy/policy_parse.y +++ b/checkpolicy/policy_parse.y @@ -67,6 +67,7 @@ typedef int (* require_func_t)(int pass); %union { unsigned int val; + uint64_t val64; uintptr_t valptr; void *ptr; require_func_t require_func; @@ -78,6 +79,7 @@ typedef int (* require_func_t)(int pass); %type ptr role_def roles %type valptr cexpr cexpr_prim op role_mls_op %type val ipv4_addr_def number +%type val64 number64 %type require_func require_decl_def %token PATH @@ -647,9 +649,9 @@ dev_context_def : pirq_context_def | pirq_context_def : PIRQCON number security_context_def {if (define_pirq_context($2)) return -1;} ; -iomem_context_def : IOMEMCON number security_context_def +iomem_context_def : IOMEMCON number64 security_context_def {if (define_iomem_context($2,$2)) return -1;} - | IOMEMCON number '-' number security_context_def + | IOMEMCON number64 '-' number64 security_context_def {if (define_iomem_context($2,$4)) return -1;}
Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)
On Tue, 2015-03-17 at 10:54 +, Jan Beulich wrote: On 16.03.15 at 18:05, dario.faggi...@citrix.com wrote: such as it is taken care of by the various schedulers, rather than happening in schedule.c. In fact, it is the schedulers that know better which locks are necessary for the specific dumping operations. While there, fix a few style issues (indentation, trailing whitespace, parentheses and blank line after var declarations) Signed-off-by: Dario Faggioli dario.faggi...@citrix.com Cc: George Dunlap george.dun...@eu.citrix.com Cc: Meng Xu xumengpa...@gmail.com Cc: Jan Beulich jbeul...@suse.com Cc: Keir Fraser k...@xen.org --- xen/common/sched_credit.c | 42 -- xen/common/sched_credit2.c | 40 xen/common/sched_rt.c |7 +-- xen/common/schedule.c |5 ++--- 4 files changed, 79 insertions(+), 15 deletions(-) Is it really correct that sched_sedf.c doesn't need any change here? Good point. I feel like mentioning though that SEDF is broken in many ways, and I (and I suspect George is in a similar situation) really don't have the bandwidth to fix it. Therefore, I really think we should start a discussion on how to deprecate/get rid of it. That being said, this is simple enough a case that I certainly can have a quick look. --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -26,6 +26,23 @@ /* + * Locking: + * - Scheduler-lock (a.k.a. runqueue lock): + * + is per-runqueue, and there is one runqueue per-cpu; + * + serializes all runqueue manipulation operations; + * - Private data lock (a.k.a. private scheduler lock): + * + serializes accesses to the scheduler global state (weight, + *credit, balance_credit, etc); + * + serializes updates to the domains' scheduling parameters. + * + * Ordering is private lock always comes first: + * + if we need both locks, we must acquire the private + *scheduler lock for first; + * + if we already own a runqueue lock, we must never acquire + *the private scheduler lock. + */ And this is Credit1 specific? It is similar (although with some differences) in Credit2, but there is already a comment there, similar to this, stating it. RTDS, as of now, only use _one_ lock, so not much to say about ordering. :-D Regardless of that, even if that's just reflecting current state, isn't acquiring a private lock around a generic lock backwards? It sounds like so, I agree, when describing it in English. But then, looking at the code, things make sense, IMO. Finally, as said in different contexts earlier, I think unconditionally acquiring locks in dumping routines isn't the best practice. At least in non-debug builds I think these should be try-locks only, skipping the dumping when a lock is busy. That makes sense. It also looks, if not completely orthogonal, something that can be done in a follow-up patch wrt this series. I.e., we first move the locking logic where it belongs, and then we rework it toward using _trylock(), does this make sense? Regards, Dario signature.asc Description: This is a digitally signed message part ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [RFC PATCH] xen-netback: making the bandwidth limiter runtime settable
On Fri, Mar 13, 2015 at 01:51:05PM +0100, Imre Palik wrote: From: Palik, Imre im...@amazon.de With the current netback, the bandwidth limiter's parameters are only settable during vif setup time. This patch register a watch on them, and thus makes them runtime changeable. When the watch fires, the timer is reset. The timer's mutex is used for fencing the change. I think this is a valid idea. Just that this commit message is not complete. It doesn't describe everything this patch does. Cc: Anthony Liguori aligu...@amazon.com Signed-off-by: Imre Palik im...@amazon.de --- [...] queue-rx_queue_max = XENVIF_RX_QUEUE_BYTES; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index cab9f52..bcc1880 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -641,7 +641,7 @@ static void tx_add_credit(struct xenvif_queue *queue) queue-remaining_credit = min(max_credit, max_burst); } -static void tx_credit_callback(unsigned long data) +void xenvif_tx_credit_callback(unsigned long data) Please keep this function static. And say in the commit message you change tx_credit_callback to a better name. { struct xenvif_queue *queue = (struct xenvif_queue *)data; tx_add_credit(queue); @@ -1170,7 +1170,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) queue-credit_timeout.data = (unsigned long)queue; queue-credit_timeout.function = - tx_credit_callback; + xenvif_tx_credit_callback; mod_timer(queue-credit_timeout, next_credit); [...] @@ -594,13 +597,9 @@ static void xen_net_read_rate(struct xenbus_device *dev, unsigned long b, u; char *ratestr; - /* Default to unlimited bandwidth. */ - *bytes = ~0UL; - *usec = 0; - ratestr = xenbus_read(XBT_NIL, dev-nodename, rate, NULL); if (IS_ERR(ratestr)) - return; + goto reset; s = ratestr; b = simple_strtoul(s, e, 10); @@ -612,15 +611,21 @@ static void xen_net_read_rate(struct xenbus_device *dev, if ((s == e) || (*e != '\0')) goto fail; + kfree(ratestr); + *bytes = b; *usec = u; - kfree(ratestr); return; - fail: +fail: pr_warn(Failed to parse network rate limit. Traffic unlimited.\n); kfree(ratestr); + +reset: + /* Default to unlimited bandwidth. */ + *bytes = ~0UL; + *usec = 0; } Any reason you modify this function? It is still doing the exact same thing, right? static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) @@ -645,6 +650,59 @@ static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) return 0; } +static void xen_net_rate_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xenvif *vif = container_of(watch, struct xenvif, credit_watch); + struct xenbus_device *dev = xenvif_to_xenbus_device(vif); + unsigned long credit_bytes; + unsigned long credit_usec; + unsigned int queue_index; + + xen_net_read_rate(dev, credit_bytes, credit_usec); + for (queue_index = 0; queue_index vif-num_queues; queue_index++) { + struct xenvif_queue *queue = vif-queues[queue_index]; + + queue-credit_bytes = credit_bytes; + queue-credit_usec = credit_usec; + if (!mod_timer_pending(queue-credit_timeout, jiffies) + queue-remaining_credit queue-credit_bytes) { + queue-remaining_credit = queue-credit_bytes; + } + } +} + +static int xen_register_watchers(struct xenbus_device *dev, struct xenvif *vif) +{ + int err = 0; + char *node; + unsigned maxlen = strlen(dev-nodename) + sizeof(/rate); + + node = kmalloc(maxlen, GFP_KERNEL); + if (!node) + return -ENOMEM; + sprintf(node, %s/rate, dev-nodename); Please use snprintf. (Though I can see using sprintf is fine here but I want the code to be a bit future proof.) + vif-credit_watch.node = node; + vif-credit_watch.callback = xen_net_rate_changed; + err = register_xenbus_watch(vif-credit_watch); + if (err) { + pr_err(Failed to set watcher %s\n, vif-credit_watch.node); + kfree(node); + vif-credit_watch.node = 0; + vif-credit_watch.callback = 0; Please use NULL here. Wei. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [RFC PATCH] xen-netback: making the bandwidth limiter runtime settable
On Fri, 2015-03-13 at 13:51 +0100, Imre Palik wrote: [...] diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 3aa8648..34d8038 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -463,6 +463,7 @@ int xenvif_init_queue(struct xenvif_queue *queue) queue-credit_bytes = queue-remaining_credit = ~0UL; queue-credit_usec = 0UL; init_timer(queue-credit_timeout); + queue-credit_timeout.function = xenvif_tx_credit_callback; queue-credit_window_start = get_jiffies_64(); queue-rx_queue_max = XENVIF_RX_QUEUE_BYTES; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index cab9f52..bcc1880 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c [...] @@ -1170,7 +1170,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size) queue-credit_timeout.data = (unsigned long)queue; queue-credit_timeout.function = - tx_credit_callback; + xenvif_tx_credit_callback; Are this init and the one in xenvif_init_queue really both needed? Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping
On 17.03.15 at 12:10, george.dun...@eu.citrix.com wrote: On 03/16/2015 08:30 PM, Meng Xu wrote: 2015-03-16 13:05 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com: This change also takes the chance to add a scratch cpumask, to avoid having to create one more cpumask_var_t on the stack of the dumping routine. Actually, I have a question about the strength of this design. When we have a machine with many cpus, we will end up with allocating a cpumask for each cpu. Is this better than having a cpumask_var_t on the stack of the dumping routine, since the dumping routine is not in the hot path? The reason for taking this off the stack is that the hypervisor stack is a fairly limited resource -- IIRC it's only 8k (for each cpu). If the call stack gets too deep, the hypervisor will triple-fault. Keeping really large variables like cpumasks off the stack is key to making sure we don't get close to that. Actually here you talk about cpumask_t-s on the stack. cpumask_var_t-s aren't a problem stack size wise, but are an issue due to the need to dynamically allocate them (and the potential failure thereof) when the hypervisor was built for a large enough number of CPUs. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)
On 17.03.15 at 12:32, george.dun...@eu.citrix.com wrote: On 03/17/2015 11:25 AM, Jan Beulich wrote: On 17.03.15 at 12:05, george.dun...@eu.citrix.com wrote: On 03/17/2015 10:54 AM, Jan Beulich wrote: Finally, as said in different contexts earlier, I think unconditionally acquiring locks in dumping routines isn't the best practice. At least in non-debug builds I think these should be try-locks only, skipping the dumping when a lock is busy. You mean so that we don't block the console if there turns out to be a deadlock? For example. And also to not unduly get in the way of an otherwise extremely busy system. I don't understand this last argument. If you're using the debug keys, you want to know about the state of the system. I would much rather my system ran 25% slower for the 5 seconds the debug key was dumping information, and have a complete snapshot of the system, than for it to only run 10% slower and to have half the information missing. The upshot of missing information would likely be that I have to press the debug key 3-4 times in a row, meaning I'd be running 10% slower for 20 seconds rather than 25% slower for 5 seconds. Yes, I understand this, and in many cases this is the perspective to take. Yet I've been in the situation where suggesting the use of debug keys to learn something about a (partially) live locked system would have had the risk of causing further corruption to it, and hence a more careful state dumping approach would have been desirable. All in all, I don't think the performance of the debug keys should be a major concern. The only thing I'd be worried about is making the system as diagnosable as possible if things have already gone pear-shaped (e.g., if there's a deadlock). It's not their performance that's of concern, but the effect they may have on the performance (or even correctness - see how many process_pending_softirqs() calls we had to sprinkle around over the years) of other code. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCHv1] xen/balloon: disable memory hotplug in PV guests
On Mon, Mar 16, 2015 at 11:31:49AM +0100, Juergen Gross wrote: On 03/16/2015 11:03 AM, Daniel Kiper wrote: On Mon, Mar 16, 2015 at 06:35:04AM +0100, Juergen Gross wrote: On 03/11/2015 04:40 PM, Boris Ostrovsky wrote: On 03/11/2015 10:42 AM, David Vrabel wrote: On 10/03/15 13:35, Boris Ostrovsky wrote: On 03/10/2015 07:40 AM, David Vrabel wrote: On 09/03/15 14:10, David Vrabel wrote: Memory hotplug doesn't work with PV guests because: a) The p2m cannot be expanded to cover the new sections. Broken by 054954eb051f35e74b75a566a96fe756015352c8 (xen: switch to linear virtual mapped sparse p2m list). This one would be non-trivial to fix. We'd need a sparse set of vm_area's for the p2m or similar. b) add_memory() builds page tables for the new sections which means the new pages must have valid p2m entries (or a BUG occurs). After some more testing this appears to be broken by: 25b884a83d487fd62c3de7ac1ab5549979188482 (x86/xen: set regions above the end of RAM as 1:1) included 3.16. This one can be trivially fixed by setting the new sections in the p2m to INVALID_P2M_ENTRY before calling add_memory(). Have you tried 3.17? As I said yesterday, it worked for me (with 4.4 Xen). No. But there are three bugs that prevent it from working in 3.16+ so I'm really not sure how you had a working in a 3.17 PV guest. This is what I have: [build@build-mk2 linux-boris]$ ssh root@tst008 cat /mnt/lab/bootstrap-x86_64/test_small.xm extra=console=hvc0 debug earlyprintk=xen kernel=/mnt/lab/bootstrap-x86_64/vmlinuz ramdisk=/mnt/lab/bootstrap-x86_64/initramfs.cpio.gz memory=1024 maxmem = 4096 vcpus=1 maxvcpus=3 name=bootstrap-x86_64 on_crash=preserve vif = [ 'mac=00:0F:4B:00:00:68, bridge=switch' ] vnc=1 vnclisten=0.0.0.0 disk=['phy:/dev/guests/bootstrap-x86_64,xvda,w'] [build@build-mk2 linux-boris]$ ssh root@tst008 xl create /mnt/lab/bootstrap-x86_64/test_small.xm Parsing config from /mnt/lab/bootstrap-x86_64/test_small.xm [build@build-mk2 linux-boris]$ ssh root@tst008 xl list |grep bootstrap-x86_64 bootstrap-x86_64 2 1024 1 -b 5.4 [build@build-mk2 linux-boris]$ ssh root@g-pvops uname -r 3.17.0upstream [build@build-mk2 linux-boris]$ ssh root@g-pvops dmesg|grep paravirtualized [0.00] Booting paravirtualized kernel on Xen [build@build-mk2 linux-boris]$ ssh root@g-pvops grep MemTotal /proc/meminfo MemTotal: 968036 kB [build@build-mk2 linux-boris]$ ssh root@tst008 xl mem-set bootstrap-x86_64 2048 [build@build-mk2 linux-boris]$ ssh root@tst008 xl list |grep bootstrap-x86_64 bootstrap-x86_64 2 2048 1 -b 5.7 [build@build-mk2 linux-boris]$ ssh root@g-pvops grep MemTotal /proc/meminfo MemTotal:2016612 kB [build@build-mk2 linux-boris]$ Regardless, it definitely doesn't work now because of the linear p2m change. What do you want to do about this? Since backing out p2m changes is not an option I guess your patch is the only short-term alternative. But this still looks like a regression so perhaps Juergen can take a look to see how it can be fixed. Hmm, the p2m list is allocated for the maximum memory size of the domain which is obtained from the hypervisor. In case of Dom0 it is read via XENMEM_maximum_reservation, for a domU it is based on the E820 memory map read via XENMEM_memory_map. I just tested it with a 4.0-rc1 domU kernel with 512MB initial memory and 4GB of maxmem. The E820 map looked like this: [0.00] Xen: [mem 0x-0x0009] usable [0.00] Xen: [mem 0x000a-0x000f] reserved [0.00] Xen: [mem 0x0010-0x] usable So the complete 4GB were included, like they should. The resulting p2m list is allocated in the needed size: [0.00] p2m virtual area at c900, size is 80 So what is your problem here? Can you post the E820 map and the p2m map info for your failing domain, please? If you use memory hotplug then maxmem is not a limit from guest kernel point of view (host still must allow that operation but it is another not related issue). The problem is that p2m must be dynamically expendable to support it. Earlier implementation supported that thing and memory hotplug worked without any issue. Okay, now I get it. The problem with the earlier p2m implementation was that it was expendable to support only up to 512GB of RAM. So we need some way to tell the kernel how much virtual memory it should reserve for the p2m list if memory hotplug is enabled. We could: a) use a configurable maximum (e.g. for 512GB RAM as today) b) use the maximum of RAM the machine the domain is started on can ever have (what about migration then?) Memory hotplug for Xen is planned, so, this would not work. c) use a kernel parameter specifying the maximum memory size to support d) a
Re: [Xen-devel] PVH DomU panics on boot on Xen 4.5.0 whereas it was fine on 4.4.1
On Mon, Mar 16, 2015 at 11:08:50PM +, Ian Murray wrote: On 16/03/15 14:12, Konrad Rzeszutek Wilk wrote: On Sun, Mar 15, 2015 at 09:34:16PM +, Ian Murray wrote: Hi, I have a domU guest that booted fine under Xen 4.4.1 with pvh=1 but now fails to boot with it under Xen 4.5.0. Removing pvh=1, i.e. booting it as traditional PV results in it booting fine. The only odd thing is that I had to compile with debug=y on Config.mk to avoid a compiler warning that was causing compilation to fail outright. I will create another mail about that. DomU is Ubuntu 14.10 and Dom0 is 12.04.5 there were some incompatible changes in xen 4.5 in regards to PVH which were then updated in Linux 3.19 (or was it 3.18?) I would recommend you rev up to the latest version of Linux. I tried the mainline support kernel for Ubuntu (GNU/Linux 3.19.1-031901-generic x86_64) and it booted fine. Thanks for the assistance and happy to assist if anyone wants to treat it as a regression. Nah, it is labelled 'expermintal' for that exact reason - as we did realized we made a mistake in Xen 4.4 that we ended up fixing in Xen 4.5 - and the fixed it in Linux. Sorry thought that it was not widely mentioned and it made your day a bit sad. Was there an specific webpage you looked first for help? (Asking so I can at least edit it to mention this). Here is the boot output when booting with pvh=1 [0.00] Initializing cgroup subsys cpuset [0.00] Initializing cgroup subsys cpu [0.00] Initializing cgroup subsys cpuacct [0.00] Linux version 3.16.0-31-generic (buildd@batsu) (gcc version 4.9.1 (Ubuntu 4.9.1-16ubuntu6) ) #41-Ubuntu SMP Tue Feb 10 15:24:04 UTC 2015 (Ubuntu 3.16.0-31.41-generic 3.16.7-ckt5) [0.00] Command line: root=UUID=edfcef2a-dcf1-4c77-ad69-22456606702e ro nomodeset xen-fbfront.video=16,1024,768 [0.00] KERNEL supported cpus: [0.00] Intel GenuineIntel [0.00] AMD AuthenticAMD [0.00] Centaur CentaurHauls [0.00] ACPI in unprivileged domain disabled [0.00] e820: BIOS-provided physical RAM map: [0.00] Xen: [mem 0x-0x3fff] usable [0.00] NX (Execute Disable) protection: active [0.00] DMI not present or invalid. [0.00] AGP: No AGP bridge found [0.00] e820: last_pfn = 0x4 max_arch_pfn = 0x4 [0.00] Scanning 1 areas for low memory corruption [0.00] init_memory_mapping: [mem 0x-0x000f] [0.00] init_memory_mapping: [mem 0x3fe0-0x3fff] [0.00] init_memory_mapping: [mem 0x3c00-0x3fdf] [0.00] init_memory_mapping: [mem 0x0010-0x3bff] [0.00] RAMDISK: [mem 0x023f6000-0x0589] [0.00] NUMA turned off [0.00] Faking a node at [mem 0x-0x3fff] [0.00] Initmem setup node 0 [mem 0x-0x3fff] [0.00] NODE_DATA [mem 0x3fffb000-0x3fff] [0.00] Zone ranges: [0.00] DMA [mem 0x1000-0x00ff] [0.00] DMA32[mem 0x0100-0x] [0.00] Normal empty [0.00] Movable zone start for each node [0.00] Early memory node ranges [0.00] node 0: [mem 0x1000-0x0009] [0.00] node 0: [mem 0x0010-0x3fff] [0.00] SFI: Simple Firmware Interface v0.81 http://simplefirmware.org [0.00] smpboot: Allowing 2 CPUs, 0 hotplug CPUs [0.00] PM: Registered nosave memory: [mem 0x000a-0x000f] [0.00] e820: [mem 0x4000-0x] available for PCI devices [0.00] Booting paravirtualized kernel with PVH extensions on Xen [0.00] Xen version: 4.5.0 [0.00] setup_percpu: NR_CPUS:256 nr_cpumask_bits:256 nr_cpu_ids:2 nr_node_ids:1 [0.00] PERCPU: Embedded 28 pages/cpu @88003fc0 s83328 r8192 d23168 u1048576 [0.00] Built 1 zonelists in Node order, mobility grouping on. Total pages: 257930 [0.00] Policy zone: DMA32 [0.00] Kernel command line: root=UUID=edfcef2a-dcf1-4c77-ad69-22456606702e ro nomodeset xen-fbfront.video=16,1024,768 [0.00] PID hash table entries: 4096 (order: 3, 32768 bytes) [0.00] xsave: enabled xstate_bv 0x3, cntxt size 0x240 [0.00] AGP: Checking aperture... [0.00] AGP: No AGP bridge found [0.00] Memory: 958828K/1048188K available (7743K kernel code, 1197K rwdata, 3612K rodata, 1360K init, 1308K bss, 89360K reserved) [0.00] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=2, Nodes=1 [0.00] Hierarchical RCU implementation. [0.00] RCU dyntick-idle grace-period acceleration is enabled. [0.00] RCU restricting CPUs from NR_CPUS=256 to nr_cpu_ids=2. [0.00] Offload RCU
Re: [Xen-devel] OpenStack - Libvirt+Xen CI overview
On Tue, Mar 10, 2015 at 12:03 PM, Bob Ball bob.b...@citrix.com wrote: For the last few weeks Anthony and I have been working on creating a CI environment to run against all OpenStack jobs. We're now in a position where we can share the current status, overview of how it works and next steps. We actively want to support involvement in this effort from others with an interest in libvirt+Xen's openstack integration. The CI we have set up is follow the recommendations made by the OpenStack official infrastructure maintainers, and reproduces a notable portion of the official OpenStack CI environment to run these tests. Namely this setup is using: - Puppet to deploy the master node - Zuul to watch for code changes uploaded to review.openstack.org - Jenkins job builder to create Jenkins job definitions from a YAML file - Nodepool to automatically create single-use virtual machines in the Rackspace public cloud - Devstack-gate to run Tempest tests in serial More information on Zuul, JJB, Nodepool and devstack-gate is available through http://ci.openstack.org The current status is that we have a zuul instance monitoring for jobs and adding them to the queue of jobs to be run at http://zuul.openstack.xenproject.org/ In the background Nodepool provisions virtual machines into a pool of nodes ready to be used. All ready nodes are automatically added to Jenkins (https://jenkins.openstack.xenproject.org/), and then Zuul+Jenkins will trigger a particular job on a node when one is available. Logs are then uploaded to Rackspace's Cloud Files with sample logs for a passing job at http://logs.openstack.xenproject.org/52/162352/3/silent/dsvm-tempest-xen/da3ff30/index.html I'd like to organise a meeting to walk through the various components of the CI with those who are interested, so this is an initial call to find out who is interested in finding out more! I'd be interested. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)
On Tue, 2015-03-17 at 11:05 +, George Dunlap wrote: On 03/17/2015 10:54 AM, Jan Beulich wrote: Finally, as said in different contexts earlier, I think unconditionally acquiring locks in dumping routines isn't the best practice. At least in non-debug builds I think these should be try-locks only, skipping the dumping when a lock is busy. You mean so that we don't block the console if there turns out to be a deadlock? That makes some sense; but on a busy system that would mean a non-negligible chance that any give keystroke would be missing information about some cpu or other, which would be pretty frustrating for someone trying to figure out the state of their system. Mmm... that's a very good point, actually, which I did not think about. Would it make sense to have a version of spin_trylock for use in this kind of situation that waits retries a reasonable number of times before giving up? Probably. After all, if one comes to the point of sending a debug-key, he most likely want to see the system status at (almost) any cost... Regards, Dario signature.asc Description: This is a digitally signed message part ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v2 2/4] xen: sched: make counters for vCPU sleep and wakeup generic
On 03/17/2015 10:48 AM, Dario Faggioli wrote: On Mon, 2015-03-16 at 17:18 +, George Dunlap wrote: On 02/27/2015 04:51 PM, Dario Faggioli wrote: diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c index ad0a5d4..2b852cc 100644 --- a/xen/common/sched_credit2.c +++ b/xen/common/sched_credit2.c @@ -931,6 +931,7 @@ csched2_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) struct csched2_vcpu * const svc = CSCHED2_VCPU(vc); BUG_ON( is_idle_vcpu(vc) ); +SCHED_STAT_CRANK(vcpu_sleep); if ( per_cpu(schedule_data, vc-processor).curr == vc ) cpu_raise_softirq(vc-processor, SCHEDULE_SOFTIRQ); @@ -956,19 +957,22 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) BUG_ON( is_idle_vcpu(vc) ); -/* Make sure svc priority mod happens before runq check */ if ( unlikely(per_cpu(schedule_data, vc-processor).curr == vc) ) { +SCHED_STAT_CRANK(vcpu_wake_running); goto out; } - if ( unlikely(__vcpu_on_runq(svc)) ) Does this make the 'if' butt right up against the '{'? Is that bad? Other than that: Acked-by: George Dunlap george.dun...@eu.citrix.com As far as I can see, the series is in: http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=8b0e94da0e23221a6d7ea19bfbd24a407db44de8 and whoever committed it (Jan, probably?), took care of leaving that blank line in place, which I'm fine with, of course. :-) Sweet. :-) -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)
On 17.03.15 at 12:14, dario.faggi...@citrix.com wrote: On Tue, 2015-03-17 at 10:54 +, Jan Beulich wrote: Finally, as said in different contexts earlier, I think unconditionally acquiring locks in dumping routines isn't the best practice. At least in non-debug builds I think these should be try-locks only, skipping the dumping when a lock is busy. That makes sense. It also looks, if not completely orthogonal, something that can be done in a follow-up patch wrt this series. I.e., we first move the locking logic where it belongs, and then we rework it toward using _trylock(), does this make sense? Sure, the order doesn't matter all that much. And the keyhandler issue is certainly affecting code elsewhere too, so getting this improved would presumably not be part of a scheduler specific patch series. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)
On 03/17/2015 11:25 AM, Jan Beulich wrote: On 17.03.15 at 12:05, george.dun...@eu.citrix.com wrote: On 03/17/2015 10:54 AM, Jan Beulich wrote: Finally, as said in different contexts earlier, I think unconditionally acquiring locks in dumping routines isn't the best practice. At least in non-debug builds I think these should be try-locks only, skipping the dumping when a lock is busy. You mean so that we don't block the console if there turns out to be a deadlock? For example. And also to not unduly get in the way of an otherwise extremely busy system. I don't understand this last argument. If you're using the debug keys, you want to know about the state of the system. I would much rather my system ran 25% slower for the 5 seconds the debug key was dumping information, and have a complete snapshot of the system, than for it to only run 10% slower and to have half the information missing. The upshot of missing information would likely be that I have to press the debug key 3-4 times in a row, meaning I'd be running 10% slower for 20 seconds rather than 25% slower for 5 seconds. And in any case, the effect of being able to *successfully* grab the private lock is going to have a much larger impact on the system. All in all, I don't think the performance of the debug keys should be a major concern. The only thing I'd be worried about is making the system as diagnosable as possible if things have already gone pear-shaped (e.g., if there's a deadlock). Yes, that might be a possible compromise. I could also imagine another debug key allowing to alter the behavior, i.e. for when one absolutely wants the information and doesn't care about the state of the system. Possible, but it seems like a lot of complication for what it buys you. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI
On Tuesday 17 March 2015 12:58 PM, Jan Beulich wrote: On 17.03.15 at 06:26, mja...@caviumnetworks.com wrote: In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a hypercall to inform xen that a new pci device has been added. If we were to inform xen about a new pci bus that is added there are 2 ways a) Issue the hypercall from drivers/pci/probe.c b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that segment number (s_bdf), it will return an error SEG_NO_NOT_FOUND. After that the linux xen code could issue the PHYSDEVOP_pci_host_bridge_add hypercall. I think (b) can be done with minimal code changes. What do you think ? I'm pretty sure (a) would even be refused by the maintainers, unless there already is a notification being sent. As to (b) - kernel code could keep track of which segment/bus pairs it informed Xen about, and hence wouldn't even need to wait for an error to be returned from the device-add request (which in your proposal would need to be re- issued after the host-bridge-add). Have a query on the CFG space address to be passed as hypercall parameter. The of_pci_get_host_bridge_resource only parses the ranges property and not reg. reg property has the CFG space address, which is usually stored in private pci host controller driver structures. so pci_dev 's parent pci_bus would not have that info. One way is to add a method in struct pci_ops but not sure it will be accepted or not. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)
On 03/17/2015 11:43 AM, Jan Beulich wrote: On 17.03.15 at 12:32, george.dun...@eu.citrix.com wrote: On 03/17/2015 11:25 AM, Jan Beulich wrote: On 17.03.15 at 12:05, george.dun...@eu.citrix.com wrote: On 03/17/2015 10:54 AM, Jan Beulich wrote: Finally, as said in different contexts earlier, I think unconditionally acquiring locks in dumping routines isn't the best practice. At least in non-debug builds I think these should be try-locks only, skipping the dumping when a lock is busy. You mean so that we don't block the console if there turns out to be a deadlock? For example. And also to not unduly get in the way of an otherwise extremely busy system. I don't understand this last argument. If you're using the debug keys, you want to know about the state of the system. I would much rather my system ran 25% slower for the 5 seconds the debug key was dumping information, and have a complete snapshot of the system, than for it to only run 10% slower and to have half the information missing. The upshot of missing information would likely be that I have to press the debug key 3-4 times in a row, meaning I'd be running 10% slower for 20 seconds rather than 25% slower for 5 seconds. Yes, I understand this, and in many cases this is the perspective to take. Yet I've been in the situation where suggesting the use of debug keys to learn something about a (partially) live locked system would have had the risk of causing further corruption to it, and hence a more careful state dumping approach would have been desirable. All in all, I don't think the performance of the debug keys should be a major concern. The only thing I'd be worried about is making the system as diagnosable as possible if things have already gone pear-shaped (e.g., if there's a deadlock). It's not their performance that's of concern, but the effect they may have on the performance (or even correctness - see how many process_pending_softirqs() calls we had to sprinkle around over the years) of other code. So it sounds like maybe we're actually on the same page, but are using words slightly differently. :-) It sounds like we agree that the ability to tread carefully on a system which may be having trouble, in order not to make it worse, is important. For instance, not wedging the serial console behind a deadlocked lock, and not further corrupting a system that had gotten itself wedged in livelock. Those are things I would classify under correctness and/or diagnosis. When I say performance is not a concern, I mean it does not concern me that someone's web page loads 25% slower for the five seconds it takes to dump the information. If delaying other parts of the system causes the system to get wedged or crash, that's obviously a problem. -George ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI
On 17.03.15 at 13:06, mja...@caviumnetworks.com wrote: On Tuesday 17 March 2015 12:58 PM, Jan Beulich wrote: On 17.03.15 at 06:26, mja...@caviumnetworks.com wrote: In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a hypercall to inform xen that a new pci device has been added. If we were to inform xen about a new pci bus that is added there are 2 ways a) Issue the hypercall from drivers/pci/probe.c b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that segment number (s_bdf), it will return an error SEG_NO_NOT_FOUND. After that the linux xen code could issue the PHYSDEVOP_pci_host_bridge_add hypercall. I think (b) can be done with minimal code changes. What do you think ? I'm pretty sure (a) would even be refused by the maintainers, unless there already is a notification being sent. As to (b) - kernel code could keep track of which segment/bus pairs it informed Xen about, and hence wouldn't even need to wait for an error to be returned from the device-add request (which in your proposal would need to be re- issued after the host-bridge-add). Have a query on the CFG space address to be passed as hypercall parameter. The of_pci_get_host_bridge_resource only parses the ranges property and not reg. reg property has the CFG space address, which is usually stored in private pci host controller driver structures. so pci_dev 's parent pci_bus would not have that info. One way is to add a method in struct pci_ops but not sure it will be accepted or not. I'm afraid I don't understand what you're trying to tell me. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 3/7] xen: rework locking for dump of scheduler info (debug-key r)
On 17.03.15 at 12:05, george.dun...@eu.citrix.com wrote: On 03/17/2015 10:54 AM, Jan Beulich wrote: Finally, as said in different contexts earlier, I think unconditionally acquiring locks in dumping routines isn't the best practice. At least in non-debug builds I think these should be try-locks only, skipping the dumping when a lock is busy. You mean so that we don't block the console if there turns out to be a deadlock? For example. And also to not unduly get in the way of an otherwise extremely busy system. That makes some sense; but on a busy system that would mean a non-negligible chance that any give keystroke would be missing information about some cpu or other, which would be pretty frustrating for someone trying to figure out the state of their system. Right - ideally there would be an indication of the skipped state left in the log. Would it make sense to have a version of spin_trylock for use in this kind of situation that waits retries a reasonable number of times before giving up? Yes, that might be a possible compromise. I could also imagine another debug key allowing to alter the behavior, i.e. for when one absolutely wants the information and doesn't care about the state of the system. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v7] sndif: add ABI for Para-virtual sound
On Thu, 2015-03-12 at 18:14 +, Lars Kurth wrote: Hi,I nearly missed this. Please make sure you forward stuff and change the headline if you want me to look into things. Otherwise I may miss it. Sure, I'll try and remember. FYI before Ian J went away he mentioned that he had raised some questions/issues (either on this or a previous version) which had not yet been answered (or maybe not answered to his satisfaction, I'm not sure) but that if those were addressed he would take a look with a view to acking the interface for inclusion in xen.git. (I've not looked in the threads for it, so I don't know the exact state). From my perspective, this exactly the kind of scenario why we created the embedded / automotive subproject, with an option to store code in repos owned by the project. Given that the primary use-case of these drivers is embedded / automotive, my suggestion would be to 1.a) Use a repo in the embedded / automotive pv driver subproject to host the spec - but use a file system structure that matches the xen tree 1.b) I would assume there would be one back-end and several front-ends for these drivers and some would eventually appear in trees owned by the embedded / automotive pv driver subproject In this case, the maintainer responsibility would fall to members of the embedded / automotive pv driver subproject. Once there are several implementations, and enough people with skills to review we can re-visit where the spec and drivers live. We can have a discussion about criteria of when to move, but I don't think that makes a lot of sense. I think the concerns that need to be addressed are: 2.a) Enough skills to review the code / protocols from different stake-holders - this should happen with time, once the spec and code are there. And of course once the embedded / automotive pv driver subproject graduates, that will also give extra weight to its maintainers in the wider community 2.b) Of course if there was a strong case that PV sound drivers are extremely useful for core data centre use-cases, I would probably suggest another approach Maybe 2.b) needs to be checked with Intel folks - there may be some sound requirement for XenGT Would this work as a way forward? I think the main things which is missing is some decision as to the the point at which we would consider the ABI for a PV protocol fixed, i.e. to be maintained in a backwards compatible manner from then on. That's of particular importance when one end of the pair is implemented in external projects (e.g. OS driver frontends). If the interface is not declared stable then changes would be allowed which would invalidate those drivers. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 18/18] x86: add multiboot2 protocol support for EFI platforms
On Tue, Mar 17, 2015 at 10:32:01AM +, Jan Beulich wrote: On 30.01.15 at 18:54, daniel.ki...@oracle.com wrote: @@ -94,6 +111,17 @@ ENTRY(start) gdt_boot_descr: .word 6*8-1 .long sym_phys(trampoline_gdt) +.long 0 /* Needed for 64-bit lgdt */ + +cs32_switch_addr: +.long sym_phys(cs32_switch) +.long BOOT_CS32 + +efi_st: +.quad 0 + +efi_ih: +.quad 0 .Lbad_cpu_msg: .asciz ERR: Not a 64-bit CPU! .Lbad_ldr_msg: .asciz ERR: Not a Multiboot bootloader! @@ -124,6 +152,133 @@ print_err: .Lhalt: hlt jmp .Lhalt +.code64 + +__efi64_start: +cld + +/* Bootloaders may set multiboot[12].mem_lower to a nonzero value */ +xor %edx,%edx + +/* Check for Multiboot2 bootloader */ +cmp $MULTIBOOT2_BOOTLOADER_MAGIC,%eax +je efi_multiboot2_proto + +jmp not_multiboot + +efi_multiboot2_proto: jne not_multiboot (and efi_multiboot2_proto dropped altogether) [...] Jan, thanks for your comments. Now I am working on relocatable early Xen code. I hope that I will finish that this week and start tests on this crazy UEFI platforms. Then I get back to this patch series and replay for your and other guys comments. Daniel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] OpenStack - Libvirt+Xen CI overview
On Mon, 2015-03-16 at 14:00 -0400, Konrad Rzeszutek Wilk wrote: On Tue, Mar 10, 2015 at 12:03:13PM +, Bob Ball wrote: I'd like to organise a meeting to walk through the various components of the CI with those who are interested, so this is an initial call to find out who is interested in finding out more! I am always interested in it. I know next to nothing about OpenStack, and would be interested in attending something like this to improve that. :-) Regards, Dario signature.asc Description: This is a digitally signed message part ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v7] sndif: add ABI for Para-virtual sound
On Tue, 17 Mar 2015, Ian Campbell wrote: On Thu, 2015-03-12 at 18:14 +, Lars Kurth wrote: Hi,I nearly missed this. Please make sure you forward stuff and change the headline if you want me to look into things. Otherwise I may miss it. Sure, I'll try and remember. FYI before Ian J went away he mentioned that he had raised some questions/issues (either on this or a previous version) which had not yet been answered (or maybe not answered to his satisfaction, I'm not sure) but that if those were addressed he would take a look with a view to acking the interface for inclusion in xen.git. (I've not looked in the threads for it, so I don't know the exact state). From my perspective, this exactly the kind of scenario why we created the embedded / automotive subproject, with an option to store code in repos owned by the project. Given that the primary use-case of these drivers is embedded / automotive, my suggestion would be to 1.a) Use a repo in the embedded / automotive pv driver subproject to host the spec - but use a file system structure that matches the xen tree 1.b) I would assume there would be one back-end and several front-ends for these drivers and some would eventually appear in trees owned by the embedded / automotive pv driver subproject In this case, the maintainer responsibility would fall to members of the embedded / automotive pv driver subproject. Once there are several implementations, and enough people with skills to review we can re-visit where the spec and drivers live. We can have a discussion about criteria of when to move, but I don't think that makes a lot of sense. I think the concerns that need to be addressed are: 2.a) Enough skills to review the code / protocols from different stake-holders - this should happen with time, once the spec and code are there. And of course once the embedded / automotive pv driver subproject graduates, that will also give extra weight to its maintainers in the wider community 2.b) Of course if there was a strong case that PV sound drivers are extremely useful for core data centre use-cases, I would probably suggest another approach Maybe 2.b) needs to be checked with Intel folks - there may be some sound requirement for XenGT Would this work as a way forward? I think the main things which is missing is some decision as to the the point at which we would consider the ABI for a PV protocol fixed, i.e. to be maintained in a backwards compatible manner from then on. That's of particular importance when one end of the pair is implemented in external projects (e.g. OS driver frontends). If the interface is not declared stable then changes would be allowed which would invalidate those drivers. I think that you are right. Declaring the interface stable or unstable is far more important than where the code or the spec lives. If we formally specified within the spec that the ABI is not maintained for backward compatibility, the bar for acceptance in xen-unstable would be far lower. Maybe the spec could even be accepted as is if nobody has any comments? ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] RFC: [PATCH 1/3] Enhance platform support for PCI
On Tue, Mar 17, 2015 at 10:56:48AM +0530, Manish Jaggi wrote: On Friday 27 February 2015 10:20 PM, Ian Campbell wrote: On Fri, 2015-02-27 at 16:35 +, Jan Beulich wrote: On 27.02.15 at 16:24, ian.campb...@citrix.com wrote: On Fri, 2015-02-27 at 14:54 +, Stefano Stabellini wrote: MMCFG is a Linux config option, not to be confused with PHYSDEVOP_pci_mmcfg_reserved that is a Xen hypercall interface. I don't think that the way Linux (or FreeBSD) call PHYSDEVOP_pci_mmcfg_reserved is relevant. My (possibly flawed) understanding was that pci_mmcfg_reserved was intended to propagate the result of dom0 parsing some firmware table or other to the hypevisor. That's not flawed at all. I think that's a first in this thread ;-) In Linux dom0 we call it walking pci_mmcfg_list, which looking at arch/x86/pci/mmconfig-shared.c pci_parse_mcfg is populated by walking over a struct acpi_table_mcfg (there also appears to be a bunch of processor family derived entries, which I guess are quirks of some sort). Right - this parses ACPI tables (plus applies some knowledge about certain specific systems/chipsets/CPUs) and verifies that the space needed for the MMCFG region is properly reserved either in E820 or in the ACPI specified resources (only if so Linux decides to use MMCFG and consequently also tells Xen that it may use it). Thanks. So I think what I wrote in 1424948710.14641.25.ca...@citrix.com applies as is to Device Tree based ARM devices, including the need for the PHYSDEVOP_pci_host_bridge_add call. On ACPI based devices we will have the MCFG table, and things follow much as for x86: * Xen should parse MCFG to discover the PCI host-bridges * Dom0 should do likewise and call PHYSDEVOP_pci_mmcfg_reserved in the same way as Xen/x86 does. The SBSA, an ARM standard for servers, mandates various things which we can rely on here because ACPI on ARM requires an SBSA compliant system. So things like odd quirks in PCI controllers or magic setup are spec'd out of our zone of caring (into the firmware I suppose), hence there is nothing like the DT_DEVICE_START stuff to register specific drivers etc. The PHYSDEVOP_pci_host_bridge_add call is not AFAICT needed on ACPI ARM systems (any more than it is on x86). We can decide whether to omit it from dom0 or ignore it from Xen later on. (Manish, this is FYI, I don't expect you to implement ACPI support!) In drivers/xen/pci.c on notification BUS_NOTIFY_ADD_DEVICE dom0 issues a hypercall to inform xen that a new pci device has been added. If we were to inform xen about a new pci bus that is added there are 2 ways a) Issue the hypercall from drivers/pci/probe.c b) When a new device is found (BUS_NOTIFY_ADD_DEVICE) issue PHYSDEVOP_pci_device_add hypercall to xen, if xen does not finds that segment number (s_bdf), it will return an error SEG_NO_NOT_FOUND. After that the linux xen code could issue the PHYSDEVOP_pci_host_bridge_add hypercall. Couldn't the code figure out from 'struct pci_dev' whether the device is a bridge or an PCI device? And then do the proper hypercall? Interesting thing you _might_ hit (that I did) was that if you use 'bus=reassign' which re-assigns the bus numbers during scan - Xen gets very very confused. As in, the bus devices that Xen sees vs the ones Linux sees are different. Whether you will encounter this depends on whether the bridge devices and pci devices end up having an differnet bus number from what Xen scanned, and from what Linux has determined. (As in, Linux has found a bridge device with more PCI devices -so it repograms the bridge which moves all of the other PCI devices below it by X number). The reason I am bringing it up - it sounds like Xen will have no clue about some devices - and be told about it by Linux - if some reason it has the same bus number as some that Xen already scanned - gah! I think (b) can be done with minimal code changes. What do you think ? Less code == better. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v7] sndif: add ABI for Para-virtual sound
On Tue, 2015-03-17 at 13:05 +, Lars Kurth wrote: On 17 Mar 2015, at 11:40, Ian Campbell ian.campb...@citrix.com wrote: On Thu, 2015-03-12 at 18:14 +, Lars Kurth wrote: Hi,I nearly missed this. Please make sure you forward stuff and change the headline if you want me to look into things. Otherwise I may miss it. Sure, I'll try and remember. FYI before Ian J went away he mentioned that he had raised some questions/issues (either on this or a previous version) which had not yet been answered (or maybe not answered to his satisfaction, I'm not sure) but that if those were addressed he would take a look with a view to acking the interface for inclusion in xen.git. OK. So this means there are some concrete lose ends, which need to be followed up on. I also remember that there was a discussion on how we should specify protocols, which does not appear to have fully concluded either. Would this work as a way forward? I think the main things which is missing is some decision as to the the point at which we would consider the ABI for a PV protocol fixed, i.e. to be maintained in a backwards compatible manner from then on. What do we do with new APIs in such situations? We review then carefully and hope we get them right. We manage to get this right at least some of the time because many of us are familiar with the issues WRT e.g. memory management hypercalls. This is what I was getting at with people are naturally a bit cautious about creating new ABIs, which must be maintained long term, for types of device with which they are not really familiar. in my initial mail. The which they are not really familiar is pretty key. It's also (normally) not too hard to add a new hypercall fixing a shortcoming in an existing one while retaining backwards compat, compared with doing that for an I/O protocol (see: netchannel2). In the I/O case adding extensions also is reasonably well understood and something we manage, but fixing a core issue is much harder (see: the non-uniformity of the blk protocol over different architectures, or the ring space wastage due to various power of two requirements, neither of which can realistically be properly fixed). Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH RFC V2 3/5] libxl: add pvusb API
Hi Chunyan, I've found another problem while trying to write a qemu based pvUSB backend. On 01/19/2015 09:28 AM, Chunyan Liu wrote: Add pvusb APIs, including: - attach/detach (create/destroy) virtual usb controller. - attach/detach usb device - list assignable usb devices in host - some other helper functions Signed-off-by: Chunyan Liu cy...@suse.com Signed-off-by: Simon Cao caobosi...@gmail.com --- ... diff --git a/tools/libxl/libxl_usb.c b/tools/libxl/libxl_usb.c new file mode 100644 index 000..830a846 --- /dev/null +++ b/tools/libxl/libxl_usb.c ... +/* xenstore usb data */ +static int libxl__device_usb_add_xenstore(libxl__gc *gc, uint32_t domid, + libxl_device_usb *usb) +{ +libxl_ctx *ctx = CTX; +char *be_path; +int rc; +libxl_domain_config d_config; +libxl_device_usb usb_saved; +libxl__domain_userdata_lock *lock = NULL; + +libxl_domain_config_init(d_config); +libxl_device_usb_init(usb_saved); +libxl_device_usb_copy(CTX, usb_saved, usb); + +be_path = libxl__sprintf(gc, %s/backend/vusb/%d/%d, +libxl__xs_get_dompath(gc, 0), domid, usb-ctrl); +if (libxl__wait_for_backend(gc, be_path, 4) 0) { Don't do this! That's the reason I had to change my backend driver in order to support assignment of a usb device via config file. Normally the backend will witch to state 4 only after the frontend is started. You can just remove waiting for the backend here. The backend has to check all ports when it is changing is state to 4 (connected). +rc = ERROR_FAIL; +goto out; +} + +lock = libxl__lock_domain_userdata(gc, domid); +if (!lock) { +rc = ERROR_LOCK_FAIL; +goto out; +} + +rc = libxl__get_domain_configuration(gc, domid, d_config); +if (rc) goto out; + +DEVICE_ADD(usb, usbs, domid, usb_saved, COMPARE_USB, d_config); + +rc = libxl__set_domain_configuration(gc, domid, d_config); +if (rc) goto out; + +be_path = libxl__sprintf(gc, %s/port/%d, be_path, usb-port); +LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, Adding new usb device to xenstore); +if (libxl__xs_write_checked(gc, XBT_NULL, be_path, usb-intf)) { +rc = ERROR_FAIL; +goto out; +} + +rc = 0; + +out: +if (lock) libxl__unlock_domain_userdata(lock); +libxl_device_usb_dispose(usb_saved); +libxl_domain_config_dispose(d_config); +return rc; + +} + +static int libxl__device_usb_remove_xenstore(libxl__gc *gc, uint32_t domid, + libxl_device_usb *usb) +{ +libxl_ctx *ctx = CTX; +char *be_path; + +be_path = libxl__sprintf(gc, %s/backend/vusb/%d/%d, +libxl__xs_get_dompath(gc, 0), domid, usb-ctrl); +if (libxl__wait_for_backend(gc, be_path, 4) 0) Remove this one, too. Juergen ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH v3 21/24] tools/(lib)xl: Add partial device tree support for ARM
Hi Ian, Sorry for the late answer. On 23/02/15 17:22, Ian Campbell wrote: On Mon, 2015-02-23 at 17:06 +, Julien Grall wrote: On 23/02/15 11:46, Ian Campbell wrote: On Tue, 2015-01-13 at 14:25 +, Julien Grall wrote: Let the user to pass additional nodes to the guest device tree. For this purpose, everything in the node /passthrough from the partial device tree will be copied into the guest device tree. The node /aliases will be also copied to allow the user to define aliases which can be used by the guest kernel. A simple partial device tree will look like: /dts-v1/; / { #address-cells = 2; #size-cells = 2; Are these mandatory/required as implied below, or only the ones inside the passthrough node (which is what I would expect). It's to make DTC quiet. Maybe add /* Keep DTC happy */ to both lines? passthrough { compatible = simple-bus; ranges; #address-cells = 2; #size-cells = 2; /* List of your nodes */ } }; Note that: * The interrupt-parent proporties will be added by the toolstack in properties the root node * The properties compatible, ranges, #address-cells and #size-cells in /passthrough are mandatory. Does ranges need to be the empty form? I think ranges = stuff stuff would be illegal? It's not illegal as long as you correctly use it in the inner reg. OK. This could be explained in some more complete documentaiton I think. (It's a doc day on Wednesday ;-)) Also, I admit that the ranges is confusing to read. Signed-off-by: Julien Grall julien.gr...@linaro.org Cc: Ian Jackson ian.jack...@eu.citrix.com Cc: Wei Liu wei.l...@citrix.com --- Changes in v3: - Patch added --- docs/man/xl.cfg.pod.5 | 7 ++ tools/libxl/libxl_arm.c | 253 tools/libxl/libxl_types.idl | 1 + tools/libxl/xl_cmdimpl.c| 1 + 4 files changed, 262 insertions(+) diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5 index e2f91fc..225b782 100644 --- a/docs/man/xl.cfg.pod.5 +++ b/docs/man/xl.cfg.pod.5 @@ -398,6 +398,13 @@ not emulated. Specify that this domain is a driver domain. This enables certain features needed in order to run a driver domain. +=item Bdevice_tree=PATH + +Specify a partial device tree (compiled via the Device Tree Compiler). +Everything under the node /passthrough will be copied into the guest +device tree. For convenience, the node /aliases is also copied to allow +the user to defined aliases which can be used by the guest kernel. + =back =head2 Devices diff --git a/tools/libxl/libxl_arm.c b/tools/libxl/libxl_arm.c index 53177eb..619458b 100644 --- a/tools/libxl/libxl_arm.c +++ b/tools/libxl/libxl_arm.c @@ -540,6 +540,238 @@ out: } } +static bool check_overrun(uint64_t a, uint64_t b, uint32_t max) +{ +return ((a + b) UINT_MAX || (a + b) max); Both halves here will fail if e.g. a == UINT64_MAX-1 and b == 2, so e..g a+b = UINT_MAX and max. Oops right. To avoid this you should check that a and b are both less than some fraction of UINT64_MAX before the other checks, which would ensure the overflow can't happen, perhaps even UINT32_MAX would be acceptable for this use, depending on the input types involved. max is an uint32_t so a and b should be inferior to UINT32_MAX. by inferior to do you mean less than? Or something to do with type promotion/demotion rules? I meant less than. What about a UINT_MAX b UINT_MAX (a + b) UINT_MAX Isn't that inverted from the sense which the function name requires? Given the complexity in reasoning about this I think a series of individual if and return statements which check each precondition one at a time and return failure if necessary wuold be clearer to read and reason about than trying to encode it all in one expression. Given that we will mark the option unsafe. I'm thinking to drop this check and some others. This would make the code less complex and avoid to check on half of the FDT. diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 1214d2e..5651110 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -399,6 +399,7 @@ libxl_domain_build_info = Struct(domain_build_info,[ (kernel, string), (cmdline, string), (ramdisk, string), +(device_tree, string), Needs a #define LIBXL_HAVE... in libxl.h Hmmm why? This will be set to empty when libxl_domain_build_info is initialized. So that applications which use libxl can take advantage of the new feature when built against versions of the library which support it, without breaking when built against versions which do not. Hmmm right. I will add the LIBXL_HAVE_PARTIAL_DEVICE_TREE. Regards, -- Julien Grall ___
Re: [Xen-devel] [PATCH] flask/policy: fix static device labeling examples
(CC Ian and Jan) Hi, Is there any blocker to push this patch? It's useful for using XSM with passthrough. Regards, On 11/03/15 14:59, Daniel De Graaf wrote: The definitions of static device labels must be placed at the end of the policy.conf before passing it to checkpolicy; the existing examples (which are commented out) are in the wrong location. Create a new file for device contexts which will place them in the proper location. This also removes some directions about using the xen policy type in checkpolicy which is no longer needed. Reported-by: Julien Grall julien.gr...@linaro.org Signed-off-by: Daniel De Graaf dgde...@tycho.nsa.gov --- docs/misc/xsm-flask.txt | 31 +++ tools/flask/policy/Makefile | 3 ++- tools/flask/policy/policy/device_contexts| 32 +++ tools/flask/policy/policy/modules/xen/xen.te | 38 +++- 4 files changed, 41 insertions(+), 63 deletions(-) create mode 100644 tools/flask/policy/policy/device_contexts diff --git a/docs/misc/xsm-flask.txt b/docs/misc/xsm-flask.txt index ab05913..e169937 100644 --- a/docs/misc/xsm-flask.txt +++ b/docs/misc/xsm-flask.txt @@ -335,33 +335,8 @@ memory, or even changing certain BIOS settings). Dynamic labeling requires that the domain performing the labeling be trusted to label all the devices in the system properly. -To enable static device labeling, a checkpolicy = 2.0.20 and libsepol =2.0.39 -are required. The policy Makefile (tools/flask/policy/Makefile) must also be -changed as follows: - - -# -# Build a binary policy locally -# -$(POLVER): policy.conf -@echo Compiling $(NAME) $(POLVER) - $(QUIET) $(CHECKPOLICY) $^ -o $@(Comment out this line) -# Uncomment line below to enable policies for devices -#$(QUIET) $(CHECKPOLICY) -t Xen $^ -o $@ (Uncomment this line) - - -# -# Install a binary policy -# -$(LOADPATH): policy.conf -@echo Compiling and installing $(NAME) $(LOADPATH) - $(QUIET) $(CHECKPOLICY) $^ -o $@(Comment out this line) -# Uncomment line below to enable policies for devices -#$(QUIET) $(CHECKPOLICY) -t Xen $^ -o $@ (Uncomment this line) - - -IRQs, PCI devices, I/O memory and ports can all be labeled. There are -commented out lines in xen.te policy for examples on how to label devices. +IRQs, PCI devices, I/O memory and x86 IO ports can all have labels defined. +There are examples commented out in tools/flask/policy/policy/device_contexts. Device Labeling --- @@ -378,7 +353,7 @@ lspci output is.. Region 2: I/O ports at ecc0 [size=32] Kernel modules: e1000e -The labeling can be done with these commands +The labeling can be done with these lines in device_contexts: pirqcon 33 system_u:object_r:nicP_t iomemcon 0xfebe0-0xfebff system_u:object_r:nicP_t diff --git a/tools/flask/policy/Makefile b/tools/flask/policy/Makefile index 58d9ce1..e564396 100644 --- a/tools/flask/policy/Makefile +++ b/tools/flask/policy/Makefile @@ -56,6 +56,7 @@ MLSSUPPORT := $(POLDIR)/mls USERS := $(POLDIR)/users CONSTRAINTS := $(POLDIR)/constraints ISID_DEFS := $(POLDIR)/initial_sids +DEV_OCONS := $(POLDIR)/device_contexts # config file paths GLOBALTUN := $(POLDIR)/global_tunables @@ -98,7 +99,7 @@ POLICY_SECTIONS += $(M4SUPPORT) $(MLSSUPPORT) POLICY_SECTIONS += $(ALL_INTERFACES) POLICY_SECTIONS += $(GLOBALTUN) POLICY_SECTIONS += $(ALL_MODULES) -POLICY_SECTIONS += $(USERS) $(CONSTRAINTS) $(ISID_DEFS) +POLICY_SECTIONS += $(USERS) $(CONSTRAINTS) $(ISID_DEFS) $(DEV_OCONS) all: $(POLICY_FILENAME) diff --git a/tools/flask/policy/policy/device_contexts b/tools/flask/policy/policy/device_contexts new file mode 100644 index 000..c2de7e7 --- /dev/null +++ b/tools/flask/policy/policy/device_contexts @@ -0,0 +1,32 @@ +### +# +# Label devices for delegation +# +# The PCI, IRQ, memory, and I/O port ranges are hardware-specific. +# +### + +# label e1000e nic +#pirqcon 33 system_u:object_r:nic_dev_t +#pirqcon 55 system_u:object_r:nic_dev_t +#iomemcon 0xfebe0-0xfebff system_u:object_r:nic_dev_t +#iomemcon 0xfebd9 system_u:object_r:nic_dev_t +#ioportcon 0xecc0-0xecdf system_u:object_r:nic_dev_t +#pcidevicecon 0xc800 system_u:object_r:nic_dev_t + +# label e100 nic +#pirqcon 16 system_u:object_r:nic_dev_t +#iomemcon 0xfe5df system_u:object_r:nic_dev_t +#iomemcon 0xfe5e0-0xfe5ff system_u:object_r:nic_dev_t +#iomemcon 0xc2000-0xc200f system_u:object_r:nic_dev_t +#ioportcon 0xccc0-0xcd00 system_u:object_r:nic_dev_t + +# label usb 1d.0-2 1d.7 +#pirqcon 23 system_u:object_r:nic_dev_t
Re: [Xen-devel] [PATCH v2 2/4] xen/arm: Add GSER region to ThunderX platform mapping
Hi Ian, On Thu, Mar 5, 2015 at 10:40 PM, Ian Campbell ian.campb...@citrix.com wrote: On Thu, 2015-03-05 at 16:46 +, Ian Campbell wrote: On Wed, 2015-03-04 at 11:36 +0530, vijay.kil...@gmail.com wrote: From: Vijaya Kumar K vijaya.ku...@caviumnetworks.com Add GSER region to thunderx platfrom specific mappings. This region is not mentioned in DT. This is required by PCI driver to detect and configure pci devices attached. In future we can remove this mapping, if pci driver in Dom does not require this. How do we know what the PCI driver in dom0 needs? I don't think we can, so we can in effect never remove this specific mapping, which is a shame. Unless you have some scheme in mind which would allow us to do so? IMHO by far the best solution would be to add this device to the DTB so that it is correctly mapped. I'm not quite sure what that will look like since thne mainline DTB doesn't have the PCI node at all. Looking at a more recent DTB which I have access to it seems like 0x87e09000 is correctly covered by a ranges entry on the PCI controller node. Where did you find recent DTB?. AFAIK, this region does not fall under any pci controller range. So I think all which is needed is a) to use this updated DTB and b) my series xen: arm: Parse PCI DT nodes' ranges and interrupt-map from last October which, as it happens, I've been working on bringing up to date yesterday and today (one more thing to clean up before I repost). Because it is not covered under any PCI ranges, your patch series still does not help. Infact, this is common region for SERDES configuration so cannot bind to any particular pci controller range. Regards Vijay ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH] flask/policy: fix static device labeling examples
On 17.03.15 at 14:03, julien.gr...@linaro.org wrote: (CC Ian and Jan) This is mostly about tools stuff: docs/misc/xsm-flask.txt | 31 +++ tools/flask/policy/Makefile | 3 ++- tools/flask/policy/policy/device_contexts| 32 +++ tools/flask/policy/policy/modules/xen/xen.te | 38 +++- 4 files changed, 41 insertions(+), 63 deletions(-) Hence I don't see why you ping me about it. Jan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH RFC V2 4/6] xen: Support for VMCALL mem_events
On 07/11/2014 08:23 PM, Andrew Cooper wrote: On 11/07/14 16:43, Razvan Cojocaru wrote: Added support for VMCALL events (the memory introspection library will have the guest trigger VMCALLs, which will then be sent along via the mem_event mechanism). Changes since V1: - Added a #define and an comment explaining a previous magic constant. - Had MEM_EVENT_REASON_VMCALL explicitly not honour HVMPME_onchangeonly. Signed-off-by: Razvan Cojocaru rcojoc...@bitdefender.com --- xen/arch/x86/hvm/hvm.c |9 + xen/arch/x86/hvm/vmx/vmx.c | 18 +- xen/include/asm-x86/hvm/hvm.h |1 + xen/include/public/hvm/params.h |4 +++- xen/include/public/mem_event.h |5 + 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 89a0382..6e86d7c 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -5564,6 +5564,7 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) case HVM_PARAM_MEMORY_EVENT_INT3: case HVM_PARAM_MEMORY_EVENT_SINGLE_STEP: case HVM_PARAM_MEMORY_EVENT_MSR: +case HVM_PARAM_MEMORY_EVENT_VMCALL: if ( d == current-domain ) { rc = -EPERM; @@ -6199,6 +6200,14 @@ void hvm_memory_event_msr(unsigned long msr, unsigned long value) value, ~value, 1, msr); } +void hvm_memory_event_vmcall(unsigned long rip, unsigned long eax) +{ +hvm_memory_event_traps(current-domain-arch.hvm_domain + .params[HVM_PARAM_MEMORY_EVENT_VMCALL], + MEM_EVENT_REASON_VMCALL, + rip, ~rip, 1, eax); +} + int hvm_memory_event_int3(unsigned long gla) { uint32_t pfec = PFEC_page_present; diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 2caa04a..6c63225 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -2879,8 +2879,24 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) case EXIT_REASON_VMCALL: { int rc; +unsigned long eax = regs-eax; + HVMTRACE_1D(VMMCALL, regs-eax); -rc = hvm_do_hypercall(regs); + +/* Don't send a VMCALL mem_event unless something + * caused the guests's eax register to contain the + * VMCALL_EVENT_REQUEST constant. */ +if ( regs-eax != VMCALL_EVENT_REQUEST ) +{ +rc = hvm_do_hypercall(regs); +} +else +{ +hvm_memory_event_vmcall(guest_cpu_user_regs()-eip, eax); +update_guest_eip(); +break; +} Thinking more about this, it is really a hypercall pretending not to be. It would be better to introduce a real HVMOP_send_mem_event. From the point of view of your in-guest agent, it would be a vmcall with rax = 34 (hvmop) rdi = $N (send_mem_event subop) rsi = data or pointer to struct containing data, depending on how exactly you implement the hypercall. You would have the bonus of being able to detect errors, e.g. -ENOENT for mem_event not active, get SVM support for free, and not need magic numbers, or vendor specific terms like vmcall finding their way into the Xen public API. Actually, this only seems to be the case where mode == 8 in hvm_do_hypercall() (xen/arch/x86/hvm/hvm.c): 4987 : hvm_hypercall64_table)[eax](rdi, rsi, rdx, r10, r8, r9); Otherwise (and this seems to be the case with my Xen build), ebx seems to be used for the subop: 5033 regs-_eax = hvm_hypercall32_table[eax](ebx, ecx, edx, esi, edi, ebp); So, ebx needs to be $N (send_mem_event subop), not rdi. Is this intended (rdi in one case and ebx in the other)? Thanks, Razvan ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping
On Mon, 2015-03-16 at 16:30 -0400, Meng Xu wrote: Hi Dario, Hey, 2015-03-16 13:05 GMT-04:00 Dario Faggioli dario.faggi...@citrix.com: This change also takes the chance to add a scratch cpumask, to avoid having to create one more cpumask_var_t on the stack of the dumping routine. Actually, I have a question about the strength of this design. When we have a machine with many cpus, we will end up with allocating a cpumask for each cpu. Just FTR, what we will end up allocating is: - an array of *pointers* to cpumasks with as many elements as the number of pCPUs, - a cpumask *only* for the pCPUs subjected to an instance of the RTDS scheduler. So, for instance, if you have 64 pCPUs, but are using the RTDS scheduler only in a cpupool with 2 pCPUs, you'll have an array of 64 pointers to cpumask_t, but only 2 actual cpumasks. Is this better than having a cpumask_var_t on the stack of the dumping routine, since the dumping routine is not in the hot path? George and Jan replied to this already, I think. Allow me to add just a few words: Such scratch area can be used to kill most of the cpumasks_var_t local variables in other functions in the file, but that is *NOT* done in this chage. This is the point, actually! As said here, this is not only for the sake of the dumping routine. In fact, ideally, someone will, in the near future, go throughout the whole file and kill most of the cpumask_t local variables, and most of the cpumask dynamic allocations, in favour of using this scratch area. @@ -409,6 +423,10 @@ rt_init(struct scheduler *ops) if ( prv == NULL ) return -ENOMEM; +_cpumask_scratch = xmalloc_array(cpumask_var_t, nr_cpu_ids); Is it better to use xzalloc_array? Why? IMO, not really. I'm only free()-ing (in rt_free_pdata()) the elements of the array that have been previously successfully allocated (in rt_alloc_pdata()), so I don't think there is any special requirement for all the elements to be NULL right away. +if ( _cpumask_scratch == NULL ) +return -ENOMEM; + spin_lock_init(prv-lock); INIT_LIST_HEAD(prv-sdom); INIT_LIST_HEAD(prv-runq); @@ -426,6 +444,7 @@ rt_deinit(const struct scheduler *ops) { struct rt_private *prv = rt_priv(ops); +xfree(_cpumask_scratch); xfree(prv); } @@ -443,6 +462,9 @@ rt_alloc_pdata(const struct scheduler *ops, int cpu) per_cpu(schedule_data, cpu).schedule_lock = prv-lock; spin_unlock_irqrestore(prv-lock, flags); +if ( !alloc_cpumask_var(_cpumask_scratch[cpu]) ) Is it better to use zalloc_cpumask_var() here? Nope. It's a scratch area, after all, so one really should not assume it to be in a specific state (e.g., no bits set as you're suggesting) when using it. Thanks and Regards, Dario signature.asc Description: This is a digitally signed message part ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 1/6] x86: detect and initialize Intel CAT feature
On Tue, Mar 17, 2015 at 04:11:33PM +0800, Chao Peng wrote: On Fri, Mar 13, 2015 at 09:40:13AM -0400, Konrad Rzeszutek Wilk wrote: On Fri, Mar 13, 2015 at 06:13:20PM +0800, Chao Peng wrote: Detect Intel Cache Allocation Technology(CAT) feature and store the cpuid information for later use. Currently only L3 cache allocation is supported. The L3 CAT features may vary among sockets so per-socket feature information is stored. The initialization can happen either at boot time or when CPU(s) is hot plugged after booting. Signed-off-by: Chao Peng chao.p.p...@linux.intel.com --- docs/misc/xen-command-line.markdown | 15 +++- xen/arch/x86/psr.c | 151 +--- xen/include/asm-x86/cpufeature.h| 1 + 3 files changed, 155 insertions(+), 12 deletions(-) +cat_cpu_init(smp_processor_id()); Do 'if (!cat_cpu_init(..)).`' as the CPU might not support this. At which point you should also free the cat_socket_info and not register the cpu notifier. Even the booting CPU does not support this, other CPUs may still support this. Generally the feature is a per-socket feature. So break here is not the intention. Oooh, and you did mention that in the git commit description and I dived right in the code - without looking there - sorry for that noise! Thought I am curious - what if all the sockets don't support and the user does try enable it on the command line (user error)? Shouldn't we then figure out that all of the CPUs don't support and xfree cat_socket_info and not register the CPU notifier? Except this, all other comments will be addressed by the next version. thank you! Thanks for your time. Chao +register_cpu_notifier(cpu_nfb); +} + ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 7/7] xen: sched_rt: print useful affinity info when dumping
On Mon, 2015-03-16 at 19:05 +, George Dunlap wrote: On 03/16/2015 05:05 PM, Dario Faggioli wrote: @@ -218,7 +224,6 @@ __q_elem(struct list_head *elem) static void rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc) { -char cpustr[1024]; cpumask_t *cpupool_mask; ASSERT(svc != NULL); @@ -229,10 +234,22 @@ rt_dump_vcpu(const struct scheduler *ops, const struct rt_vcpu *svc) return; } -cpumask_scnprintf(cpustr, sizeof(cpustr), svc-vcpu-cpu_hard_affinity); +cpupool_mask = cpupool_scheduler_cpumask(svc-vcpu-domain-cpupool); +/* + * We can't just use 'cpumask_scratch' because the dumping can + * happen from a pCPU outside of this scheduler's cpupool, and + * hence it's not right to use the pCPU's scratch mask (which + * may even not exist!). On the other hand, it is safe to use + * svc-vcpu-processor's own scratch space, since we own the + * runqueue lock. Since we *hold* the lock. Right, thanks. + */ +cpumask_and(_cpumask_scratch[svc-vcpu-processor], cpupool_mask, +svc-vcpu-cpu_hard_affinity); +cpulist_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch), + _cpumask_scratch[svc-vcpu-processor]); Just a suggestion, would it be worth making a local variable to avoid typing this long thing twice? It probably would. Then you could also put the comment about using the svc-vcpu-processor's scratch space above the place where you set the local variable, while avoiding breaking up the logic of the cpumask operations. I like this, will do. Regards, Dario signature.asc Description: This is a digitally signed message part ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel