To help analyze the frequency of PI (posted interrupt) blocking list operations and to track the list length, add the relevant trace events to xentrace and the corresponding processing code to xenalyze. The new ASYNC_PI_LIST_DEL event may be generated in interrupt context, where the assumptions verified by toplevel_assert_check() (for example, that the pcpu has a valid current vcpu) no longer hold. Therefore, extend toplevel_assert_check() so that such assumptions are dropped for events of type ASYNC_PI_LIST_DEL.
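As background for the toplevel_assert_check() change, the dispatch scheme can be shown with a minimal, self-contained C sketch. The names and masks below are simplified inventions for illustration only; the real definitions live in the xenalyze.c hunks further down.

/*
 * Sketch of the sublevel assert-mask dispatch (illustration only).
 * The sub-class field of a trace event ID is one-hot (1, 2, 4, 8);
 * it is mapped to a dense index, which selects a per-sublevel mask.
 */
#include <assert.h>
#include <stdio.h>

struct assert_mask {
    unsigned int p_current:1;      /* event requires a current vcpu */
    unsigned int check_sublevel:1; /* defer to a per-sublevel mask */
    struct assert_mask *sub;
    int sub_max;
};

/* Other sub-class values are reserved */
static int sublevel_to_index[16] = { [1] = 1, [2] = 2, [4] = 3, [8] = 4 };

static struct assert_mask hvm_sub[5] = {
    [1] = { .p_current = 1 },  /* entry/exit */
    [2] = { .p_current = 1 },  /* handler    */
    [3] = { .p_current = 1 },  /* emul       */
    [4] = { .p_current = 0 },  /* async: may fire in interrupt context */
};

static struct assert_mask hvm_toplevel = {
    .check_sublevel = 1, .sub = hvm_sub, .sub_max = 5,
};

int main(void)
{
    unsigned int sub;

    for ( sub = 1; sub <= 8; sub <<= 1 )
    {
        struct assert_mask mask = hvm_toplevel;

        if ( mask.check_sublevel )
        {
            int i = sublevel_to_index[sub];

            assert(mask.sub && i < mask.sub_max);
            mask = mask.sub[i];
        }
        printf("sub-class %u: requires current vcpu? %s\n",
               sub, mask.p_current ? "yes" : "no");
    }

    return 0;
}

The async sub-class clears p_current on purpose: ASYNC_PI_LIST_DEL can fire in interrupt context, where no guest vcpu is current on the pcpu.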
Signed-off-by: Chao Gao <chao....@intel.com>
---
v4:
 - trace part of Patch 1 in v3
---
 tools/xentrace/formats          |   3 +
 tools/xentrace/xenalyze.c       | 154 +++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/hvm/vmx/vmx.c      |  21 +++++-
 xen/include/asm-x86/hvm/trace.h |   1 +
 xen/include/public/trace.h      |   5 ++
 5 files changed, 172 insertions(+), 12 deletions(-)

diff --git a/tools/xentrace/formats b/tools/xentrace/formats
index 8b31780..54e0b11 100644
--- a/tools/xentrace/formats
+++ b/tools/xentrace/formats
@@ -125,6 +125,9 @@
 0x00082020  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR_WINDOW [ value = 0x%(1)08x ]
 0x00082021  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NPF         [ gpa = 0x%(2)08x%(1)08x mfn = 0x%(4)08x%(3)08x qual = 0x%(5)04x p2mt = 0x%(6)04x ]
 0x00082023  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  TRAP        [ vector = 0x%(1)02x ]
+0x00082026  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PI_LIST_ADD [ domid = 0x%(1)04x vcpu = 0x%(2)04x, pcpu = 0x%(3)04x, #entry = 0x%(4)04x ]
+
+0x00088001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ASYNC_PI_LIST_DEL [ domid = 0x%(1)04x vcpu = 0x%(2)04x ]

 0x0010f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_map      [ domid = %(1)d ]
 0x0010f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_unmap    [ domid = %(1)d ]

diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index fa608ad..fbc2429 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -296,6 +296,7 @@ struct symbol_struct {
 };

 void error(enum error_level l, struct record_info *ri);
+struct vcpu_data * vcpu_find(int did, int vid);

 void parse_symbol_file(char *fn) {
     unsigned long long last_addr = 0;
@@ -944,6 +945,7 @@ enum {
     HVM_EVENT_TRAP,
     HVM_EVENT_TRAP_DEBUG,
     HVM_EVENT_VLAPIC,
+    HVM_EVENT_PI_LIST_ADD,
     HVM_EVENT_HANDLER_MAX
 };
 char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
@@ -979,13 +981,15 @@ char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
     "realmode_emulate",
     "trap",
     "trap_debug",
-    "vlapic"
+    "vlapic",
+    "pi_list_add",
 };

 enum {
     HVM_VOL_VMENTRY,
     HVM_VOL_VMEXIT,
     HVM_VOL_HANDLER,
+    HVM_VOL_ASYNC,
     HVM_VOL_MAX
 };

@@ -1012,6 +1016,7 @@ char *hvm_vol_name[HVM_VOL_MAX] = {
     [HVM_VOL_VMENTRY]="vmentry",
     [HVM_VOL_VMEXIT] ="vmexit",
     [HVM_VOL_HANDLER]="handler",
+    [HVM_VOL_ASYNC]="async",
 };

 enum {
@@ -1337,6 +1342,9 @@ struct hvm_data {
         struct {
             struct io_address *mmio, *pio;
         } io;
+        struct {
+            int pi_list_add, pi_list_del;
+        } pi;
     } summary;

     /* In-flight accumulation information */
@@ -1391,6 +1399,9 @@ struct hvm_data {

     /* Historical info */
     tsc_t last_rdtsc;
+
+    /* Destination pcpu of posted interrupt's wakeup interrupt */
+    int pi_cpu;
 };

 enum {
@@ -1457,6 +1468,8 @@ void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) {
     }
     for(i=0; i<GUEST_INTERRUPT_MAX+1; i++)
         h->summary.guest_interrupt[i].count=0;
+
+    h->pi_cpu = -1;
 }

 /* PV data */
@@ -1766,6 +1779,14 @@ char * toplevel_name[TOPLEVEL_MAX] = {
     [TOPLEVEL_HW]="hw",
 };

+enum {
+    SUBLEVEL_HVM_ENTRYEXIT=1,
+    SUBLEVEL_HVM_HANDLER,
+    SUBLEVEL_HVM_EMUL,
+    SUBLEVEL_HVM_ASYNC,
+    SUBLEVEL_HVM_MAX=SUBLEVEL_HVM_ASYNC+1,
+};
+
 struct trace_volume {
     unsigned long long toplevel[TOPLEVEL_MAX];
     unsigned long long sched_verbose;
@@ -1852,6 +1873,9 @@ struct pcpu_info {
         tsc_t tsc;
         struct cycle_summary idle, running, lost;
     } time;
+
+    /* Posted Interrupt List Length */
+    int pi_list_length;
 };

 void __fill_in_record_info(struct pcpu_info *p);
@@ -4726,6 +4750,71 @@ void hvm_generic_dump(struct record_info *ri, char * prefix)
     printf(" ]\n");
 }

+void hvm_pi_list_add_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        int did;
+        int vid;
+        int pcpu;
+        int len;
+    } *data;
+    struct vcpu_data *v;
+
+    data = (typeof(data))ri->rec.u.tsc.data;
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+        printf("d%uv%u is added to pi blocking list of pcpu%u. "
+               "The list length is now %d\n",
+               data->did, data->vid, data->pcpu, data->len);
+
+    v->hvm.pi_cpu = data->pcpu;
+    v->hvm.summary.pi.pi_list_add++;
+    if ( data->pcpu > P.max_active_pcpu || !P.pcpu[data->pcpu].active )
+        fprintf(stderr, "Strange! pcpu%u is inactive but a vcpu is added "
+                "to it\n", data->pcpu);
+    else if ( P.pcpu[data->pcpu].pi_list_length == -1 )
+        P.pcpu[data->pcpu].pi_list_length = data->len;
+    else if ( data->len != ++P.pcpu[data->pcpu].pi_list_length )
+        /*
+         * Correct the list length. Removing a vcpu that was already on
+         * the list before tracing started does not decrease the recorded
+         * length, so the recorded length can become inaccurate.
+         */
+        P.pcpu[data->pcpu].pi_list_length = data->len;
+}
+
+void hvm_pi_list_del_process(struct record_info *ri)
+{
+    struct {
+        int did;
+        int vid;
+    } *data;
+    struct vcpu_data *v;
+
+    data = (typeof(data))ri->rec.u.tsc.data;
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+    {
+        if ( v->hvm.pi_cpu != -1 )
+            printf("d%uv%u is removed from pi blocking list of pcpu%u\n",
+                   data->did, data->vid, v->hvm.pi_cpu);
+        else
+            printf("d%uv%u is removed from pi blocking list\n",
+                   data->did, data->vid);
+    }
+
+    if ( v->hvm.pi_cpu != -1 && P.pcpu[v->hvm.pi_cpu].pi_list_length != -1 )
+        P.pcpu[v->hvm.pi_cpu].pi_list_length--;
+    v->hvm.pi_cpu = -1;
+    v->hvm.summary.pi.pi_list_del++;
+}
+
 void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
     /* Wait for first vmexit to initialize */
     if(!h->init)
@@ -4763,6 +4852,9 @@ void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
     case TRC_HVM_INTR_WINDOW:
         hvm_intr_window_process(ri, h);
         break;
+    case TRC_HVM_PI_LIST_ADD:
+        hvm_pi_list_add_process(ri, h);
+        break;
     case TRC_HVM_OP_DESTROY_PROC:
         if(h->v->cr3.data) {
             struct cr3_value_struct *cur = h->v->cr3.data;
@@ -4862,7 +4954,6 @@ needs_vmexit:
 void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc);
 void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev,
                       tsc_t tsc, int new_runstate);
-struct vcpu_data * vcpu_find(int did, int vid);
 void lose_vcpu(struct vcpu_data *v, tsc_t tsc);

 int domain_runstate(struct domain_data *d) {
@@ -5267,6 +5358,21 @@ void hvm_process(struct pcpu_info *p)
     struct vcpu_data *v = p->current;
     struct hvm_data *h = &v->hvm;

+    if(ri->evt.sub == 8)
+    {
+        UPDATE_VOLUME(p, hvm[HVM_VOL_ASYNC], ri->size);
+
+        switch(ri->event) {
+        case TRC_HVM_ASYNC_PI_LIST_DEL:
+            hvm_pi_list_del_process(ri);
+            break;
+
+        default:
+            fprintf(warn, "Unknown hvm event: %x\n", ri->event);
+        }
+        return;
+    }
+
     assert(p->current);

     if(vcpu_set_data_type(p->current, VCPU_DATA_HVM))
@@ -5359,6 +5465,10 @@ void hvm_summary(struct hvm_data *h) {
                i, h->summary.ipi_count[i]);
     hvm_io_address_summary(h->summary.io.pio, "IO address summary:");
     hvm_io_address_summary(h->summary.io.mmio, "MMIO address summary:");
+
+    printf("Posted Interrupt:\n");
+    printf(" List Add: %u\n", h->summary.pi.pi_list_add);
+    printf(" List Del: %u\n", h->summary.pi.pi_list_del);
 }

 /* ---- Shadow records ---- */
@@ -8962,6 +9072,7 @@ off_t scan_for_new_pcpu(off_t offset) {

         p->file_offset = offset;
         p->next_cpu_change_offset = offset;
+        p->pi_list_length = -1;

         record_order_insert(p);
@@ -9142,7 +9253,6 @@ int find_toplevel_event(struct record_info *ri)

     return toplevel;
 }
-
 void process_cpu_change(struct pcpu_info *p) {
     struct record_info *ri = &p->ri;
     struct cpu_change_data *r = (typeof(r))ri->d;
@@ -9190,6 +9300,7 @@ void process_cpu_change(struct pcpu_info *p) {
             p2->ri.d = p2->ri.rec.u.notsc.data;
             p2->file_offset = p->file_offset;
             p2->next_cpu_change_offset = p->file_offset;
+            p2->pi_list_length = -1;

             fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
                     __func__, r->cpu, (unsigned long long)p->file_offset);
@@ -9276,26 +9387,52 @@ void process_cpu_change(struct pcpu_info *p) {
     }
 }

-struct tl_assert_mask {
+struct assert_mask {
     unsigned p_current:1,
-        not_idle_domain:1;
+        not_idle_domain:1,
+        check_sublevel:1;
     int vcpu_data_mode;
+    int sub_max;
+    struct assert_mask *sub;
 };

-static struct tl_assert_mask tl_assert_checks[TOPLEVEL_MAX] = {
-    [TRC_HVM_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
+static struct assert_mask sl_hvm_assert_mask[SUBLEVEL_HVM_MAX] = {
+    [SUBLEVEL_HVM_ENTRYEXIT] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
+    [SUBLEVEL_HVM_HANDLER] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
+    [SUBLEVEL_HVM_EMUL] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
+};
+
+static struct assert_mask tl_assert_checks[TOPLEVEL_MAX] = {
+    [TRC_HVM_MAIN]={ .check_sublevel=1, .sub=sl_hvm_assert_mask, .sub_max=SUBLEVEL_HVM_MAX },
     [TRC_SHADOW_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
     [TRC_PV_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_PV },
 };

+/* Other sub types are reserved */
+static int sublevel_to_index[16] = {
+    [1] = 1,
+    [2] = 2,
+    [4] = 3,
+    [8] = 4,
+};
+
 /* There are a lot of common assumptions for the various processing
  * routines.  Check them all in one place, doing something else if
  * they don't pass. */
 int toplevel_assert_check(int toplevel, struct pcpu_info *p)
 {
-    struct tl_assert_mask mask;
+    struct assert_mask mask;

     mask = tl_assert_checks[toplevel];

+    if (mask.check_sublevel)
+    {
+        int sub = sublevel_to_index[p->ri.evt.sub];
+
+        assert(mask.sub && (sub < mask.sub_max));
+        mask = mask.sub[sub];
+    }
+
     if (mask.p_current && p->current == NULL)
     {
         fprintf(warn, "WARNING: p->current null!  Not processing\n");
@@ -9362,7 +9499,6 @@ void process_record(struct pcpu_info *p) {

     if(opt.dump_all)
         create_dump_header(ri, p);
-
     toplevel = find_toplevel_event(ri);
     if ( toplevel < 0 )
         return;

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 04e9aa6..ccb1c8d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -190,7 +190,9 @@ static void vmx_vcpu_block(struct vcpu *v)
      */
     ASSERT(old_lock == NULL);

-    per_cpu(vmx_pi_blocking, v->processor).counter++;
+    per_cpu(vmx_pi_blocking, pi_cpu).counter++;
+    TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id, pi_cpu,
+             per_cpu(vmx_pi_blocking, pi_cpu).counter);
     list_add_tail(&v->arch.hvm_vmx.pi_blocking.list,
                   &per_cpu(vmx_pi_blocking, pi_cpu).list);
     spin_unlock_irqrestore(pi_blocking_list_lock, flags[1]);
@@ -279,6 +281,7 @@ static void vmx_pi_unblock_vcpu(struct vcpu *v)
         list_del(&v->arch.hvm_vmx.pi_blocking.list);
         container_of(pi_blocking_list_lock, struct vmx_pi_blocking_vcpu,
                      lock)->counter--;
+        TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
         v->arch.hvm_vmx.pi_blocking.lock = NULL;
     }
@@ -308,6 +311,8 @@ void vmx_pi_desc_fixup(unsigned int cpu)

     list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
     {
+        struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
         /*
          * Suppress notification or we may miss an interrupt when the
          * target cpu is dying.
@@ -322,9 +327,11 @@ void vmx_pi_desc_fixup(unsigned int cpu)
         if ( pi_test_on(&vmx->pi_desc) )
         {
             list_del(&vmx->pi_blocking.list);
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
         else
         {
@@ -357,6 +364,10 @@ void vmx_pi_desc_fixup(unsigned int cpu)
                           &per_cpu(vmx_pi_blocking, new_cpu).list);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             per_cpu(vmx_pi_blocking, new_cpu).counter++;
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
+            TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id,
+                     new_cpu, per_cpu(vmx_pi_blocking, new_cpu).counter);
             vmx->pi_blocking.lock = new_lock;

             spin_unlock(new_lock);
@@ -2475,11 +2486,15 @@ static void pi_wakeup_interrupt(struct cpu_user_regs *regs)
     {
         if ( pi_test_on(&vmx->pi_desc) )
         {
+            struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
             list_del(&vmx->pi_blocking.list);
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             ASSERT(vmx->pi_blocking.lock == lock);
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
     }

diff --git a/xen/include/asm-x86/hvm/trace.h b/xen/include/asm-x86/hvm/trace.h
index de802a6..b74ffdd 100644
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -54,6 +54,7 @@
 #define DO_TRC_HVM_TRAP        DEFAULT_HVM_MISC
 #define DO_TRC_HVM_TRAP_DEBUG  DEFAULT_HVM_MISC
 #define DO_TRC_HVM_VLAPIC      DEFAULT_HVM_MISC
+#define DO_TRC_HVM_PI_LIST_ADD DEFAULT_HVM_MISC

 #define TRC_PAR_LONG(par) ((par)&0xFFFFFFFF),((par)>>32)

diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index 7f2e891..15ea87c 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -53,6 +53,7 @@
 #define TRC_HVM_ENTRYEXIT   0x00081000   /* VMENTRY and #VMEXIT       */
 #define TRC_HVM_HANDLER     0x00082000   /* various HVM handlers      */
 #define TRC_HVM_EMUL        0x00084000   /* emulated devices          */
+#define TRC_HVM_ASYNC       0x00088000   /* Asynchronous events       */

 #define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
 #define TRC_SCHED_CLASS     0x00022000   /* Scheduler-specific    */
@@ -234,6 +235,7 @@
 #define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
 #define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
 #define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
+#define TRC_HVM_PI_LIST_ADD      (TRC_HVM_HANDLER + 0x26)

 #define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
 #define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
@@ -257,6 +259,9 @@
 #define TRC_HVM_EMUL_PIC_PEND_IRQ_CALL (TRC_HVM_EMUL + 0x10)
 #define TRC_HVM_EMUL_LAPIC_PIC_INTR    (TRC_HVM_EMUL + 0x11)

+/* Trace asynchronous events for HVM */
+#define TRC_HVM_ASYNC_PI_LIST_DEL      (TRC_HVM_ASYNC + 0x1)
+
 /* trace events per class */
 #define TRC_PM_FREQ_CHANGE (TRC_HW_PM + 0x01)
 #define TRC_PM_IDLE_ENTRY  (TRC_HW_PM + 0x02)
--
1.8.3.1
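For completeness, the payloads of the two new records can also be decoded outside of xenalyze. The standalone sketch below mirrors the TRACE_4D/TRACE_2D payload layouts and the full 32-bit event IDs from the formats file; the program itself, its struct names, and its sample values are illustrative inventions, not part of the patch.

/*
 * Illustrative decoder for the payloads of the two new trace records.
 * Field layouts mirror the TRACE_4D/TRACE_2D calls above; sample
 * values are made up.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define TRC_HVM_PI_LIST_ADD        0x00082026u
#define TRC_HVM_ASYNC_PI_LIST_DEL  0x00088001u

struct pi_list_add { uint32_t did, vid, pcpu, len; };
struct pi_list_del { uint32_t did, vid; };

static void decode(uint32_t event, const uint32_t *words)
{
    if ( event == TRC_HVM_PI_LIST_ADD )
    {
        const struct pi_list_add *d = (const struct pi_list_add *)words;

        printf("d%"PRIu32"v%"PRIu32" added to pcpu%"PRIu32"'s PI list"
               " (length now %"PRIu32")\n", d->did, d->vid, d->pcpu, d->len);
    }
    else if ( event == TRC_HVM_ASYNC_PI_LIST_DEL )
    {
        const struct pi_list_del *d = (const struct pi_list_del *)words;

        printf("d%"PRIu32"v%"PRIu32" removed from its PI list\n",
               d->did, d->vid);
    }
}

int main(void)
{
    uint32_t add[4] = { 1, 0, 3, 2 };   /* made-up: d1v0 -> pcpu3, length 2 */
    uint32_t del[2] = { 1, 0 };         /* made-up: d1v0 removed */

    decode(TRC_HVM_PI_LIST_ADD, add);
    decode(TRC_HVM_ASYNC_PI_LIST_DEL, del);
    return 0;
}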