In order to analyze the frequency of posted interrupt (PI) blocking
list operations and obtain the list length, add some relevant events
to xentrace and the associated handling code to xenalyze. The event
ASYNC_PI_LIST_DEL may happen in interrupt context, where the
assumptions currently checked in toplevel_assert_check() no longer
hold. Thus, this patch extends toplevel_assert_check() so that those
assumptions are skipped for events of type ASYNC_PI_LIST_DEL.

Signed-off-by: Chao Gao <chao....@intel.com>
---
v4:
 - trace part of Patch 1 in v3

---
 tools/xentrace/formats          |   3 +
 tools/xentrace/xenalyze.c       | 154 +++++++++++++++++++++++++++++++++++++---
 xen/arch/x86/hvm/vmx/vmx.c      |  21 +++++-
 xen/include/asm-x86/hvm/trace.h |   1 +
 xen/include/public/trace.h      |   5 ++
 5 files changed, 172 insertions(+), 12 deletions(-)
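
The toplevel_assert_check() change relies on each HVM trace subclass
occupying one bit of the 4-bit subclass field of the event word (0x1
entry/exit, 0x2 handlers, 0x4 emulation, and now 0x8 async), so the
patch maps the subclass through a 16-entry table to a dense sublevel
index. The SUBLEVEL_HVM_ASYNC slot of sl_hvm_assert_mask[] is
deliberately left zero-initialized, so none of the p_current /
not_idle_domain / vcpu_data_mode assumptions apply to async events.
A small self-contained sketch of that mapping, assuming xenalyze's
usual event-word split (main class in bits 16-27, subclass in bits
12-15, minor in bits 0-11):

#include <assert.h>
#include <stdio.h>

/* Mirrors sublevel_to_index[] from the patch: one-bit subclasses are
 * mapped to dense indices; other sub values are reserved. */
static const int sublevel_to_index[16] = {
    [1] = 1,    /* TRC_HVM_ENTRYEXIT */
    [2] = 2,    /* TRC_HVM_HANDLER   */
    [4] = 3,    /* TRC_HVM_EMUL      */
    [8] = 4,    /* TRC_HVM_ASYNC     */
};

int main(void)
{
    unsigned int event = 0x00088001;         /* TRC_HVM_ASYNC_PI_LIST_DEL */
    unsigned int sub = (event >> 12) & 0xf;  /* subclass field */
    int index = sublevel_to_index[sub];

    assert(sub == 0x8 && index == 4);        /* SUBLEVEL_HVM_ASYNC */
    printf("event 0x%08x: subclass 0x%x -> sublevel index %d\n",
           event, sub, index);
    return 0;
}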

diff --git a/tools/xentrace/formats b/tools/xentrace/formats
index 8b31780..54e0b11 100644
--- a/tools/xentrace/formats
+++ b/tools/xentrace/formats
@@ -125,6 +125,9 @@
 0x00082020  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR_WINDOW [ value = 0x%(1)08x ]
 0x00082021  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NPF         [ gpa = 0x%(2)08x%(1)08x mfn = 0x%(4)08x%(3)08x qual = 0x%(5)04x p2mt = 0x%(6)04x ]
 0x00082023  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  TRAP        [ vector = 0x%(1)02x ]
+0x00082026  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PI_LIST_ADD [ domid = 0x%(1)04x vcpu = 0x%(2)04x, pcpu = 0x%(3)04x, #entry = 0x%(4)04x ]
+
+0x00088001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ASYNC_PI_LIST_DEL [ domid = 0x%(1)04x vcpu = 0x%(2)04x ]
 
 0x0010f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_map      [ domid = %(1)d ]
 0x0010f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_unmap    [ domid = %(1)d ]
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index fa608ad..fbc2429 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -296,6 +296,7 @@ struct symbol_struct {
 };
 
 void error(enum error_level l, struct record_info *ri);
+struct vcpu_data * vcpu_find(int did, int vid);
 
 void parse_symbol_file(char *fn) {
     unsigned long long last_addr = 0;
@@ -944,6 +945,7 @@ enum {
     HVM_EVENT_TRAP,
     HVM_EVENT_TRAP_DEBUG,
     HVM_EVENT_VLAPIC,
+    HVM_EVENT_PI_LIST_ADD,
     HVM_EVENT_HANDLER_MAX
 };
 char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
@@ -979,13 +981,15 @@ char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
     "realmode_emulate",
     "trap",
     "trap_debug",
-    "vlapic"
+    "vlapic",
+    "pi_list_add",
 };
 
 enum {
     HVM_VOL_VMENTRY,
     HVM_VOL_VMEXIT,
     HVM_VOL_HANDLER,
+    HVM_VOL_ASYNC,
     HVM_VOL_MAX
 };
 
@@ -1012,6 +1016,7 @@ char *hvm_vol_name[HVM_VOL_MAX] = {
     [HVM_VOL_VMENTRY]="vmentry",
     [HVM_VOL_VMEXIT] ="vmexit",
     [HVM_VOL_HANDLER]="handler",
+    [HVM_VOL_ASYNC]="async",
 };
 
 enum {
@@ -1337,6 +1342,9 @@ struct hvm_data {
         struct {
             struct io_address *mmio, *pio;
         } io;
+        struct {
+            int pi_list_add, pi_list_del;
+        } pi;
     } summary;
 
     /* In-flight accumulation information */
@@ -1391,6 +1399,9 @@ struct hvm_data {
 
     /* Historical info */
     tsc_t last_rdtsc;
+
+    /* Destination pcpu of posted interrupt's wakeup interrupt */
+    int pi_cpu;
 };
 
 enum {
@@ -1457,6 +1468,8 @@ void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) {
     }
     for(i=0; i<GUEST_INTERRUPT_MAX+1; i++)
         h->summary.guest_interrupt[i].count=0;
+
+    h->pi_cpu = -1;
 }
 
 /* PV data */
@@ -1766,6 +1779,14 @@ char * toplevel_name[TOPLEVEL_MAX] = {
     [TOPLEVEL_HW]="hw",
 };
 
+enum {
+    SUBLEVEL_HVM_ENTRYEXIT=1,
+    SUBLEVEL_HVM_HANDLER,
+    SUBLEVEL_HVM_EMUL,
+    SUBLEVEL_HVM_ASYNC,
+    SUBLEVEL_HVM_MAX=SUBLEVEL_HVM_ASYNC+1,
+};
+
 struct trace_volume {
     unsigned long long toplevel[TOPLEVEL_MAX];
     unsigned long long sched_verbose;
@@ -1852,6 +1873,9 @@ struct pcpu_info {
         tsc_t tsc;
         struct cycle_summary idle, running, lost;
     } time;
+
+    /* Posted Interrupt List Length */
+    int pi_list_length;
 };
 
 void __fill_in_record_info(struct pcpu_info *p);
@@ -4726,6 +4750,71 @@ void hvm_generic_dump(struct record_info *ri, char * prefix)
     printf(" ]\n");
 }
 
+void hvm_pi_list_add_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        int did;
+        int vid;
+        int pcpu;
+        int len;
+    } *data;
+    struct vcpu_data *v;
+
+    data = (typeof(data))ri->rec.u.tsc.data;
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+        printf("d%uv%u is added to pi blocking list of pcpu%u. "
+               "The list length is now %d\n",
+               data->did, data->vid, data->pcpu, data->len);
+
+    v->hvm.pi_cpu = data->pcpu;
+    v->hvm.summary.pi.pi_list_add++;
+    if ( data->pcpu > P.max_active_pcpu || !P.pcpu[data->pcpu].active )
+        fprintf(stderr, "Strange! pcpu%u is inactive but a vcpu is added "
+                "to it\n", data->pcpu);
+    else if ( P.pcpu[data->pcpu].pi_list_length == -1 )
+        P.pcpu[data->pcpu].pi_list_length = data->len;
+    else if ( data->len != ++P.pcpu[data->pcpu].pi_list_length )
+        /*
+         * Resync the pi list length. Removing a vcpu that was already
+         * on the list before tracing started leaves the tracked length
+         * out of sync; trust the length reported by this event.
+         */
+        P.pcpu[data->pcpu].pi_list_length = data->len;
+}
+
+void hvm_pi_list_del_process(struct record_info *ri)
+{
+    struct {
+        int did;
+        int vid;
+    } *data;
+    struct vcpu_data *v;
+
+    data = (typeof(data))ri->rec.u.tsc.data;
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+    {
+        if ( v->hvm.pi_cpu != -1 )
+            printf("d%uv%u is removed from pi blocking list of pcpu%u\n",
+                   data->did, data->vid, v->hvm.pi_cpu);
+        else
+            printf("d%uv%u is removed from pi blocking list\n",
+                   data->did, data->vid);
+    }
+
+    if ( v->hvm.pi_cpu != -1 && P.pcpu[v->hvm.pi_cpu].pi_list_length != -1 )
+        P.pcpu[v->hvm.pi_cpu].pi_list_length--;
+    v->hvm.pi_cpu = -1;
+    v->hvm.summary.pi.pi_list_del++;
+}
+
 void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
     /* Wait for first vmexit to initialize */
     if(!h->init)
@@ -4763,6 +4852,9 @@ void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
     case TRC_HVM_INTR_WINDOW:
         hvm_intr_window_process(ri, h);
         break;
+    case TRC_HVM_PI_LIST_ADD:
+        hvm_pi_list_add_process(ri, h);
+        break;
     case TRC_HVM_OP_DESTROY_PROC:
         if(h->v->cr3.data) {
             struct cr3_value_struct *cur = h->v->cr3.data;
@@ -4862,7 +4954,6 @@ needs_vmexit:
 void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc);
 void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev,
                       tsc_t tsc, int new_runstate);
-struct vcpu_data * vcpu_find(int did, int vid);
 void lose_vcpu(struct vcpu_data *v, tsc_t tsc);
 
 int domain_runstate(struct domain_data *d) {
@@ -5267,6 +5358,21 @@ void hvm_process(struct pcpu_info *p)
     struct vcpu_data *v = p->current;
     struct hvm_data *h = &v->hvm;
 
+    if(ri->evt.sub == 0x8) /* TRC_HVM_ASYNC subclass */
+    {
+        UPDATE_VOLUME(p, hvm[HVM_VOL_ASYNC], ri->size);
+
+        switch(ri->event) {
+        case TRC_HVM_ASYNC_PI_LIST_DEL:
+            hvm_pi_list_del_process(ri);
+            break;
+
+        default:
+            fprintf(warn, "Unknown hvm event: %x\n", ri->event);
+        }
+        return;
+    }
+
     assert(p->current);
 
     if(vcpu_set_data_type(p->current, VCPU_DATA_HVM))
@@ -5359,6 +5465,10 @@ void hvm_summary(struct hvm_data *h) {
                   i, h->summary.ipi_count[i]);
    hvm_io_address_summary(h->summary.io.pio, "IO address summary:");
    hvm_io_address_summary(h->summary.io.mmio, "MMIO address summary:");
+
+   printf("Posted Interrupt:\n");
+   printf(" List Add: %u\n", h->summary.pi.pi_list_add);
+   printf(" List Del: %u\n", h->summary.pi.pi_list_del);
 }
 
 /* ---- Shadow records ---- */
@@ -8962,6 +9072,7 @@ off_t scan_for_new_pcpu(off_t offset) {
 
         p->file_offset = offset;
         p->next_cpu_change_offset = offset;
+        p->pi_list_length = -1;
 
         record_order_insert(p);
 
@@ -9142,7 +9253,6 @@ int find_toplevel_event(struct record_info *ri)
     return toplevel;
 }
 
-
 void process_cpu_change(struct pcpu_info *p) {
     struct record_info *ri = &p->ri;
     struct cpu_change_data *r = (typeof(r))ri->d;
@@ -9190,6 +9300,7 @@ void process_cpu_change(struct pcpu_info *p) {
         p2->ri.d = p2->ri.rec.u.notsc.data;
         p2->file_offset = p->file_offset;
         p2->next_cpu_change_offset = p->file_offset;
+        p2->pi_list_length = -1;
 
         fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
                 __func__, r->cpu, (unsigned long long)p->file_offset);
@@ -9276,26 +9387,52 @@ void process_cpu_change(struct pcpu_info *p) {
     }
 }
 
-struct tl_assert_mask {
+struct assert_mask {
     unsigned p_current:1,
-        not_idle_domain:1;
+        not_idle_domain:1,
+        check_sublevel:1;
     int vcpu_data_mode;
+    int sub_max;
+    struct assert_mask *sub;
 };
-static struct tl_assert_mask tl_assert_checks[TOPLEVEL_MAX] = {
-    [TRC_HVM_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
+
+static struct assert_mask sl_hvm_assert_mask[SUBLEVEL_HVM_MAX] = {
+    [SUBLEVEL_HVM_ENTRYEXIT] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
+    [SUBLEVEL_HVM_HANDLER] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
+    [SUBLEVEL_HVM_EMUL] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
+};
+
+static struct assert_mask tl_assert_checks[TOPLEVEL_MAX] = {
+    [TRC_HVM_MAIN]={ .check_sublevel=1, .sub=sl_hvm_assert_mask, .sub_max=SUBLEVEL_HVM_MAX },
     [TRC_SHADOW_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
     [TRC_PV_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_PV },
 };
 
+/* Other sub types are reserved */
+static int sublevel_to_index[16] = {
+    [1] = 1,
+    [2] = 2,
+    [4] = 3,
+    [8] = 4,
+};
+
 /* There are a lot of common assumptions for the various processing
  * routines.  Check them all in one place, doing something else if
  * they don't pass. */
 int toplevel_assert_check(int toplevel, struct pcpu_info *p)
 {
-    struct tl_assert_mask mask;
+    struct assert_mask mask;
 
     mask = tl_assert_checks[toplevel];
 
+    if (mask.check_sublevel)
+    {
+        int sub = sublevel_to_index[p->ri.evt.sub];
+
+        assert(mask.sub && (sub < mask.sub_max));
+        mask = mask.sub[sub];
+    }
+
     if (mask.p_current && p->current == NULL)
     {
         fprintf(warn, "WARNING: p->current null!  Not processing\n");
@@ -9362,7 +9499,6 @@ void process_record(struct pcpu_info *p) {
     if(opt.dump_all)
         create_dump_header(ri, p);
 
-
     toplevel = find_toplevel_event(ri);
     if ( toplevel < 0 )
         return;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 04e9aa6..ccb1c8d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -190,7 +190,9 @@ static void vmx_vcpu_block(struct vcpu *v)
      */
     ASSERT(old_lock == NULL);
 
-    per_cpu(vmx_pi_blocking, v->processor).counter++;
+    per_cpu(vmx_pi_blocking, pi_cpu).counter++;
+    TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id, pi_cpu,
+             per_cpu(vmx_pi_blocking, pi_cpu).counter);
     list_add_tail(&v->arch.hvm_vmx.pi_blocking.list,
                   &per_cpu(vmx_pi_blocking, pi_cpu).list);
     spin_unlock_irqrestore(pi_blocking_list_lock, flags[1]);
@@ -279,6 +281,7 @@ static void vmx_pi_unblock_vcpu(struct vcpu *v)
         list_del(&v->arch.hvm_vmx.pi_blocking.list);
         container_of(pi_blocking_list_lock,
                      struct vmx_pi_blocking_vcpu, lock)->counter--;
+        TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
         v->arch.hvm_vmx.pi_blocking.lock = NULL;
     }
 
@@ -308,6 +311,8 @@ void vmx_pi_desc_fixup(unsigned int cpu)
 
     list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
     {
+        struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
         /*
          * Suppress notification or we may miss an interrupt when the
          * target cpu is dying.
@@ -322,9 +327,11 @@ void vmx_pi_desc_fixup(unsigned int cpu)
         if ( pi_test_on(&vmx->pi_desc) )
         {
             list_del(&vmx->pi_blocking.list);
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
         else
         {
@@ -357,6 +364,10 @@ void vmx_pi_desc_fixup(unsigned int cpu)
                       &per_cpu(vmx_pi_blocking, new_cpu).list);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             per_cpu(vmx_pi_blocking, new_cpu).counter++;
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
+            TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id,
+                     new_cpu, per_cpu(vmx_pi_blocking, new_cpu).counter);
             vmx->pi_blocking.lock = new_lock;
 
             spin_unlock(new_lock);
@@ -2475,11 +2486,15 @@ static void pi_wakeup_interrupt(struct cpu_user_regs *regs)
     {
         if ( pi_test_on(&vmx->pi_desc) )
         {
+            struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
             list_del(&vmx->pi_blocking.list);
+            TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id,
+                     v->vcpu_id);
             per_cpu(vmx_pi_blocking, cpu).counter--;
             ASSERT(vmx->pi_blocking.lock == lock);
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
     }
 
diff --git a/xen/include/asm-x86/hvm/trace.h b/xen/include/asm-x86/hvm/trace.h
index de802a6..b74ffdd 100644
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -54,6 +54,7 @@
 #define DO_TRC_HVM_TRAP             DEFAULT_HVM_MISC
 #define DO_TRC_HVM_TRAP_DEBUG       DEFAULT_HVM_MISC
 #define DO_TRC_HVM_VLAPIC           DEFAULT_HVM_MISC
+#define DO_TRC_HVM_PI_LIST_ADD      DEFAULT_HVM_MISC
 
 
 #define TRC_PAR_LONG(par) ((par)&0xFFFFFFFF),((par)>>32)
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index 7f2e891..15ea87c 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -53,6 +53,7 @@
 #define TRC_HVM_ENTRYEXIT   0x00081000   /* VMENTRY and #VMEXIT       */
 #define TRC_HVM_HANDLER     0x00082000   /* various HVM handlers      */
 #define TRC_HVM_EMUL        0x00084000   /* emulated devices */
+#define TRC_HVM_ASYNC       0x00088000   /* Asynchronous events */
 
 #define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
 #define TRC_SCHED_CLASS     0x00022000   /* Scheduler-specific    */
@@ -234,6 +235,7 @@
 #define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
 #define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
 #define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
+#define TRC_HVM_PI_LIST_ADD      (TRC_HVM_HANDLER + 0x26)
 
 #define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
 #define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
@@ -257,6 +259,9 @@
 #define TRC_HVM_EMUL_PIC_PEND_IRQ_CALL (TRC_HVM_EMUL + 0x10)
 #define TRC_HVM_EMUL_LAPIC_PIC_INTR    (TRC_HVM_EMUL + 0x11)
 
+/* Trace asynchronous events for HVM */
+#define TRC_HVM_ASYNC_PI_LIST_DEL      (TRC_HVM_ASYNC + 0x1)
+
 /* trace events for per class */
 #define TRC_PM_FREQ_CHANGE      (TRC_HW_PM + 0x01)
 #define TRC_PM_IDLE_ENTRY       (TRC_HW_PM + 0x02)
-- 
1.8.3.1