From 78288b7fcc9ae64618ff50bdabe14afa706ff386 Mon Sep 17 00:00:00 2001
From: "Justin T. Weaver" <jtweaver@hawaii.edu>
Date: Mon, 2 Feb 2015 22:04:51 -1000
Subject: [PATCH v2 1/2 draft] sched: credit2: respect per-vcpu hard affinity

Ensure that vcpus only run on the pcpu(s) they are allowed to run on,
based on their hard affinity cpu masks.

Signed-off-by: Justin T. Weaver <jtweaver@hawaii.edu>
---
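Note for reviewers: the per-pcpu scratch mask added to sched_credit2.c
below exists so that intermediate cpumask results never live on the
hypervisor stack.  A minimal standalone sketch of the pattern follows;
the names (scratch_masks, scratch_init, this_cpu_scratch_mask) are
hypothetical, and the patch itself allocates each mask lazily from
csched2_alloc_pdata() rather than all at once:

  #include <xen/cpumask.h>
  #include <xen/errno.h>
  #include <xen/smp.h>
  #include <xen/xmalloc.h>

  /* One scratch mask per pcpu, indexed by smp_processor_id(). */
  static cpumask_var_t *scratch_masks;
  #define this_cpu_scratch_mask (scratch_masks[smp_processor_id()])

  static int scratch_init(unsigned int nr_cpus)
  {
      unsigned int cpu;

      scratch_masks = xzalloc_array(cpumask_var_t, nr_cpus);
      if ( scratch_masks == NULL )
          return -ENOMEM;

      for ( cpu = 0; cpu < nr_cpus; cpu++ )
          if ( !zalloc_cpumask_var(&scratch_masks[cpu]) )
              return -ENOMEM; /* caller is expected to unwind */

      return 0;
  }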
 xen/common/domctl.c        |    6 ++
 xen/common/sched_credit2.c |  218 +++++++++++++++++++++++++++++++++++++++-----
 xen/common/schedule.c      |   76 ++++++++++++++-
 3 files changed, 272 insertions(+), 28 deletions(-)
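The hard affinity handling in choose_cpu() / get_safe_pcpu() reduces
to: intersect the vcpu's hard affinity with the candidate cpu set,
fall back to the online cpus of its cpupool if that intersection is
empty, and prefer the current processor whenever it is still allowed.
A standalone sketch of that logic, with a hypothetical helper name and
a caller-provided scratch mask:

  #include <xen/cpumask.h>
  #include <xen/sched.h>
  #include <xen/sched-if.h>

  static unsigned int pick_allowed_cpu(const struct vcpu *v,
                                       const cpumask_t *candidates,
                                       cpumask_t *scratch)
  {
      /* Restrict the candidates to what hard affinity allows. */
      cpumask_and(scratch, v->cpu_hard_affinity, candidates);
      if ( unlikely(cpumask_empty(scratch)) )
          cpumask_and(scratch, v->cpu_hard_affinity,
                      cpupool_online_cpumask(v->domain->cpupool));

      /* Stay put if the current processor is still allowed. */
      if ( cpumask_test_cpu(v->processor, scratch) )
          return v->processor;

      return cpumask_any(scratch);
  }

This assumes the second intersection is non-empty, relying on
vcpu_set_affinity() refusing hard affinity masks that do not intersect
the cpupool's online cpus.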

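Similarly, the load balancing changes only need a yes/no answer to
"could some vcpu on this runqueue legally run on the cpus of the other
runqueue?".  Sketched standalone below with a hypothetical helper name,
using the scheduler's own struct csched2_runqueue_data and struct
csched2_vcpu; note that cpumask_intersects() can answer this without
writing to the scratch mask, whereas the patch currently uses
cpumask_and() plus cpumask_empty():

  static bool_t runq_has_migratable_vcpu(struct csched2_runqueue_data *rqd,
                                         const cpumask_t *dest_cpus)
  {
      struct list_head *iter;

      list_for_each( iter, &rqd->svc )
      {
          struct csched2_vcpu *svc =
              list_entry(iter, struct csched2_vcpu, rqd_elem);

          if ( cpumask_intersects(svc->vcpu->cpu_hard_affinity, dest_cpus) )
              return 1;
      }

      return 0;
  }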
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index ee578c0..de76298 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -32,6 +32,8 @@
 #include <public/domctl.h>
 #include <xsm/xsm.h>
 
+extern bool_t setting_affinity;
+
 static DEFINE_SPINLOCK(domctl_lock);
 DEFINE_SPINLOCK(vcpu_alloc_lock);
 
@@ -729,6 +731,8 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
         if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL )
             break;
 
+        printk("domctl:do_domctl - VPF_blocked is %d\n", test_bit(_VPF_blocked, &v->pause_flags));
+
         ret = -EINVAL;
         if ( vcpuaffinity_params_invalid(vcpuaff) )
             break;
@@ -738,6 +742,8 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
             cpumask_var_t new_affinity, old_affinity;
             cpumask_t *online = cpupool_online_cpumask(v->domain->cpupool);;
 
+            setting_affinity = 1;
+
             /*
              * We want to be able to restore hard affinity if we are trying
              * setting both and changing soft affinity (which happens later,
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index cf53770..f452e36 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -26,6 +26,8 @@
 #include <xen/trace.h>
 #include <xen/cpu.h>
 
+extern bool_t setting_affinity;
+
 #define d2printk(x...)
 //#define d2printk printk
 
@@ -194,6 +196,12 @@ int opt_overload_balance_tolerance=-3;
 integer_param("credit2_balance_over", opt_overload_balance_tolerance);
 
 /*
+ * Use this to avoid having too many cpumask_t structs on the stack
+ */
+static cpumask_t **cpumask = NULL;
+#define csched2_cpumask cpumask[smp_processor_id()]
+
+/*
  * Per-runqueue data
  */
 struct csched2_runqueue_data {
@@ -268,6 +276,23 @@ struct csched2_dom {
     uint16_t nr_vcpus;
 };
 
+/*
+ * When a hard affinity change occurs, we may not be able to check some or
+ * all of the other run queues for a valid new processor for the given vcpu.
+ * Return svc's current pcpu if valid, otherwise return a safe pcpu.
+ */
+static int get_safe_pcpu(struct csched2_vcpu *svc)
+{
+    cpumask_and(csched2_cpumask, svc->vcpu->cpu_hard_affinity, &svc->rqd->active);
+    if ( unlikely(cpumask_empty(csched2_cpumask)) )
+        cpumask_and(csched2_cpumask, svc->vcpu->cpu_hard_affinity,
+            cpupool_online_cpumask(svc->vcpu->domain->cpupool));
+
+    if ( cpumask_test_cpu(svc->vcpu->processor, csched2_cpumask) )
+        return svc->vcpu->processor;
+    else
+        return cpumask_any(csched2_cpumask);
+}
 
 /*
  * Time-to-credit, credit-to-time.
@@ -501,8 +526,9 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
         goto tickle;
     }
     
-    /* Get a mask of idle, but not tickled */
+    /* Get a mask of idle, non-tickled cpus that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->idle, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
     
     /* If it's not empty, choose one */
     i = cpumask_cycle(cpu, &mask);
@@ -513,9 +539,11 @@ runq_tickle(const struct scheduler *ops, unsigned int cpu, struct csched2_vcpu *
     }
 
     /* Otherwise, look for the non-idle cpu with the lowest credit,
-     * skipping cpus which have been tickled but not scheduled yet */
+     * skipping cpus which have been tickled but not scheduled yet,
+     * and restricting to cpus that new is allowed to run on. */
     cpumask_andnot(&mask, &rqd->active, &rqd->idle);
     cpumask_andnot(&mask, &mask, &rqd->tickled);
+    cpumask_and(&mask, &mask, new->vcpu->cpu_hard_affinity);
 
     for_each_cpu(i, &mask)
     {
@@ -965,6 +993,8 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
     {
         /* If we've boosted someone that's already on a runqueue, prioritize
          * it and inform the cpu in question. */
+        if (setting_affinity && vc->domain->domain_id > 0)
+            printk("credit2:csched2_vcpu_wake - NOT calling runq_insert and runq_tickle because vcpu is on a run queue\n");
         goto out;
     }
 
@@ -988,7 +1018,12 @@ csched2_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
     update_load(ops, svc->rqd, svc, 1, now);
         
     /* Put the VCPU on the runq */
+    if (setting_affinity && vc->domain->domain_id > 0)
+        printk("credit2:csched2_vcpu_wake - calling runq_insert\n");
     runq_insert(ops, vc->processor, svc);
+    if (setting_affinity && vc->domain->domain_id > 0)
+        printk("credit2:csched2_vcpu_wake - calling runq_tickle for pcpu # %d\n",
+            vc->processor);
     runq_tickle(ops, vc->processor, svc, now);
 
 out:
@@ -1053,7 +1088,7 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
      *
      * Since one of the runqueue locks is already held, we can't
      * just grab the prv lock.  Instead, we'll have to trylock, and
-     * do something else reasonable if we fail.
+     * fall back to a safe cpu if that fails.
      */
 
     if ( !spin_trylock(&prv->lock) )
@@ -1063,9 +1098,8 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
             d2printk("%pv -\n", svc->vcpu);
             clear_bit(__CSFLAG_runq_migrate_request, &svc->flags);
         }
-        /* Leave it where it is for now.  When we actually pay attention
-         * to affinity we'll have to figure something out... */
-        return vc->processor;
+
+        return get_safe_pcpu(svc);
     }
 
     /* First check to see if we're here because someone else suggested a place
@@ -1081,13 +1115,17 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         else
         {
             d2printk("%pv +\n", svc->vcpu);
-            new_cpu = cpumask_cycle(vc->processor, &svc->migrate_rqd->active);
-            goto out_up;
+            cpumask_and(csched2_cpumask, vc->cpu_hard_affinity,
+                &svc->migrate_rqd->active);
+            if ( !cpumask_empty(csched2_cpumask) )
+            {
+                new_cpu = cpumask_any(csched2_cpumask);
+                goto out_up;
+            }
+            /* Fall-through to normal cpu pick */
         }
     }
 
-    /* FIXME: Pay attention to cpu affinity */                                                                                      
-
     min_avgload = MAX_LOAD;
 
     /* Find the runqueue with the lowest instantaneous load */
@@ -1099,17 +1137,24 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         rqd = prv->rqd + i;
 
         /* If checking a different runqueue, grab the lock,
-         * read the avg, and then release the lock.
+         * check hard affinity, read the avg, and then release the lock.
          *
          * If on our own runqueue, don't grab or release the lock;
          * but subtract our own load from the runqueue load to simulate
          * impartiality */
         if ( rqd == svc->rqd )
         {
+            if ( !cpumask_intersects(vc->cpu_hard_affinity, &rqd->active) )
+                continue;
             rqd_avgload = rqd->b_avgload - svc->avgload;
         }
         else if ( spin_trylock(&rqd->lock) )
         {
+            if ( !cpumask_intersects(vc->cpu_hard_affinity, &rqd->active) )
+            {
+                spin_unlock(&rqd->lock);
+                continue;
+            }
             rqd_avgload = rqd->b_avgload;
             spin_unlock(&rqd->lock);
         }
@@ -1123,18 +1168,25 @@ choose_cpu(const struct scheduler *ops, struct vcpu *vc)
         }
     }
 
-    /* We didn't find anyone (most likely because of spinlock contention); leave it where it is */
     if ( min_rqi == -1 )
-        new_cpu = vc->processor;
+    {
+        /* No runqs found (most likely because of spinlock contention). */
+        new_cpu = get_safe_pcpu(svc);
+    }
     else
     {
-        new_cpu = cpumask_cycle(vc->processor, &prv->rqd[min_rqi].active);
+        cpumask_and(csched2_cpumask, vc->cpu_hard_affinity,
+            &prv->rqd[min_rqi].active);
+        new_cpu = cpumask_any(csched2_cpumask);
         BUG_ON(new_cpu >= nr_cpu_ids);
     }
 
 out_up:
     spin_unlock(&prv->lock);
 
+    if (setting_affinity && vc->domain->domain_id > 0)
+        printk("credit2:choose_cpu - returning cpu # %d\n", new_cpu);
+
     return new_cpu;
 }
 
@@ -1197,24 +1249,47 @@ static void migrate(const struct scheduler *ops,
     }
     else
     {
-        int on_runq=0;
-        /* It's not running; just move it */
+        /* It's not running; move it if it's on a different runq than trqd. */
+        bool_t on_runq = 0;
+
         d2printk("%pv %d-%d i\n", svc->vcpu, svc->rqd->id, trqd->id);
+
+        /* Re-assign vcpu's processor, if necessary. */
+        cpumask_and(csched2_cpumask, svc->vcpu->cpu_hard_affinity, &trqd->active);
+        if ( !cpumask_test_cpu(svc->vcpu->processor, csched2_cpumask) )
+            svc->vcpu->processor = cpumask_any(csched2_cpumask);
+
         if ( __vcpu_on_runq(svc) )
+            on_runq = 1;
+
+        /* If the runqs are different, move svc to trqd. */
+        if ( svc->rqd != trqd )
         {
-            __runq_remove(svc);
-            update_load(ops, svc->rqd, svc, -1, now);
-            on_runq=1;
+            if ( on_runq )
+            {
+                __runq_remove(svc);
+                update_load(ops, svc->rqd, svc, -1, now);
+            }
+            __runq_deassign(svc);
+            __runq_assign(svc, trqd);
+            if ( on_runq )
+            {
+                update_load(ops, svc->rqd, svc, 1, now);
+                runq_insert(ops, svc->vcpu->processor, svc);
+            }
         }
-        __runq_deassign(svc);
-        svc->vcpu->processor = cpumask_any(&trqd->active);
-        __runq_assign(svc, trqd);
+
         if ( on_runq )
         {
-            update_load(ops, svc->rqd, svc, 1, now);
-            runq_insert(ops, svc->vcpu->processor, svc);
+            if (setting_affinity && svc->vcpu->domain->domain_id > 0)
+                printk("credit2:migrate - calling runq_tickle\n");
             runq_tickle(ops, svc->vcpu->processor, svc, now);
         }
+        else if (setting_affinity && svc->vcpu->domain->domain_id > 0)
+            printk("credit2:migrate - NOT calling runq_tickle because vcpu was not on a run queue before migrating\n");
     }
 }
 
@@ -1250,6 +1325,11 @@ retry:
     for_each_cpu(i, &prv->active_queues)
     {
         s_time_t delta;
+        /* true if there are no vcpus to push due to hard affinity */
+        bool_t ha_no_push = 1;
+        /* true if there are no vcpus to pull due to hard affinity */
+        bool_t ha_no_pull = 1;
+        struct list_head *iter;
         
         st.orqd = prv->rqd + i;
 
@@ -1257,6 +1337,47 @@ retry:
              || !spin_trylock(&st.orqd->lock) )
             continue;
 
+        /*
+         * If due to hard affinity there are no vcpus that can be
+         * pulled or pushed, move to the next runq in the loop.
+         */
+
+        /* See if there are any vcpus that can be pushed from lrqd to orqd. */
+        list_for_each( iter, &st.lrqd->svc )
+        {
+            struct csched2_vcpu * svc =
+                list_entry(iter, struct csched2_vcpu, rqd_elem);
+            cpumask_and(csched2_cpumask, svc->vcpu->cpu_hard_affinity,
+                &st.orqd->active);
+            if ( !cpumask_empty(csched2_cpumask) )
+            {
+                /* vcpu can be pushed from lrqd to orqd. */
+                ha_no_push = 0;
+                break;
+            }
+        }
+
+        /* See if there are any vcpus that can be pulled from orqd to lrqd. */
+        list_for_each( iter, &st.orqd->svc )
+        {
+            struct csched2_vcpu * svc =
+                list_entry(iter, struct csched2_vcpu, rqd_elem);
+            cpumask_and(csched2_cpumask, svc->vcpu->cpu_hard_affinity,
+                &st.lrqd->active);
+            if ( !cpumask_empty(csched2_cpumask) )
+            {
+                /* vcpu can be pulled from orqd to lrqd. */
+                ha_no_pull = 0;
+                break;
+            }
+        }
+
+        if ( ha_no_push && ha_no_pull )
+        {
+            spin_unlock(&st.orqd->lock);
+            continue;
+        }
+
         __update_runq_load(ops, st.orqd, 0, now);
     
         delta = st.lrqd->b_avgload - st.orqd->b_avgload;
@@ -1330,6 +1451,12 @@ retry:
         if ( test_bit(__CSFLAG_runq_migrate_request, &push_svc->flags) )
             continue;
 
+        /* Skip if it can't run on the destination runq. */
+        cpumask_and(csched2_cpumask, push_svc->vcpu->cpu_hard_affinity,
+            &st.orqd->active);
+        if ( cpumask_empty(csched2_cpumask) )
+            continue;
+
         list_for_each( pull_iter, &st.orqd->svc )
         {
             struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
@@ -1343,6 +1470,12 @@ retry:
             if ( test_bit(__CSFLAG_runq_migrate_request, &pull_svc->flags) )
                 continue;
 
+            /* Skip if it can't run on the destination runq. */
+            cpumask_and(csched2_cpumask, pull_svc->vcpu->cpu_hard_affinity,
+                &st.lrqd->active);
+            if ( cpumask_empty(csched2_cpumask) )
+                continue;
+
             consider(&st, push_svc, pull_svc);
         }
 
@@ -1355,11 +1488,17 @@ retry:
     list_for_each( pull_iter, &st.orqd->svc )
     {
         struct csched2_vcpu * pull_svc = list_entry(pull_iter, struct csched2_vcpu, rqd_elem);
-        
+
         /* Skip this one if it's already been flagged to migrate */
         if ( test_bit(__CSFLAG_runq_migrate_request, &pull_svc->flags) )
             continue;
 
+        /* Skip if it can't run on the destination runq. */
+        cpumask_and(csched2_cpumask, pull_svc->vcpu->cpu_hard_affinity,
+            &st.lrqd->active);
+        if ( cpumask_empty(csched2_cpumask) )
+            continue;
+
         /* Consider pull only */
         consider(&st, NULL, pull_svc);
     }
@@ -1401,6 +1540,9 @@ csched2_vcpu_migrate(
 
     if ( trqd != svc->rqd )
         migrate(ops, svc, trqd, NOW());
+    else if (setting_affinity && vc->domain->domain_id > 0)
+        printk("credit2:csched2_vcpu_migrate - NOT calling migrate because destination run queue is same as current\n");
 }
 
 static int
@@ -1610,6 +1752,10 @@ runq_candidate(struct csched2_runqueue_data *rqd,
     {
         struct csched2_vcpu * svc = list_entry(iter, struct csched2_vcpu, runq_elem);
 
+        /* Only consider vcpus that are allowed to run on this processor. */
+        if ( !cpumask_test_cpu(cpu, svc->vcpu->cpu_hard_affinity) )
+            continue;
+
         /* If this is on a different processor, don't pull it unless
          * its credit is at least CSCHED2_MIGRATE_RESIST higher. */
         if ( svc->vcpu->processor != cpu
@@ -1992,6 +2138,13 @@ csched2_alloc_pdata(const struct scheduler *ops, int cpu)
         printk("%s: cpu %d not online yet, deferring initializatgion\n",
                __func__, cpu);
 
+    /*
+     * For each new pcpu, allocate a cpumask_t for use throughout the
+     * scheduler to avoid putting any cpumask_t structs on the stack.
+     */
+    if ( !zalloc_cpumask_var(&cpumask[cpu]) )
+        return NULL;
+
     return (void *)1;
 }
 
@@ -2040,6 +2193,8 @@ csched2_free_pdata(const struct scheduler *ops, void *pcpu, int cpu)
 
     spin_unlock_irqrestore(&prv->lock, flags);
 
+    free_cpumask_var(cpumask[cpu]);
+    cpumask[cpu] = NULL;
+
     return;
 }
 
@@ -2127,16 +2282,29 @@ csched2_init(struct scheduler *ops)
 
     prv->load_window_shift = opt_load_window_shift;
 
+    cpumask = xzalloc_bytes(nr_cpu_ids * sizeof(cpumask_t *));
+    if ( cpumask == NULL )
+        return -ENOMEM;
+
     return 0;
 }
 
 static void
 csched2_deinit(const struct scheduler *ops)
 {
+    unsigned int i;
     struct csched2_private *prv;
 
     prv = CSCHED2_PRIV(ops);
     xfree(prv);
+
+    if ( cpumask != NULL )
+    {
+        for ( i = 0; i < nr_cpu_ids; i++ )
+            if ( cpumask[i] != NULL )
+                free_cpumask_var(cpumask[i]);
+        xfree(cpumask);
+    }
 }
 
 
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index b73177f..1c5961c 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -38,6 +38,8 @@
 #include <public/sched.h>
 #include <xsm/xsm.h>
 
+bool_t setting_affinity = 0;
+
 /* opt_sched: scheduler - default to credit */
 static char __initdata opt_sched[10] = "credit";
 string_param("sched", opt_sched);
@@ -358,8 +360,13 @@ void vcpu_sleep_nosync(struct vcpu *v)
         if ( v->runstate.state == RUNSTATE_runnable )
             vcpu_runstate_change(v, RUNSTATE_offline, NOW());
 
+        if (setting_affinity && v->domain->domain_id > 0)
+            printk("schedule:vcpu_sleep_nosync - calling credit2:sleep\n");
         SCHED_OP(VCPU2OP(v), sleep, v);
     }
+    else if (setting_affinity && v->domain->domain_id > 0)
+        printk("schedule:vcpu_sleep_nosync - NOT calling credit2:sleep because vcpu_runnable(v) was true\n");
 
     vcpu_schedule_unlock_irqrestore(lock, flags, v);
 
@@ -381,16 +388,28 @@ void vcpu_wake(struct vcpu *v)
     unsigned long flags;
     spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags);
 
+    if (setting_affinity && v->domain->domain_id > 0)
+        printk("schedule:vcpu_wake - pause flags for d%dv%d is %lu\n",
+            v->domain->domain_id, v->vcpu_id, v->pause_flags);
+
     if ( likely(vcpu_runnable(v)) )
     {
         if ( v->runstate.state >= RUNSTATE_blocked )
             vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
+        if (setting_affinity && v->domain->domain_id > 0)
+            printk("schedule:vcpu_wake - calling credit2:wake\n");
         SCHED_OP(VCPU2OP(v), wake, v);
     }
-    else if ( !test_bit(_VPF_blocked, &v->pause_flags) )
+    else
     {
-        if ( v->runstate.state == RUNSTATE_blocked )
-            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
+        if (setting_affinity && v->domain->domain_id > 0)
+            printk("schedule:vcpu_wake - NOT calling credit2:wake because vcpu_runnable() is false\n");
+
+        if ( !test_bit(_VPF_blocked, &v->pause_flags) )
+        {
+            if ( v->runstate.state == RUNSTATE_blocked )
+                vcpu_runstate_change(v, RUNSTATE_offline, NOW());
+        }
     }
 
     vcpu_schedule_unlock_irqrestore(lock, flags, v);
@@ -400,6 +419,9 @@ void vcpu_wake(struct vcpu *v)
 
 void vcpu_unblock(struct vcpu *v)
 {
+    if (setting_affinity && v->domain->domain_id > 0)
+        printk("schedule:vcpu_unblock - clearing VPF_block\n");
+
     if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
         return;
 
@@ -470,6 +492,8 @@ static void vcpu_migrate(struct vcpu *v)
                 break;
 
             /* Select a new CPU. */
+            if (setting_affinity && v->domain->domain_id > 0)
+                printk("schedule:vcpu_migrate - calling credit2:pick_cpu\n");
             new_cpu = SCHED_OP(VCPU2OP(v), pick_cpu, v);
             if ( (new_lock == per_cpu(schedule_data, new_cpu).schedule_lock) &&
                  cpumask_test_cpu(new_cpu, v->domain->cpupool->cpu_valid) )
@@ -520,7 +544,11 @@ static void vcpu_migrate(struct vcpu *v)
      * the lock pointer cant' change while the current lock is held.
      */
     if ( VCPU2OP(v)->migrate )
+    {
+        if (setting_affinity && v->domain->domain_id > 0)
+            printk("schedule:vcpu_migrate - calling credit2:migrate\n");
         SCHED_OP(VCPU2OP(v), migrate, v, new_cpu);
+    }
     else
         v->processor = new_cpu;
 
@@ -533,6 +561,8 @@ static void vcpu_migrate(struct vcpu *v)
         sched_move_irqs(v);
 
     /* Wake on new CPU. */
+    if (setting_affinity && v->domain->domain_id > 0)
+        printk("schedule:vcpu_migrate - calling vcpu_wake\n");
     vcpu_wake(v);
 }
 
@@ -671,8 +701,42 @@ static int vcpu_set_affinity(
 
     if ( test_bit(_VPF_migrating, &v->pause_flags) )
     {
+        if (setting_affinity && v->domain->domain_id > 0)
+        {
+            printk("****************************************************\n");
+            printk("schedule:vcpu_set_affinity - pause flags is %lu\n", v->pause_flags);
+            printk("1 VPF_blocked is %d\n", test_bit(_VPF_blocked, &v->pause_flags));
+            printk("2 VPF_down is %d\n", test_bit(_VPF_down, &v->pause_flags));
+            printk("4 VPF_blocked_in_xen is %d\n", test_bit(_VPF_blocked_in_xen, &v->pause_flags));
+            printk("8 VPF_migrating is %d\n", test_bit(_VPF_migrating, &v->pause_flags));
+            printk("16 VPF_mem_paging is %d\n", test_bit(_VPF_mem_paging, &v->pause_flags));
+            printk("32 VPF_mem_access is %d\n", test_bit(_VPF_mem_access, &v->pause_flags));
+            printk("64 VPF_mem_sharing is %d\n", test_bit(_VPF_mem_sharing, &v->pause_flags));
+            printk("128 VPF_in_reset is %d\n", test_bit(_VPF_in_reset, &v->pause_flags));
+        }
+
+        printk("schedule:vcpu_set_affinity - calling vcpu_sleep_nosync\n");
         vcpu_sleep_nosync(v);
+        printk("schedule:vcpu_set_affinity - calling vcpu_migrate\n");
         vcpu_migrate(v);
+
+        if (setting_affinity && v->domain->domain_id > 0)
+        {
+            printk("schedule:vcpu_set_affinity - pause flags is %lu\n", v->pause_flags);
+            printk("1 VPF_blocked is %d\n", test_bit(_VPF_blocked, &v->pause_flags));
+            printk("2 VPF_down is %d\n", test_bit(_VPF_down, &v->pause_flags));
+            printk("4 VPF_blocked_in_xen is %d\n", test_bit(_VPF_blocked_in_xen, &v->pause_flags));
+            printk("8 VPF_migrating is %d\n", test_bit(_VPF_migrating, &v->pause_flags));
+            printk("16 VPF_mem_paging is %d\n", test_bit(_VPF_mem_paging, &v->pause_flags));
+            printk("32 VPF_mem_access is %d\n", test_bit(_VPF_mem_access, &v->pause_flags));
+            printk("64 VPF_mem_sharing is %d\n", test_bit(_VPF_mem_sharing, &v->pause_flags));
+            printk("128 VPF_in_reset is %d\n", test_bit(_VPF_in_reset, &v->pause_flags));
+        }
+
+        if (setting_affinity && v->domain->domain_id > 0)
+            printk("schedule:vcpu_set_affinity - Done changing hard affinity\n");
+
     }
 
+    setting_affinity = 0;
+
     return 0;
@@ -704,6 +768,9 @@ void vcpu_block(void)
 {
     struct vcpu *v = current;
 
+    if (setting_affinity && v->domain->domain_id > 0)
+        printk("schedule:vcpu_block - setting VPF_block\n");
+
     set_bit(_VPF_blocked, &v->pause_flags);
 
     /* Check for events /after/ blocking: avoids wakeup waiting race. */
@@ -739,6 +806,9 @@ static long do_poll(struct sched_poll *sched_poll)
     if ( !guest_handle_okay(sched_poll->ports, sched_poll->nr_ports) )
         return -EFAULT;
 
+    if (setting_affinity && v->domain->domain_id > 0)
+        printk("schedule:do_poll - setting VPF_block\n");
+
     set_bit(_VPF_blocked, &v->pause_flags);
     v->poll_evtchn = -1;
     set_bit(v->vcpu_id, d->poll_mask);
-- 
1.7.10.4

