[Xen-devel] [PATCH v2 2/2] xen: sched: rtds: use non-atomic bit-ops

2016-06-26 Thread Tianyang Chen
Vcpu flags are checked and cleared atomically. Performance can be
improved with corresponding non-atomic versions since schedule.c
already has spin_locks in place.

Signed-off-by: Tianyang Chen <ti...@cis.upenn.edu>
---
Changes since v1:
-none
---
 xen/common/sched_rt.c |   16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 255d1f6..8ecc908 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -936,7 +936,7 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu 
*svc, s_time_t now)
 if ( svc->cur_budget <= 0 )
 {
 svc->cur_budget = 0;
-set_bit(__RTDS_depleted, >flags);
+__set_bit(__RTDS_depleted, >flags);
 }
 
 /* TRACE */
@@ -1050,7 +1050,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
 if ( snext != scurr &&
  !is_idle_vcpu(current) &&
  vcpu_runnable(current) )
-set_bit(__RTDS_delayed_runq_add, >flags);
+__set_bit(__RTDS_delayed_runq_add, >flags);
 
 snext->last_start = now;
 ret.time =  -1; /* if an idle vcpu is picked */
@@ -1059,7 +1059,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
 if ( snext != scurr )
 {
 q_remove(snext);
-set_bit(__RTDS_scheduled, >flags);
+__set_bit(__RTDS_scheduled, >flags);
 }
 if ( snext->vcpu->processor != cpu )
 {
@@ -1093,7 +1093,7 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu 
*vc)
 replq_remove(ops, svc);
 }
 else if ( svc->flags & RTDS_delayed_runq_add )
-clear_bit(__RTDS_delayed_runq_add, >flags);
+__clear_bit(__RTDS_delayed_runq_add, >flags);
 }
 
 /*
@@ -1235,7 +1235,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
  */
 if ( unlikely(svc->flags & RTDS_scheduled) )
 {
-set_bit(__RTDS_delayed_runq_add, >flags);
+__set_bit(__RTDS_delayed_runq_add, >flags);
 /*
  * The vcpu is waking up already, and we didn't even had the time to
  * remove its next replenishment event from the replenishment queue
@@ -1266,12 +1266,12 @@ rt_context_saved(const struct scheduler *ops, struct 
vcpu *vc)
 struct rt_vcpu *svc = rt_vcpu(vc);
 spinlock_t *lock = vcpu_schedule_lock_irq(vc);
 
-clear_bit(__RTDS_scheduled, >flags);
+__clear_bit(__RTDS_scheduled, >flags);
 /* not insert idle vcpu to runq */
 if ( is_idle_vcpu(vc) )
 goto out;
 
-if ( test_and_clear_bit(__RTDS_delayed_runq_add, >flags) &&
+if ( __test_and_clear_bit(__RTDS_delayed_runq_add, >flags) &&
  likely(vcpu_runnable(vc)) )
 {
 runq_insert(ops, svc);
@@ -1447,7 +1447,7 @@ static void repl_timer_handler(void *data){
 runq_tickle(ops, next_on_runq);
 }
 else if ( vcpu_on_q(svc) &&
-  test_and_clear_bit(__RTDS_depleted, >flags) )
+  __test_and_clear_bit(__RTDS_depleted, >flags) )
 runq_tickle(ops, svc);
 
 list_del(>replq_elem);
-- 
1.7.9.5


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 0/2] xen: sched: rtds clean-up

2016-06-26 Thread Tianyang Chen
The first part of this patch series aims at fixing coding style issues
for control structures, as well as aligning comments in the structs.

Related discussions: 
http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01528.html

Because locks are grabbed in schedule.c before
hooks are called, underscores in front of function names are removed.

The second part replaces atomic bit-ops with non-atomic ones since locks
are grabbed in schedule.c.

Discussions:
http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01528.html
http://www.gossamer-threads.com/lists/xen/devel/431251?do=post_view_threaded#431251

Tianyang Chen (2):
  xen: sched: rtds code clean-up
  xen: sched: rtds: use non-atomic bit-ops

 xen/common/sched_rt.c |  106 ++---
 1 file changed, 56 insertions(+), 50 deletions(-)

-- 
1.7.9.5


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v2 1/2] xen: sched: rtds code clean-up

2016-06-26 Thread Tianyang Chen
No functional change:
 -aligned comments in rt_vcpu struct
 -removed double underscores from the names of some functions
 -fixed coding style for control structures involving lists
 -fixed typos in the comments
 -added comments for UPDATE_LIMIT_SHIFT

Signed-off-by: Tianyang Chen <ti...@cis.upenn.edu>
---
Changes since v1:
-added a detailed list of changes
---
 xen/common/sched_rt.c |   90 ++---
 1 file changed, 48 insertions(+), 42 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 7f8f411..255d1f6 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -80,7 +80,7 @@
  * in schedule.c
  *
  * The functions involes RunQ and needs to grab locks are:
- *vcpu_insert, vcpu_remove, context_saved, __runq_insert
+ *vcpu_insert, vcpu_remove, context_saved, runq_insert
  */
 
 
@@ -107,6 +107,12 @@
  */
 #define RTDS_MIN_BUDGET (MICROSECS(10))
 
+/*
+ * UPDATE_LIMIT_SHIFT: a constant used in rt_update_deadline(). When finding
+ * the next deadline, performing addition could be faster if the difference
+ * between cur_deadline and now is small. If the difference is bigger than
+ * 1024 * period, use multiplication.
+ */
 #define UPDATE_LIMIT_SHIFT  10
 
 /*
@@ -158,12 +164,12 @@
 static void repl_timer_handler(void *data);
 
 /*
- * Systme-wide private data, include global RunQueue/DepletedQ
+ * System-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
  * physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
  */
 struct rt_private {
-spinlock_t lock;/* the global coarse grand lock */
+spinlock_t lock;/* the global coarse-grained lock */
 struct list_head sdom;  /* list of availalbe domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
@@ -176,7 +182,7 @@ struct rt_private {
  * Virtual CPU
  */
 struct rt_vcpu {
-struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head q_elem; /* on the runq/depletedq list */
 struct list_head replq_elem; /* on the replenishment events list */
 
 /* Up-pointers */
@@ -188,11 +194,11 @@ struct rt_vcpu {
 s_time_t budget;
 
 /* VCPU current infomation in nanosecond */
-s_time_t cur_budget;/* current budget */
-s_time_t last_start;/* last start time */
-s_time_t cur_deadline;  /* current deadline for EDF */
+s_time_t cur_budget; /* current budget */
+s_time_t last_start; /* last start time */
+s_time_t cur_deadline;   /* current deadline for EDF */
 
-unsigned flags; /* mark __RTDS_scheduled, etc.. */
+unsigned flags;  /* mark __RTDS_scheduled, etc.. */
 };
 
 /*
@@ -241,13 +247,13 @@ static inline struct list_head *rt_replq(const struct 
scheduler *ops)
  * and the replenishment events queue.
  */
 static int
-__vcpu_on_q(const struct rt_vcpu *svc)
+vcpu_on_q(const struct rt_vcpu *svc)
 {
return !list_empty(>q_elem);
 }
 
 static struct rt_vcpu *
-__q_elem(struct list_head *elem)
+q_elem(struct list_head *elem)
 {
 return list_entry(elem, struct rt_vcpu, q_elem);
 }
@@ -303,7 +309,7 @@ rt_dump_vcpu(const struct scheduler *ops, const struct 
rt_vcpu *svc)
 svc->cur_budget,
 svc->cur_deadline,
 svc->last_start,
-__vcpu_on_q(svc),
+vcpu_on_q(svc),
 vcpu_runnable(svc->vcpu),
 svc->flags,
 keyhandler_scratch);
@@ -339,28 +345,28 @@ rt_dump(const struct scheduler *ops)
 replq = rt_replq(ops);
 
 printk("Global RunQueue info:\n");
-list_for_each( iter, runq )
+list_for_each ( iter, runq )
 {
-svc = __q_elem(iter);
+svc = q_elem(iter);
 rt_dump_vcpu(ops, svc);
 }
 
 printk("Global DepletedQueue info:\n");
-list_for_each( iter, depletedq )
+list_for_each ( iter, depletedq )
 {
-svc = __q_elem(iter);
+svc = q_elem(iter);
 rt_dump_vcpu(ops, svc);
 }
 
 printk("Global Replenishment Events info:\n");
-list_for_each( iter, replq )
+list_for_each ( iter, replq )
 {
 svc = replq_elem(iter);
 rt_dump_vcpu(ops, svc);
 }
 
 printk("Domain info:\n");
-list_for_each( iter, >sdom )
+list_for_each ( iter, >sdom )
 {
 struct vcpu *v;
 
@@ -380,7 +386,7 @@ rt_dump(const struct scheduler *ops)
 
 /*
  * update deadline and budget when now >= cur_deadline
- * it need to be updated to the deadline of the current period
+ * it needs to be updated to the deadline of the current period
  */
 static void
 rt_update_deadline(s_time_t now, struct rt_vcpu *svc)
@@ -463,14 +469,14 @@ deadline_queue_insert(stru

[Xen-devel] [PATCH 2/2] xen: sched: rtds: use non-atomic bit-ops

2016-05-15 Thread Tianyang Chen
Vcpu flags are checked and cleared atomically. Performance can be
improved with corresponding non-atomic versions since schedule.c
already has spin_locks in place.

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
---
 xen/common/sched_rt.c |   16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 1584d53..1a18f6d 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -936,7 +936,7 @@ burn_budget(const struct scheduler *ops, struct rt_vcpu 
*svc, s_time_t now)
 if ( svc->cur_budget <= 0 )
 {
 svc->cur_budget = 0;
-set_bit(__RTDS_depleted, >flags);
+__set_bit(__RTDS_depleted, >flags);
 }
 
 /* TRACE */
@@ -1050,7 +1050,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
 if ( snext != scurr &&
  !is_idle_vcpu(current) &&
  vcpu_runnable(current) )
-set_bit(__RTDS_delayed_runq_add, >flags);
+__set_bit(__RTDS_delayed_runq_add, >flags);
 
 snext->last_start = now;
 ret.time =  -1; /* if an idle vcpu is picked */
@@ -1059,7 +1059,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
 if ( snext != scurr )
 {
 q_remove(snext);
-set_bit(__RTDS_scheduled, >flags);
+__set_bit(__RTDS_scheduled, >flags);
 }
 if ( snext->vcpu->processor != cpu )
 {
@@ -1093,7 +1093,7 @@ rt_vcpu_sleep(const struct scheduler *ops, struct vcpu 
*vc)
 replq_remove(ops, svc);
 }
 else if ( svc->flags & RTDS_delayed_runq_add )
-clear_bit(__RTDS_delayed_runq_add, >flags);
+__clear_bit(__RTDS_delayed_runq_add, >flags);
 }
 
 /*
@@ -1235,7 +1235,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
  */
 if ( unlikely(svc->flags & RTDS_scheduled) )
 {
-set_bit(__RTDS_delayed_runq_add, >flags);
+__set_bit(__RTDS_delayed_runq_add, >flags);
 /*
  * The vcpu is waking up already, and we didn't even had the time to
  * remove its next replenishment event from the replenishment queue
@@ -1266,12 +1266,12 @@ rt_context_saved(const struct scheduler *ops, struct 
vcpu *vc)
 struct rt_vcpu *svc = rt_vcpu(vc);
 spinlock_t *lock = vcpu_schedule_lock_irq(vc);
 
-clear_bit(__RTDS_scheduled, >flags);
+__clear_bit(__RTDS_scheduled, >flags);
 /* not insert idle vcpu to runq */
 if ( is_idle_vcpu(vc) )
 goto out;
 
-if ( test_and_clear_bit(__RTDS_delayed_runq_add, >flags) &&
+if ( __test_and_clear_bit(__RTDS_delayed_runq_add, >flags) &&
  likely(vcpu_runnable(vc)) )
 {
 runq_insert(ops, svc);
@@ -1447,7 +1447,7 @@ static void repl_timer_handler(void *data){
 runq_tickle(ops, next_on_runq);
 }
 else if ( vcpu_on_q(svc) &&
-  test_and_clear_bit(__RTDS_depleted, >flags) )
+  __test_and_clear_bit(__RTDS_depleted, >flags) )
 runq_tickle(ops, svc);
 
 list_del(>replq_elem);
-- 
1.7.9.5


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 1/2] xen: sched: rtds refactor code

2016-05-15 Thread Tianyang Chen
No functional change:
 -Various coding style fix
 -Added comments for UPDATE_LIMIT_SHIFT.

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
---
 xen/common/sched_rt.c |  106 ++---
 1 file changed, 56 insertions(+), 50 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 7f8f411..1584d53 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -80,7 +80,7 @@
  * in schedule.c
  *
  * The functions involes RunQ and needs to grab locks are:
- *vcpu_insert, vcpu_remove, context_saved, __runq_insert
+ *vcpu_insert, vcpu_remove, context_saved, runq_insert
  */
 
 
@@ -107,6 +107,12 @@
  */
 #define RTDS_MIN_BUDGET (MICROSECS(10))
 
+/*
+ * UPDATE_LIMIT_SHIT: a constant used in rt_update_deadline(). When finding
+ * the next deadline, performing addition could be faster if the difference
+ * between cur_deadline and now is small. If the difference is bigger than
+ * 1024 * period, use multiplication.
+ */
 #define UPDATE_LIMIT_SHIFT  10
 
 /*
@@ -158,25 +164,25 @@
 static void repl_timer_handler(void *data);
 
 /*
- * Systme-wide private data, include global RunQueue/DepletedQ
+ * System-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
  * physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
  */
 struct rt_private {
-spinlock_t lock;/* the global coarse grand lock */
-struct list_head sdom;  /* list of availalbe domains, used for dump */
-struct list_head runq;  /* ordered list of runnable vcpus */
-struct list_head depletedq; /* unordered list of depleted vcpus */
-struct list_head replq; /* ordered list of vcpus that need 
replenishment */
-cpumask_t tickled;  /* cpus been tickled */
-struct timer *repl_timer;   /* replenishment timer */
+spinlock_t lock; /* the global coarse grand lock */
+struct list_head sdom;   /* list of availalbe domains, used for dump */
+struct list_head runq;   /* ordered list of runnable vcpus */
+struct list_head depletedq;  /* unordered list of depleted vcpus */
+struct list_head replq;  /* ordered list of vcpus that need 
replenishment */
+cpumask_t tickled;   /* cpus been tickled */
+struct timer *repl_timer;/* replenishment timer */
 };
 
 /*
  * Virtual CPU
  */
 struct rt_vcpu {
-struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head q_elem; /* on the runq/depletedq list */
 struct list_head replq_elem; /* on the replenishment events list */
 
 /* Up-pointers */
@@ -188,19 +194,19 @@ struct rt_vcpu {
 s_time_t budget;
 
 /* VCPU current infomation in nanosecond */
-s_time_t cur_budget;/* current budget */
-s_time_t last_start;/* last start time */
-s_time_t cur_deadline;  /* current deadline for EDF */
+s_time_t cur_budget; /* current budget */
+s_time_t last_start; /* last start time */
+s_time_t cur_deadline;   /* current deadline for EDF */
 
-unsigned flags; /* mark __RTDS_scheduled, etc.. */
+unsigned flags;  /* mark __RTDS_scheduled, etc.. */
 };
 
 /*
  * Domain
  */
 struct rt_dom {
-struct list_head sdom_elem; /* link list on rt_priv */
-struct domain *dom; /* pointer to upper domain */
+struct list_head sdom_elem;  /* link list on rt_priv */
+struct domain *dom;  /* pointer to upper domain */
 };
 
 /*
@@ -241,13 +247,13 @@ static inline struct list_head *rt_replq(const struct 
scheduler *ops)
  * and the replenishment events queue.
  */
 static int
-__vcpu_on_q(const struct rt_vcpu *svc)
+vcpu_on_q(const struct rt_vcpu *svc)
 {
return !list_empty(>q_elem);
 }
 
 static struct rt_vcpu *
-__q_elem(struct list_head *elem)
+q_elem(struct list_head *elem)
 {
 return list_entry(elem, struct rt_vcpu, q_elem);
 }
@@ -303,7 +309,7 @@ rt_dump_vcpu(const struct scheduler *ops, const struct 
rt_vcpu *svc)
 svc->cur_budget,
 svc->cur_deadline,
 svc->last_start,
-__vcpu_on_q(svc),
+vcpu_on_q(svc),
 vcpu_runnable(svc->vcpu),
 svc->flags,
 keyhandler_scratch);
@@ -339,28 +345,28 @@ rt_dump(const struct scheduler *ops)
 replq = rt_replq(ops);
 
 printk("Global RunQueue info:\n");
-list_for_each( iter, runq )
+list_for_each ( iter, runq )
 {
-svc = __q_elem(iter);
+svc = q_elem(iter);
 rt_dump_vcpu(ops, svc);
 }
 
 printk("Global DepletedQueue info:\n");
-list_for_each( iter, depletedq )
+list_for_each ( iter, depletedq )
 {
-svc = __q_elem(iter);
+svc = q_elem(iter);
 rt_dump_vcpu(ops, svc);
 }
 
 printk("Global Replenishment Events info:\n");
-list_fo

[Xen-devel] [PATCH 0/2] xen: sched: rtds refactor code

2016-05-15 Thread Tianyang Chen
The first part of this patch series aims at fixing coding style issues
for control structures. Because locks are grabbed in schedule.c before
hooks are called, underscores in front of function names are removed.

The second part replaces atomic bit-ops with non-atomic ones since locks
are grabbed in schedule.c.

Discussions:
http://lists.xenproject.org/archives/html/xen-devel/2016-03/msg01528.html
http://www.gossamer-threads.com/lists/xen/devel/431251?do=post_view_threaded#431251

Tianyang Chen (2):
  xen: sched: rtds refactor code
  xen: sched: rtds: use non-atomic bit-ops

 xen/common/sched_rt.c |  122 ++---
 1 file changed, 64 insertions(+), 58 deletions(-)

-- 
1.7.9.5


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH] xen: sched: rtds: refactor code

2016-05-11 Thread Tianyang Chen
No functional change:
 -Various coding style fix
 -Added comments for UPDATE_LIMIT_SHIFT.

Use non-atomic bit-ops:
 -Vcpu flags are checked and cleared atomically. Performance can be
  improved with corresponding non-atomic versions since schedule.c
  already has spin_locks in place.

Suggested-by: Dario Faggioli <dario.faggi...@citrix.com>
Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
---
 xen/common/sched_rt.c |  122 ++---
 1 file changed, 64 insertions(+), 58 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 7f8f411..1a18f6d 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -80,7 +80,7 @@
  * in schedule.c
  *
  * The functions involes RunQ and needs to grab locks are:
- *vcpu_insert, vcpu_remove, context_saved, __runq_insert
+ *vcpu_insert, vcpu_remove, context_saved, runq_insert
  */
 
 
@@ -107,6 +107,12 @@
  */
 #define RTDS_MIN_BUDGET (MICROSECS(10))
 
+/*
+ * UPDATE_LIMIT_SHIT: a constant used in rt_update_deadline(). When finding
+ * the next deadline, performing addition could be faster if the difference
+ * between cur_deadline and now is small. If the difference is bigger than
+ * 1024 * period, use multiplication.
+ */
 #define UPDATE_LIMIT_SHIFT  10
 
 /*
@@ -158,25 +164,25 @@
 static void repl_timer_handler(void *data);
 
 /*
- * Systme-wide private data, include global RunQueue/DepletedQ
+ * System-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
  * physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
  */
 struct rt_private {
-spinlock_t lock;/* the global coarse grand lock */
-struct list_head sdom;  /* list of availalbe domains, used for dump */
-struct list_head runq;  /* ordered list of runnable vcpus */
-struct list_head depletedq; /* unordered list of depleted vcpus */
-struct list_head replq; /* ordered list of vcpus that need 
replenishment */
-cpumask_t tickled;  /* cpus been tickled */
-struct timer *repl_timer;   /* replenishment timer */
+spinlock_t lock; /* the global coarse grand lock */
+struct list_head sdom;   /* list of availalbe domains, used for dump */
+struct list_head runq;   /* ordered list of runnable vcpus */
+struct list_head depletedq;  /* unordered list of depleted vcpus */
+struct list_head replq;  /* ordered list of vcpus that need 
replenishment */
+cpumask_t tickled;   /* cpus been tickled */
+struct timer *repl_timer;/* replenishment timer */
 };
 
 /*
  * Virtual CPU
  */
 struct rt_vcpu {
-struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head q_elem; /* on the runq/depletedq list */
 struct list_head replq_elem; /* on the replenishment events list */
 
 /* Up-pointers */
@@ -188,19 +194,19 @@ struct rt_vcpu {
 s_time_t budget;
 
 /* VCPU current infomation in nanosecond */
-s_time_t cur_budget;/* current budget */
-s_time_t last_start;/* last start time */
-s_time_t cur_deadline;  /* current deadline for EDF */
+s_time_t cur_budget; /* current budget */
+s_time_t last_start; /* last start time */
+s_time_t cur_deadline;   /* current deadline for EDF */
 
-unsigned flags; /* mark __RTDS_scheduled, etc.. */
+unsigned flags;  /* mark __RTDS_scheduled, etc.. */
 };
 
 /*
  * Domain
  */
 struct rt_dom {
-struct list_head sdom_elem; /* link list on rt_priv */
-struct domain *dom; /* pointer to upper domain */
+struct list_head sdom_elem;  /* link list on rt_priv */
+struct domain *dom;  /* pointer to upper domain */
 };
 
 /*
@@ -241,13 +247,13 @@ static inline struct list_head *rt_replq(const struct 
scheduler *ops)
  * and the replenishment events queue.
  */
 static int
-__vcpu_on_q(const struct rt_vcpu *svc)
+vcpu_on_q(const struct rt_vcpu *svc)
 {
return !list_empty(>q_elem);
 }
 
 static struct rt_vcpu *
-__q_elem(struct list_head *elem)
+q_elem(struct list_head *elem)
 {
 return list_entry(elem, struct rt_vcpu, q_elem);
 }
@@ -303,7 +309,7 @@ rt_dump_vcpu(const struct scheduler *ops, const struct 
rt_vcpu *svc)
 svc->cur_budget,
 svc->cur_deadline,
 svc->last_start,
-__vcpu_on_q(svc),
+vcpu_on_q(svc),
 vcpu_runnable(svc->vcpu),
 svc->flags,
 keyhandler_scratch);
@@ -339,28 +345,28 @@ rt_dump(const struct scheduler *ops)
 replq = rt_replq(ops);
 
 printk("Global RunQueue info:\n");
-list_for_each( iter, runq )
+list_for_each ( iter, runq )
 {
-svc = __q_elem(iter);
+svc = q_elem(iter);
 rt_dump_vcpu(ops, svc);
 }
 
 printk("Global DepletedQueue i

[Xen-devel] [PATCH v10]xen: sched: convert RTDS from time to event driven model

2016-03-20 Thread Tianyang Chen
The current RTDS code has several problems:
 - The scheduler, although the algorithm is event driven by
   nature, follows a time driven model (is invoked periodically!),
   making the code look unnatural;
 - Budget replenishment logic, budget enforcement logic and scheduling
   decisions are mixed and entangled, making the code hard to understand;
 - The various queues of vcpus are scanned various times, making the
   code inefficient;

This patch separates budget replenishment and enforcement by adding
a replenishment timer, which fires at the next most imminent
release time of all runnable vcpus.

A replenishment queue has been added to keep track of all vcpus that
are runnable.

In the main scheduling function, we now return when a scheduling
decision should be made next time, such as when the budget of the
currently running vcpu runs out.

Finally, when waking up a vcpu, it tickles various vcpus appropriately,
like all other schedulers do.

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
changes since v9:
Re-worded some of the comments
Fixed the wrong returned value from rt_schedule().

changes since v8:
Replaced spin_lock_irqsave with spin_lock_irq.
Bug fix in burn_budget() where budget == 0.
Removed unnecessary tickling in the timer handler when
vcpus have the same deadline.

changes since v7:
Coding style.
Added a flag to indicate that a vcpu was depleted.
Added a local list including updated vcpus in the
timer handler. It is used to decide which vcpu should
tickle.

changes since v6:
Removed unnecessary vcpu_on_q() checks for runq_insert()
Renamed replenishment queue related functions without
underscores
New replq_reinsert() function for rt_vcpu_wake()

changes since v5:
Removed unnecessary vcpu_on_replq() checks
deadline_queue_insert() returns a flag to indicate if it's
needed to re-program the timer
Removed unnecessary timer checks
Added helper functions to remove vcpus from queues
coding style

Changes since v4:
removed unnecessary replenishment queue checks in vcpu_wake()
extended replq_remove() to all cases in vcpu_sleep()
used _deadline_queue_insert() helper function for both queues
_replq_insert() and _replq_remove() program timer internally

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.
---
 xen/common/sched_rt.c |  431 ++---
 1 file changed, 335 insertions(+), 96 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index bfed2e2..a43b8b8 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -3,7 +3,10 @@
  * EDF scheduling is a real-time scheduling algorithm used in embedded field.
  *
  * by Sisu Xi, 2013, Washington University in Saint Louis
- * and Meng Xu, 2014, University of Pennsylvania
+ * Meng Xu, 2014-2016, University of Pennsylvania
+ *
+ * Conversion toward event driven model by Tianyang Chen
+ * and Dagaen Golomb, 2016, University of Pennsylvania
  *
  * based on the code of credit Scheduler
  */
@@ -16,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +91,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -115,6 +119,16 @@
 #define RTDS_delayed_runq_add (1<<__RTDS_delayed_runq_add)
 
 /*
+ * RTDS_depleted: Does this vcp run out of budget?
+ * This flag is
+ * + set in burn_budget() if a vcpu has zero budget left;
+ * + cleared and checked in the repenishment handler,
+ *   for the vcpus that are being replenished.
+ */
+#define __RTDS_depleted 3
+#define RTDS_depleted (1<<__RTDS_depleted)
+
+/*
  * rt tracing events ("only" 512 available!). Check
  * include/public/trace.h for more details.
  */
@@ -142,6 +156,13 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+static void repl_timer_handler(void *data);
+
+#define deadline_runq_insert(...) \
+  deadline_queue_insert(&__q_elem, ##__VA_ARGS__)
+#define deadline_replq_insert(...) \
+  deadline_queue_insert(_elem, ##__VA_ARGS__)
+
 /*
  * Systme-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
@@ -152,7 +173,9 @@ struct rt_private {
 struct list_head sdom;  /* list of availalbe domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
+struct list_head replq; /* ordered list of vcpus that need 
replenishment */
 cpumask_t tickled;  /* cpus been tickled */
+struct

[Xen-devel] [PATCH v11]xen: sched: convert RTDS from time to event driven model

2016-03-19 Thread Tianyang Chen
The current RTDS code has several problems:
 - the scheduler, although the algorithm is event driven by
   nature, follows a time driven model (is invoked periodically!),
   making the code look unnatural;
 - budget replenishment logic, budget enforcement logic and scheduling
   decisions are mixed and entangled, making the code hard to understand;
 - the various queues of vcpus are scanned various times, making the
   code inefficient;

This patch separates budget replenishment and enforcement. It does that
by handling the former with a dedicated timer, and a queue of pending
replenishment events.

A replenishment queue has been added to keep track of all vcpus that
are runnable.

We also make sure that the main scheduling function is called when a
scheduling decision is necessary, such as when the currently running
vcpu runs out of budget.

Finally, when waking up a vcpu, it is now enough to tickle the various
CPUs appropriately, like all other schedulers also do.

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
changes since v10:
Re-worded commit messages.
Moved helper macros to where they belong.

changes since v9:
Re-worded some of the comments
Fixed the wrong returned value from rt_schedule().

changes since v8:
Replaced spin_lock_irqsave with spin_lock_irq.
Bug fix in burn_budget() where budget == 0.
Removed unnecessary tickling in the timer handler when
vcpus have the same deadline.

changes since v7:
Coding style.
Added a flag to indicate that a vcpu was depleted.
Added a local list including updated vcpus in the
timer handler. It is used to decide which vcpu should
tickle.

changes since v6:
Removed unnecessary vcpu_on_q() checks for runq_insert()
Renamed replenishment queue related functions without
underscores
New replq_reinsert() function for rt_vcpu_wake()

changes since v5:
Removed unnecessary vcpu_on_replq() checks
deadline_queue_insert() returns a flag to indicate if it's
needed to re-program the timer
Removed unnecessary timer checks
Added helper functions to remove vcpus from queues
coding style

Changes since v4:
removed unnecessary replenishment queue checks in vcpu_wake()
extended replq_remove() to all cases in vcpu_sleep()
used _deadline_queue_insert() helper function for both queues
_replq_insert() and _replq_remove() program timer internally

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.
---
 xen/common/sched_rt.c |  429 ++---
 1 file changed, 333 insertions(+), 96 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index bfed2e2..fc3863e 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -3,7 +3,10 @@
  * EDF scheduling is a real-time scheduling algorithm used in embedded field.
  *
  * by Sisu Xi, 2013, Washington University in Saint Louis
- * and Meng Xu, 2014, University of Pennsylvania
+ * Meng Xu, 2014-2016, University of Pennsylvania
+ *
+ * Conversion toward event driven model by Tianyang Chen
+ * and Dagaen Golomb, 2016, University of Pennsylvania
  *
  * based on the code of credit Scheduler
  */
@@ -16,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +91,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -115,6 +119,16 @@
 #define RTDS_delayed_runq_add (1<<__RTDS_delayed_runq_add)
 
 /*
+ * RTDS_depleted: Does this vcp run out of budget?
+ * This flag is
+ * + set in burn_budget() if a vcpu has zero budget left;
+ * + cleared and checked in the repenishment handler,
+ *   for the vcpus that are being replenished.
+ */
+#define __RTDS_depleted 3
+#define RTDS_depleted (1<<__RTDS_depleted)
+
+/*
  * rt tracing events ("only" 512 available!). Check
  * include/public/trace.h for more details.
  */
@@ -142,6 +156,8 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+static void repl_timer_handler(void *data);
+
 /*
  * Systme-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
@@ -152,7 +168,9 @@ struct rt_private {
 struct list_head sdom;  /* list of availalbe domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
+struct list_head replq; /* ordered list of vcpus that need 
replenishment */
 cpumask_t tickled;  /* cpus been tickled */
+struct timer *repl_timer;   /* replenishment timer */
 };
 
 /*
@@ -160,6 +1

[Xen-devel] [PATCH v9]xen: sched: convert RTDS from time to event driven model

2016-03-14 Thread Tianyang Chen
Budget replenishment and enforcement are separated by adding
a replenishment timer, which fires at the next most imminent
release time of all runnable vcpus.

A replenishment queue has been added to keep track of all vcpus that
are runnable.

The following functions have major changes to manage the replenishment
events and timer.

repl_handler(): It is a timer handler which is re-programmed
to fire at the nearest vcpu deadline to replenish vcpus.

rt_schedule(): picks the highest runnable vcpu based on cpu
affinity and ret.time will be passed to schedule(). If an idle
vcpu is picked, -1 is returned to avoid busy-waiting. repl_update()
has been removed.

rt_vcpu_wake(): when a vcpu wakes up, it tickles instead of
picking one from the run queue.

rt_context_saved(): when context switching is finished, the
preempted vcpu will be put back into the runq.

Simplified functional graph:

schedule.c SCHEDULE_SOFTIRQ:
rt_schedule():
[spin_lock]
burn_budget(scurr)
snext = runq_pick()
[spin_unlock]

sched_rt.c TIMER_SOFTIRQ
replenishment_timer_handler()
[spin_lock]
<for_each_vcpu_on_q(i)> {
replenish(i)
runq_tickle(i)
}>
program_timer()
[spin_lock]

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
changes since v8:
Replaced spin_lock_irqsave with spin_lock_irq.
Bug fix in burn_budget() where budget == 0.
Removed unnecessary tickling in the timer handler when
vcpus have the same deadline.

changes since v7:
Coding style.
Added a flag to indicate that a vcpu was depleted.
Added a local list including updated vcpus in the
timer handler. It is used to decide which vcpu should
tickle.

changes since v6:
Removed unnecessary vcpu_on_q() checks for runq_insert()
Renamed replenishment queue related functions without
underscores
New replq_reinsert() function for rt_vcpu_wake()

changes since v5:
Removed unnecessary vcpu_on_replq() checks
deadline_queue_insert() returns a flag to indicate if it's
needed to re-program the timer
Removed unnecessary timer checks
Added helper functions to remove vcpus from queues
coding style

Changes since v4:
removed unnecessary replenishment queue checks in vcpu_wake()
extended replq_remove() to all cases in vcpu_sleep()
used _deadline_queue_insert() helper function for both queues
_replq_insert() and _replq_remove() program timer internally

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.
---
 xen/common/sched_rt.c |  437 ++---
 1 file changed, 341 insertions(+), 96 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index bfed2e2..b491915 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -3,7 +3,9 @@
  * EDF scheduling is a real-time scheduling algorithm used in embedded field.
  *
  * by Sisu Xi, 2013, Washington University in Saint Louis
- * and Meng Xu, 2014, University of Pennsylvania
+ * Meng Xu, 2014-2016, University of Pennsylvania
+ * Tianyang Chen, 2016, University of Pennsylvania
+ * Dagaen Golomb, 2016, University of Pennsylvania
  *
  * based on the code of credit Scheduler
  */
@@ -16,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +90,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -115,6 +118,18 @@
 #define RTDS_delayed_runq_add (1<<__RTDS_delayed_runq_add)
 
 /*
+ * The replenishment timer handler needs to check this bit
+ * to know where a replenished vcpu was, when deciding which
+ * vcpu should tickle.
+ * A replenished vcpu should tickle if it was moved from the
+ * depleted queue to the run queue.
+ * + Set in burn_budget() if a vcpu has zero budget left.
+ * + Cleared and checked in the replenishment handler.
+ */
+#define __RTDS_was_depleted 3
+#define RTDS_was_depleted (1<<__RTDS_was_depleted)
+
+/*
  * rt tracing events ("only" 512 available!). Check
  * include/public/trace.h for more details.
  */
@@ -142,6 +157,9 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
 /*
 * System-wide private data, including the global RunQueue/DepletedQ
 * Global lock is referenced by schedule_data.schedule_lock from all
 */
@@ -152,7 +170,9 @@ struct rt_private {
 struct list_head sdom;  /* list of available domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list

[Xen-devel] [PATCH v8]xen: sched: convert RTDS from time to event driven model

2016-03-11 Thread Tianyang Chen
Budget replenishment and enforcement are separated by adding
a replenishment timer, which fires at the next most imminent
release time of all runnable vcpus.

A replenishment queue has been added to keep track of all vcpus that
are runnable.

The following functions have major changes to manage the replenishment
events and timer.

repl_handler(): It is a timer handler which is re-programmed
to fire at the nearest vcpu deadline to replenish vcpus.

rt_schedule(): picks the highest runnable vcpu based on cpu
affinity and ret.time will be passed to schedule(). If an idle
vcpu is picked, -1 is returned to avoid busy-waiting. repl_update()
has been removed.

rt_vcpu_wake(): when a vcpu wakes up, it tickles instead of
picking one from the run queue.

rt_context_saved(): when context switching is finished, the
preempted vcpu will be put back into the runq.

Simplified functional graph:

schedule.c SCHEDULE_SOFTIRQ:
rt_schedule():
[spin_lock]
burn_budget(scurr)
snext = runq_pick()
[spin_unlock]

sched_rt.c TIMER_SOFTIRQ
replenishment_timer_handler()
[spin_lock]
<for_each_vcpu_on_q(i)> {
replenish(i)
runq_tickle(i)
}>
program_timer()
[spin_lock]

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
changes since v7:
Coding style.
Added a flag to indicate that a vcpu was depleted.
Added a local list including updated vcpus in the
timer handler. It is used to decide which vcpu should
tickle.

changes since v6:
Removed unnecessary vcpu_on_q() checks for runq_insert()
Renamed replenishment queue related functions without
underscores
New replq_reinsert() function for rt_vcpu_wake()

changes since v5:
Removed unnecessary vcpu_on_replq() checks
deadline_queue_insert() returns a flag to indicate if it's
needed to re-program the timer
Removed unnecessary timer checks
Added helper functions to remove vcpus from queues
coding style

Changes since v4:
removed unnecessary replenishment queue checks in vcpu_wake()
extended replq_remove() to all cases in vcpu_sleep()
used _deadline_queue_insert() helper function for both queues
_replq_insert() and _replq_remove() program timer internally

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.
---
 xen/common/sched_rt.c |  435 ++---
 1 file changed, 340 insertions(+), 95 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index bfed2e2..58189cd 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -3,7 +3,9 @@
  * EDF scheduling is a real-time scheduling algorithm used in embedded field.
  *
  * by Sisu Xi, 2013, Washington University in Saint Louis
- * and Meng Xu, 2014, University of Pennsylvania
+ * Meng Xu, 2014-2016, University of Pennsylvania
+ * Tianyang Chen, 2016, University of Pennsylvania
+ * Dagaen Golomb, 2016, University of Pennsylvania
  *
  * based on the code of credit Scheduler
  */
@@ -16,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +90,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -115,6 +118,18 @@
 #define RTDS_delayed_runq_add (1<<__RTDS_delayed_runq_add)
 
 /*
+ * The replenishment timer handler needs to check this bit
+ * to know where a replenished vcpu was, when deciding which
+ * vcpu should tickle.
+ * A replenished vcpu should tickle if it was moved from the
+ * depleted queue to the run queue.
+ * + Set in burn_budget() if a vcpu has zero budget left.
+ * + Cleared and checked in the replenishment handler.
+ */
+#define __RTDS_was_depleted 3
+#define RTDS_was_depleted (1<<__RTDS_was_depleted)
+
+/*
  * rt tracing events ("only" 512 available!). Check
  * include/public/trace.h for more details.
  */
@@ -142,6 +157,9 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
 /*
 * System-wide private data, including the global RunQueue/DepletedQ
 * Global lock is referenced by schedule_data.schedule_lock from all
 */
@@ -152,7 +170,9 @@ struct rt_private {
 struct list_head sdom;  /* list of available domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
+struct list_head replq; /* ordered list of vcpus that need 
replenishment */
 cpumask_t tickled;  /* cpus been tickled */
+struct timer *repl_timer;   /* replenishm

[Xen-devel] [PATCH v7]xen: sched: convert RTDS from time to event driven model

2016-03-04 Thread Tianyang Chen
Budget replenishment and enforcement are separated by adding
a replenishment timer, which fires at the next most imminent
release time of all runnable vcpus.

A replenishment queue has been added to keep track of all replenishment
events.

The following functions have major changes to manage the replenishment
events and timer.

repl_handler(): It is a timer handler which is re-programmed
to fire at the nearest vcpu deadline to replenish vcpus.

rt_schedule(): picks the highest runnable vcpu based on cpu
affinity and ret.time will be passed to schedule(). If an idle
vcpu is picked, -1 is returned to avoid busy-waiting. repl_update()
has been removed.

rt_vcpu_wake(): when a vcpu wakes up, it tickles instead of
picking one from the run queue.

rt_context_saved(): when context switching is finished, the
preempted vcpu will be put back into the run queue and it tickles.

Simplified functional graph:

schedule.c SCHEDULE_SOFTIRQ:
rt_schedule():
[spin_lock]
burn_budget(scurr)
snext = runq_pick()
[spin_unlock]

sched_rt.c TIMER_SOFTIRQ
replenishment_timer_handler()
[spin_lock]
<for_each_vcpu_on_q(i)> {
replenish(i)
runq_tickle(i)
}>
program_timer()
[spin_lock]

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>

---
changes since v6:
Removed unnecessary vcpu_on_q() checks for runq_insert()
Renamed replenishment queue related functions without
underscores
New replq_reinsert() function for rt_vcpu_wake()

changes since v5:
Removed unnecessary vcpu_on_replq() checks
deadline_queue_insert() returns a flag to indicate if it's
needed to re-program the timer
Removed unnecessary timer checks
Added helper functions to remove vcpus from queues
coding style

Changes since v4:
removed unnecessary replenishment queue checks in vcpu_wake()
extended replq_remove() to all cases in vcpu_sleep()
used _deadline_queue_insert() helper function for both queues
_replq_insert() and _replq_remove() program timer internally

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.
---
 xen/common/sched_rt.c |  381 +
 1 file changed, 286 insertions(+), 95 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index bfed2e2..06803be 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +88,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -142,6 +143,9 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
 /*
 * System-wide private data, including the global RunQueue/DepletedQ
 * Global lock is referenced by schedule_data.schedule_lock from all
 */
@@ -152,7 +156,9 @@ struct rt_private {
 struct list_head sdom;  /* list of available domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
+struct list_head replq; /* ordered list of vcpus that need 
replenishment */
 cpumask_t tickled;  /* cpus been tickled */
+struct timer *repl_timer;   /* replenishment timer */
 };
 
 /*
@@ -160,6 +166,7 @@ struct rt_private {
  */
 struct rt_vcpu {
 struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head replq_elem;/* on the repl event list */
 
 /* Up-pointers */
 struct rt_dom *sdom;
@@ -213,8 +220,14 @@ static inline struct list_head *rt_depletedq(const struct 
scheduler *ops)
 return _priv(ops)->depletedq;
 }
 
+static inline struct list_head *rt_replq(const struct scheduler *ops)
+{
+return _priv(ops)->replq;
+}
+
 /*
- * Queue helper functions for runq and depletedq
+ * Helper functions for manipulating the runqueue, the depleted queue,
+ * and the replenishment events queue.
  */
 static int
 __vcpu_on_q(const struct rt_vcpu *svc)
@@ -228,6 +241,18 @@ __q_elem(struct list_head *elem)
 return list_entry(elem, struct rt_vcpu, q_elem);
 }
 
+static struct rt_vcpu *
+replq_elem(struct list_head *elem)
+{
+return list_entry(elem, struct rt_vcpu, replq_elem);
+}
+
+static int
+vcpu_on_replq(const struct rt_vcpu *svc)
+{
+   return !list_empty(>replq_elem);
+}
+
 /*
  * Debug related code, dump vcpu/cpu information
  */
@@ -288,7 +313,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
 static void
 rt_dump(const struct scheduler *ops)
 {
-struc

Re: [Xen-devel] [PATCH v6][RFC]xen: sched: convert RTDS from time to event driven model

2016-02-26 Thread Tianyang Chen



On 2/26/2016 4:11 AM, Dario Faggioli wrote:

It looks like the current code doesn't add a vcpu to the
replenishment
queue when vcpu_insert() is called. When the scheduler is
initialized,
all the vcpus are added to the replenishment queue after waking up
from
sleep. This needs to be changed (add vcpu to replq in vcpu_insert())
to
make it consistent in a sense that when rt_vcpu_insert() is called,
it
needs to have a corresponding replenishment event queued.

This way the ASSERT() is for both cases in __runq_insert() to
enforce
the fact that "when a vcpu is inserted to runq/depletedq, a
replenishment event is waiting for it".


I am ok with this (calling replq_insert() in rt_vcpu_insert()). Not
just doing that unconditionally though, as a vcpu can, e.g., be paused
when the insert_vcpu hook is called, and in that case, I don't think we
want to enqueue the replenishment event, do we?



Yes.


static void
rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
{
  struct rt_vcpu *svc = rt_vcpu(vc);
  spinlock_t *lock = vcpu_schedule_lock_irq(vc);

  clear_bit(__RTDS_scheduled, >flags);
  /* not insert idle vcpu to runq */
  if ( is_idle_vcpu(vc) )
  goto out;

  if ( test_and_clear_bit(__RTDS_delayed_runq_add, >flags)
&&
   likely(vcpu_runnable(vc)) )
  {
  __runq_insert(ops, svc);
  runq_tickle(ops, snext);
  }
  else
  __replq_remove(ops, svc);
out:
  vcpu_schedule_unlock_irq(lock, vc);
}

And, as said above, if you do this, try also removing the
__replq_remove() call from rt_vcpu_sleep(), this one should cover
for
that (and, actually, more than just that!).

After all this, check whether you still have the assert in
__replq_insert() triggering and let me know

So after moving the __replq_remove() to rt_context_saved(), the
assert
in __replq_insert() still fails when dom0 boots up.


Well, maybe removing __replq_remove() from rt_vcpu_sleep() is not
entirely ok, as if we do that we fail to deal with the case of when
(still in rt_vcpu_sleep()), __vcpu_on_q(svc) is true.

So, I'd say, do as I said above wrt rt_context_saved(). For
rt_vcpu_sleep(), you can try something like this:

static void
rt_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc)
{
 struct rt_vcpu * const svc = rt_vcpu(vc);

 BUG_ON( is_idle_vcpu(vc) );
 SCHED_STAT_CRANK(vcpu_sleep);

 if ( curr_on_cpu(vc->processor) == vc )
 cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ);
 else if ( __vcpu_on_q(svc) )
 {
 __q_remove(svc);
 __replq_remove(svc);  <=== *** LOOK HERE ***
 }
 else if ( svc->flags & RTDS_delayed_runq_add )
 clear_bit(__RTDS_delayed_runq_add, >flags);
}

In fact, in both the first and the third case, we go will at some point
pass through rt_context_switch(), and hit the __replq_remove() that I
made you put there.

In the case in the middle, as the vcpu was just queued, and for making
it go to sleep, it is enough to remove it from the runq (or depletedq,
in the case of this scheduler), we won't go through
rt_schedule()-->rt_context_saved(), and hence the __replq_remove()
won't be called.

Sorry for the overlook, can you try this.

That being said...


(XEN) Assertion '!vcpu_on_replq(svc)' failed at sched_rt.c:524
(XEN) [ Xen-4.7-unstable  x86_64  debug=y  Tainted:C ]

(XEN) Xen call trace:
(XEN)[] sched_rt.c#__replq_insert+0x2b/0x64
(XEN)[] sched_rt.c#rt_vcpu_wake+0xf2/0x12c
(XEN)[] vcpu_wake+0x213/0x3d4
(XEN)[] vcpu_unblock+0x4b/0x4d
(XEN)[] vcpu_kick+0x20/0x6f
(XEN)[] vcpu_mark_events_pending+0x2c/0x2f
(XEN)[]
event_2l.c#evtchn_2l_set_pending+0xa9/0xb9
(XEN)[] evtchn_send+0x158/0x183
(XEN)[] do_event_channel_op+0xe21/0x147d
(XEN)[] lstar_enter+0xe2/0x13c
(XEN)


... This says that the we call __replq_insert() from rt_vcpu_wake() and
find out in there that vcpu_on_replq() is true.

However, in v6 code for rt_vcpu_wake(), I can see this:

+if( !__vcpu_on_replq(svc) )
+{
+__replq_insert(ops, svc);
+ASSERT( vcpu_runnable(vc) );
+}

which would make me think that, if vcpu_on_replq() is true, we
shouldn't be calling __replq_insert() in the first place.

So, have you made other changes wrt v6 when trying this?


The v6 doesn't have the if statement commented out when I submitted it. 
But I tried commenting it out, the assertion failed.


It fails in V6 with:
rt_vcpu_sleep(): removing replenishment event for all cases
rt_context_saved(): not removing replenishment events
rt_vcpu_wake(): not checking if the event is already queued.

or with:
rt_vcpu_sleep(): not removing replenishment event at all
rt_context_saved(): removing replenishment events if not runnable
rt_vcpu_wake(): not checking if the event is already queued.

Also with:
rt_vcpu_sleep(): removing replenishment event if the vcpu is on 
runq/depletedq

rt_context_saved(): removing replenishment events if not runnable
rt_vcpu_wake(): not 

Re: [Xen-devel] [PATCH v6][RFC]xen: sched: convert RTDS from time to event driven model

2016-02-25 Thread Tianyang Chen



On 2/25/2016 6:31 PM, Dario Faggioli wrote:

Hey again,

Thanks for turning up so quickly.

We are getting closer and closer, although (of course :-)) I have some
more comments.

However, is there a particular reason why you are keeping the RFC tag?
Until you do that, it's like saying that you are chasing feedback, but
you do not think yourself the patch should be considered for being
upstreamed. As far as my opinion goes, this patch is not ready to go in
right now (as I said, I've got questions and comments), but its status
is way past RFC.



Oh OK, I had the impression that RFC means request for comments and it 
should always be used because indeed, I'm asking for comments.



On Thu, 2016-02-25 at 15:05 -0500, Tianyang Chen wrote:

changes since v5:
 removed unnecessary vcpu_on_replq() checks
 deadline_queue_insert() returns a flag to indicate if it's
 needed to re-program the timer
 removed unnecessary timer checks
 added helper functions to remove vcpus from queues
 coding style

Changes since v4:
 removed unnecessary replenishment queue checks in vcpu_wake()
 extended replq_remove() to all cases in vcpu_sleep()
 used _deadline_queue_insert() helper function for both queues
 _replq_insert() and _replq_remove() program timer internally

Changes since v3:
 removed running queue.
 added repl queue to keep track of repl events.
 timer is now per scheduler.
 timer is init on a valid cpu in a cpupool.


So, this does not belong here. It is ok to have it in this part of the
email, but it should not land in the actual commit changelog, once the
patch will be committed into Xen's git tree.

The way to achieve the above is to put this summary of changes below
the actual changelog, and below the Signed-of-by lines, after a marker
that looks like this "---".


Budget replenishment and enforcement are separated by adding
a replenishment timer, which fires at the next most imminent
release time of all runnable vcpus.

A new runningq has been added to keep track of all vcpus that
are on pcpus.


Mmm.. Is this the proper changelog? runningq is something we discussed,
and that appeared in v2, but is certainly no longer here... :-O



oops...


Wait, maybe you misunderstood and/or I did not make myself clear enough
(in which case, sorry). I never meant to say "always stop the timer".
Atually, in one of my last email I said the opposite, and I think that
would be the best thing to do.

Do you think there is any specific reason why we need to always stop
and restart it? If not, I think we can:
  - have deadline_queue_remove() also return whether the element
removed was the first one (i.e., the one the timer was programmed
after);
  - if it was, re-program the timer after the new front of the queue;
  - if it wasn't, nothing to do.

Thoughts?



It was my thought originally that the timer needs to be re-programmed 
only when the top vcpu is taken off. So did you mean when I manipulated 
repl_timer->expires manually, the timer should be stopped using proper 
timer API? The manipulation is gone now. Also, set_timer internally 
disables the timer so I assume it's safe just to call set_timer() 
directly, right?



@@ -405,22 +500,38 @@ __runq_insert(const struct scheduler *ops,
struct rt_vcpu *svc)

  /* add svc to runq if svc still has budget */
  if ( svc->cur_budget > 0 )
-{
-list_for_each(iter, runq)
-{
-struct rt_vcpu * iter_svc = __q_elem(iter);
-if ( svc->cur_deadline <= iter_svc->cur_deadline )
-break;
- }
-list_add_tail(>q_elem, iter);
-}
+deadline_queue_insert(&__q_elem, svc, >q_elem, runq);
  else
  {
  list_add(>q_elem, >depletedq);
+ASSERT( __vcpu_on_replq(svc) );


So, by doing this, you're telling me that, if the vcpu is added to the
depleted queue, there must be a replenishment planned for it (or the
ASSERT() would fail).

But if we are adding it to the runq, there may or may not be a
replenishment planned for it.

Is this what we want? Why there must not be a replenishment planned
already for a vcpu going into the runq (to happen at its next
deadline)?



It looks like the current code doesn't add a vcpu to the replenishment 
queue when vcpu_insert() is called. When the scheduler is initialized, 
all the vcpus are added to the replenishment queue after waking up from 
sleep. This needs to be changed (add vcpu to replq in vcpu_insert()) to 
make it consistent in a sense that when rt_vcpu_insert() is called, it 
needs to have a corresponding replenishment event queued.


This way the ASSERT() is for both cases in __runq_insert() to enforce 
the fact that "when a vcpu is inserted to runq/depletedq, a 
replenishment event is waiting for it".



@@ -1087,10 +1193,6 @@ static void
  rt_context_saved(const struct scheduler *ops, struct vcpu *vc)
  {
  stru

[Xen-devel] [PATCH v6][RFC]xen: sched: convert RTDS from time to event driven model

2016-02-25 Thread Tianyang Chen
changes since v5:
removed unnecessary vcpu_on_replq() checks
deadline_queue_insert() returns a flag to indicate if it's
needed to re-program the timer
removed unnecessary timer checks
added helper functions to remove vcpus from queues
coding style

Changes since v4:
removed unnecessary replenishment queue checks in vcpu_wake()
extended replq_remove() to all cases in vcpu_sleep()
used _deadline_queue_insert() helper function for both queues
_replq_insert() and _replq_remove() program timer internally

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.

Budget replenishment and enforcement are separated by adding
a replenishment timer, which fires at the next most imminent
release time of all runnable vcpus.

A new runningq has been added to keep track of all vcpus that
are on pcpus.

The following functions have major changes to manage the runningq
and replenishment

repl_handler(): It is a timer handler which is re-programmed
to fire at the nearest vcpu deadline to replenish vcpus on runq,
depletedq and runningq. When the replenishment is done, each
replenished vcpu should tickle a pcpu to see if it needs to
preempt any running vcpus.

rt_schedule(): picks the highest runnable vcpu based on cpu
affinity and ret.time will be passed to schedule(). If an idle
vcpu is picked, -1 is returned to avoid busy-waiting. repl_update()
has been removed.

rt_vcpu_wake(): when a vcpu is awake, it tickles instead of
picking one from runq. When a vcpu wakes up, it might reprogram
the timer if it has a more recent release time.

rt_context_saved(): when context switching is finished, the
preempted vcpu will be put back into the runq. Runningq is
updated and picking from runq and tickling are removed.

Simplified functional graph:

schedule.c SCHEDULE_SOFTIRQ:
rt_schedule():
[spin_lock]
burn_budget(scurr)
snext = runq_pick()
[spin_unlock]

sched_rt.c TIMER_SOFTIRQ
replenishment_timer_handler()
[spin_lock]
<for_each_vcpu_on_q(i)> {
replenish(i)
runq_tickle(i)
}>
program_timer()
[spin_lock]

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
 xen/common/sched_rt.c |  328 -
 1 file changed, 244 insertions(+), 84 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 2e5430f..16f77f9 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +88,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -142,6 +143,9 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
 /*
 * System-wide private data, including the global RunQueue/DepletedQ
 * Global lock is referenced by schedule_data.schedule_lock from all
 */
@@ -152,7 +156,9 @@ struct rt_private {
 struct list_head sdom;  /* list of available domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
+struct list_head replq; /* ordered list of vcpus that need 
replenishment */
 cpumask_t tickled;  /* cpus been tickled */
+struct timer *repl_timer;   /* replenishment timer */
 };
 
 /*
@@ -160,6 +166,7 @@ struct rt_private {
  */
 struct rt_vcpu {
 struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head replq_elem;/* on the repl event list */
 
 /* Up-pointers */
 struct rt_dom *sdom;
@@ -213,8 +220,14 @@ static inline struct list_head *rt_depletedq(const struct 
scheduler *ops)
 return _priv(ops)->depletedq;
 }
 
+static inline struct list_head *rt_replq(const struct scheduler *ops)
+{
+return _priv(ops)->replq;
+}
+
 /*
- * Queue helper functions for runq and depletedq
+ * Queue helper functions for runq, depletedq
+ * and replenishment event queue
  */
 static int
 __vcpu_on_q(const struct rt_vcpu *svc)
@@ -228,6 +241,18 @@ __q_elem(struct list_head *elem)
 return list_entry(elem, struct rt_vcpu, q_elem);
 }
 
+static struct rt_vcpu *
+__replq_elem(struct list_head *elem)
+{
+return list_entry(elem, struct rt_vcpu, replq_elem);
+}
+
+static int
+__vcpu_on_replq(const struct rt_vcpu *svc)
+{
+   return !list_empty(>replq_elem);
+}
+
 /*
  * Debug related code, dump vcpu/cpu information
  */
@@ -288,7 +313,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
 static void
 rt_dump(const s

Re: [Xen-devel] [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model

2016-02-25 Thread Tianyang Chen



On 2/25/2016 5:34 AM, Dario Faggioli wrote:

+ * it should be re-inserted back to the replenishment queue.
+ */
+if ( now >= svc->cur_deadline)
+{
+rt_update_deadline(now, svc);
+__replq_remove(ops, svc);
+}
+
+if( !__vcpu_on_replq(svc) )
+__replq_insert(ops, svc);
+

And here I am again: is it really necessary to check whether svc is
not
in the replenishment queue? It looks to me that it really should
not be
there... but maybe it can, because we remove the event from the
queue
when the vcpu sleeps, but *not* when the vcpu blocks?

Yeah. That is the case where I keep getting assertion failure if
it's
removed.


Which one ASSERT() fails?

The replq_insert() fails because it's already on the replenishment queue 
when rt_vcpu_wake() is trying to insert a vcpu again.


(XEN) Assertion '!__vcpu_on_replq(svc)' failed at sched_rt.c:527
(XEN) [ Xen-4.7-unstable  x86_64  debug=y  Tainted:C ]
(XEN) CPU:0
(XEN) RIP:e008:[] sched_rt.c#rt_vcpu_wake+0xf0/0x17f
(XEN) RFLAGS: 00010002   CONTEXT: hypervisor (d0v0)
(XEN) rax: 0001   rbx: 83023b522940   rcx: 0001
(XEN) rdx: 0031bb1b9980   rsi: 82d080342318   rdi: 83023b486ca0
(XEN) rbp: 8300bfcffd88   rsp: 8300bfcffd58   r8:  0004
(XEN) r9:  deadbeef   r10: 82d08025f5c0   r11: 0206
(XEN) r12: 83023b486ca0   r13: 8300bfd46000   r14: 82d080299b80
(XEN) r15: 83023b522d80   cr0: 80050033   cr4: 000406a0
(XEN) cr3: 000231c0d000   cr2: 880001e80ba8
(XEN) ds:    es:    fs:    gs:    ss: e010   cs: e008
(XEN) Xen stack trace from rsp=8300bfcffd58:
(XEN)8300bfcffd70 8300bfd46000 000216110572 83023b522940
(XEN)82d08032bc00 0282 8300bfcffdd8 82d08012be0c
(XEN)83023b4b5000 83023b4f1000 8300bfd47000 8300bfd46000
(XEN) 83023b4b4280 00014440 0001
(XEN)8300bfcffde8 82d08012c327 8300bfcffe08 82d080169cea
(XEN)83023b4b5000 000a 8300bfcffe18 82d080169d65
(XEN)8300bfcffe38 82d08010762a 83023b4b4280 83023b4b5000
(XEN)8300bfcffe68 82d08010822a 8300bfcffe68 fff2
(XEN)88022056dcb4 880230c34440 8300bfcffef8 82d0801096fc
(XEN)8300bfcffef8 8300bfcfff18 8300bfcffef8 82d080240e85
(XEN)88020001  0246 810013aa
(XEN)000a 810013aa e030 8300bfd47000
(XEN)8802206597f0 880230c34440 00014440 0001
(XEN)7cff403000c7 82d0802439e2 8100140a 0020
(XEN)88022063c7d0 88022063c7d0 0001 dca0
(XEN)88022056dcb8 880230c34440 0206 0004
(XEN)8802230001a0 880220619000 0020 8100140a
(XEN) 88022056dcb4 0004 00010100
(XEN)8100140a e033 0206 88022056dc90
(XEN)e02b   
(XEN) Xen call trace:
(XEN)[] sched_rt.c#rt_vcpu_wake+0xf0/0x17f
(XEN)[] vcpu_wake+0x213/0x3d4
(XEN)[] vcpu_unblock+0x4b/0x4d
(XEN)[] vcpu_kick+0x20/0x6f
(XEN)[] vcpu_mark_events_pending+0x2c/0x2f
(XEN)[] event_2l.c#evtchn_2l_set_pending+0xa9/0xb9
(XEN)[] evtchn_send+0x158/0x183
(XEN)[] do_event_channel_op+0xe21/0x147d
(XEN)[] lstar_enter+0xe2/0x13c
(XEN)
(XEN)
(XEN) 
(XEN) Panic on CPU 0:
(XEN) Assertion '!__vcpu_on_replq(svc)' failed at sched_rt.c:527
(XEN) 

I'm thinking when
a vcpu unblocks, it could potentially fall through here.


Well, when unblocking, wake() is certainly called, yes.


And like you
said, mostly spurious sleep
happens when a vcpu is running and it could happen in other cases,
although rare.


I think I said already there's no such thing as "spurious sleep". Or at
least, I can't think of anything that I would define a spurious sleep,
if you do, please, explain what situation you're referring to.

I meant to say spurious wakeup... If rt_vcpu_sleep() removes vcpus from 
replenishment queue, it's perfectly safe for rt_vcpu_wake() to insert 
them back. So, I'm suspecting it's the spurious wakeup that's causing 
troubles because vcpus are not removed prior to rt_vcpu_wake(). However, 
the two RETURNs at the beginning of rt_vcpu_wake() should catch that 
shouldn't it?

In any case, one way of dealing with vcpus blocking/offlining/etc could
be to, in context_saved(), in case we are not adding the vcpu back to
the runq, cancel its replenishment event with __replq_remove().

(This may make it possible to avoid doing it in rt_vcpu_sleep() too,
but you'll need to check and test.)

Can 

Re: [Xen-devel] [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model

2016-02-24 Thread Tianyang Chen



On 2/24/2016 9:02 PM, Dario Faggioli wrote:

Hey,

Here I am, sorry for the delay. :-(

No problem, I think we are almost there.


On Mon, 2016-02-08 at 23:33 -0500, Tianyang Chen wrote:

Changes since v4:
 removed unnecessary replenishment queue checks in vcpu_wake()
 extended replq_remove() to all cases in vcpu_sleep()
 used _deadline_queue_insert() helper function for both queues
 _replq_insert() and _replq_remove() program timer internally

Changes since v3:
 removed running queue.
 added repl queue to keep track of repl events.
 timer is now per scheduler.
 timer is init on a valid cpu in a cpupool.

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>


So, the actual changelog... why did it disappear? :-)


I will add it in the next version.


diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 2e5430f..1f0bb7b 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
  }
  
  /*

+ * Removing a vcpu from the replenishment queue could
+ * re-program the timer for the next replenishment event
+ * if the timer is currently active
+ */
+static inline void
+__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+struct rt_private *prv = rt_priv(ops);
+struct list_head *replq = rt_replq(ops);
+struct timer* repl_timer = prv->repl_timer;
+
+if ( __vcpu_on_replq(svc) )
+{

So, can this be false? If yes, when and why?


+/*
+ * disarm the timer if removing the first replenishment
event
+ * which is going to happen next
+ */
+if( active_timer(repl_timer) )
+{


And here again, isn't it the case that, if there is a vcpu in the
replenishment queue, then the timer _must_ be active? (I.e., if the
above is true, isn't this also always true?)


So, the reason for this check is that the code inside timer handler also 
calls this function when
timer is stopped. We don't want to start the timer inside the timer 
handler when it's manipulating
the replenishment queue right? Because it could be removing/adding back 
more than one vcpu

and the timer should only be set at the very end of the timer handler.

In fact, I copied a static function from timer.c just to check if a 
timer is active or not. Not sure if

this is a good practice or not.


+struct rt_vcpu *next_repl = __replq_elem(replq->next);
+
+if( next_repl->cur_deadline == svc->cur_deadline )
+repl_timer->expires = 0;
+

I think we need to call stop_timer() here, don't we? I know that
set_timer() does that itself, but I think it is indeed necessary. E.g.,
right now, you're manipulating the timer without the timer lock.


So when the timer handler is protected by a global lock, it is still 
necessary to stop the timer?

Also, the timer only runs on a single CPU for a scheduler too.


+list_del_init(>replq_elem);
+
+/* re-arm the timer for the next replenishment event */
+if( !list_empty(replq) )
+{
+struct rt_vcpu *svc_next = __replq_elem(replq-

next);

+set_timer(repl_timer, svc_next->cur_deadline);
+}
+}
+
+else
+list_del_init(>replq_elem);


I don't like the structure of this function, and I'd ask for a
different if/then/else logic to be used, but let's first see --by
answering the questions above-- if some of these if-s can actually go
away. :-)


 So this function reduces to:

   
/*

 * disarm the timer if removing the first replenishment event
 * which is going to happen next
 */
if( active_timer(repl_timer) )
{
stop_timer(replq_timer);
repl_timer->expires = 0;

list_del_init(>replq_elem);

/* re-arm the timer for the next replenishment event */
if( !list_empty(replq) )
{
struct rt_vcpu *svc_next = __replq_elem(replq->next);
set_timer(repl_timer, svc_next->cur_deadline);
}
}

else
list_del_init(>replq_elem);



+}
+}
+
+/*
+ * An utility function that inserts a vcpu to a
+ * queue based on certain order (EDF)


End long comments with a full stop.


+ */
+static void
+_deadline_queue_insert(struct rt_vcpu * (*_get_q_elem)(struct
list_head *elem),


There's really a lot of "underscore-prefixing" in this file.

This is, of course, not your fault, and should not be addressed in this
patch. But, at least when it comes to new code, let's avoid making
things worse.

So, "deadline_queue_insert()" is just ok, I think. Yes, I know I did
suggest to call it how it's called in this patch, underscore included,
but I now think it would be better to get 

Re: [Xen-devel] [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model

2016-02-17 Thread Tianyang Chen



On 2/15/2016 10:55 PM, Meng Xu wrote:

Hi Tianyang,

Thanks for the patch! Great work and really quick action! :-)
I will just comment on something I quickly find out and would look
forwarding to Dario's comment.

On Mon, Feb 8, 2016 at 11:33 PM, Tianyang Chen <ti...@seas.upenn.edu
<mailto:ti...@seas.upenn.edu>> wrote:
 > Changes since v4:
 > removed unnecessary replenishment queue checks in vcpu_wake()
 > extended replq_remove() to all cases in vcpu_sleep()
 > used _deadline_queue_insert() helper function for both queues
 > _replq_insert() and _replq_remove() program timer internally
 >
 > Changes since v3:
 > removed running queue.
 > added repl queue to keep track of repl events.
 > timer is now per scheduler.
 > timer is init on a valid cpu in a cpupool.
 >
 > Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu
<mailto:ti...@seas.upenn.edu>>
 > Signed-off-by: Meng Xu <men...@cis.upenn.edu
<mailto:men...@cis.upenn.edu>>
 > Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu
<mailto:dgol...@seas.upenn.edu>>
 > ---
 >  xen/common/sched_rt.c |  337
-
 >  1 file changed, 251 insertions(+), 86 deletions(-)
 >
 > diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
 > index 2e5430f..1f0bb7b 100644
 > --- a/xen/common/sched_rt.c
 > +++ b/xen/common/sched_rt.c
 > @@ -16,6 +16,7 @@
 >  #include 
 >  #include 
 >  #include 
 > +#include 
 >  #include 
 >  #include 
 >  #include 
 > @@ -87,7 +88,7 @@
 >  #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 >
 >  #define UPDATE_LIMIT_SHIFT  10
 > -#define MAX_SCHEDULE(MILLISECS(1))
 > +
 >  /*
 >   * Flags
 >   */
 > @@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch;
 >   */
 >  static unsigned int nr_rt_ops;
 >
 > +/* handler for the replenishment timer */
 > +static void repl_handler(void *data);
 > +
 > +/* checks if a timer is active or not */
 > +bool_t active_timer(struct timer* t);
 > +
 >  /*
 >   * Systme-wide private data, include global RunQueue/DepletedQ
 >   * Global lock is referenced by schedule_data.schedule_lock from all
 > @@ -152,7 +159,9 @@ struct rt_private {
 >  struct list_head sdom;  /* list of availalbe domains, used
for dump */
 >  struct list_head runq;  /* ordered list of runnable vcpus */
 >  struct list_head depletedq; /* unordered list of depleted vcpus */
 > +struct list_head replq; /* ordered list of vcpus that need
replenishment */
 >  cpumask_t tickled;  /* cpus been tickled */
 > +struct timer *repl_timer;   /* replenishment timer */
 >  };
 >
 >  /*
 > @@ -160,6 +169,7 @@ struct rt_private {
 >   */
 >  struct rt_vcpu {
 >  struct list_head q_elem;/* on the runq/depletedq list */
 > +struct list_head replq_elem;/* on the repl event list */
 >
 >  /* Up-pointers */
 >  struct rt_dom *sdom;
 > @@ -213,8 +223,14 @@ static inline struct list_head
*rt_depletedq(const struct scheduler *ops)
 >  return _priv(ops)->depletedq;
 >  }
 >
 > +static inline struct list_head *rt_replq(const struct scheduler *ops)
 > +{
 > +return _priv(ops)->replq;
 > +}
 > +
 >  /*
 > - * Queue helper functions for runq and depletedq
 > + * Queue helper functions for runq, depletedq
 > + * and replenishment event queue
 >   */
 >  static int
 >  __vcpu_on_q(const struct rt_vcpu *svc)
 > @@ -228,6 +244,18 @@ __q_elem(struct list_head *elem)
 >  return list_entry(elem, struct rt_vcpu, q_elem);
 >  }
 >
 > +static struct rt_vcpu *
 > +__replq_elem(struct list_head *elem)
 > +{
 > +return list_entry(elem, struct rt_vcpu, replq_elem);
 > +}
 > +
 > +static int
 > +__vcpu_on_replq(const struct rt_vcpu *svc)
 > +{
 > +   return !list_empty(>replq_elem);
 > +}
 > +
 >  /*
 >   * Debug related code, dump vcpu/cpu information
 >   */
 > @@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
 >  static void
 >  rt_dump(const struct scheduler *ops)
 >  {
 > -struct list_head *runq, *depletedq, *iter;
 > +struct list_head *runq, *depletedq, *replq, *iter;
 >  struct rt_private *prv = rt_priv(ops);
 >  struct rt_vcpu *svc;
 >  struct rt_dom *sdom;
 > @@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops)
 >
 >  runq = rt_runq(ops);
 >  depletedq = rt_depletedq(ops);
 > +replq = rt_replq(ops);
 >
 >  printk("Global RunQueue info:\n");
 >  list_for_each( iter, runq )
 > @@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops)
 >  rt_dump_vcpu(ops, svc);
 >  }
 >
 > +

[Xen-devel] [PATCH v5][RFC]xen: sched: convert RTDS from time to event driven model

2016-02-08 Thread Tianyang Chen
Changes since v4:
removed unnecessary replenishment queue checks in vcpu_wake()
extended replq_remove() to all cases in vcpu_sleep()
used _deadline_queue_insert() helper function for both queues
_replq_insert() and _replq_remove() program timer internally

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
 xen/common/sched_rt.c |  337 -
 1 file changed, 251 insertions(+), 86 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 2e5430f..1f0bb7b 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +88,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -142,6 +143,12 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
+/* checks if a timer is active or not */
+bool_t active_timer(struct timer* t);
+
 /*
  * Systme-wide private data, include global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
@@ -152,7 +159,9 @@ struct rt_private {
 struct list_head sdom;  /* list of availalbe domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
+struct list_head replq; /* ordered list of vcpus that need 
replenishment */
 cpumask_t tickled;  /* cpus been tickled */
+struct timer *repl_timer;   /* replenishment timer */
 };
 
 /*
@@ -160,6 +169,7 @@ struct rt_private {
  */
 struct rt_vcpu {
 struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head replq_elem;/* on the repl event list */
 
 /* Up-pointers */
 struct rt_dom *sdom;
@@ -213,8 +223,14 @@ static inline struct list_head *rt_depletedq(const struct 
scheduler *ops)
 return _priv(ops)->depletedq;
 }
 
+static inline struct list_head *rt_replq(const struct scheduler *ops)
+{
+return _priv(ops)->replq;
+}
+
 /*
- * Queue helper functions for runq and depletedq
+ * Queue helper functions for runq, depletedq
+ * and replenishment event queue
  */
 static int
 __vcpu_on_q(const struct rt_vcpu *svc)
@@ -228,6 +244,18 @@ __q_elem(struct list_head *elem)
 return list_entry(elem, struct rt_vcpu, q_elem);
 }
 
+static struct rt_vcpu *
+__replq_elem(struct list_head *elem)
+{
+return list_entry(elem, struct rt_vcpu, replq_elem);
+}
+
+static int
+__vcpu_on_replq(const struct rt_vcpu *svc)
+{
+   return !list_empty(>replq_elem);
+}
+
 /*
  * Debug related code, dump vcpu/cpu information
  */
@@ -288,7 +316,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
 static void
 rt_dump(const struct scheduler *ops)
 {
-struct list_head *runq, *depletedq, *iter;
+struct list_head *runq, *depletedq, *replq, *iter;
 struct rt_private *prv = rt_priv(ops);
 struct rt_vcpu *svc;
 struct rt_dom *sdom;
@@ -301,6 +329,7 @@ rt_dump(const struct scheduler *ops)
 
 runq = rt_runq(ops);
 depletedq = rt_depletedq(ops);
+replq = rt_replq(ops);
 
 printk("Global RunQueue info:\n");
 list_for_each( iter, runq )
@@ -316,6 +345,13 @@ rt_dump(const struct scheduler *ops)
 rt_dump_vcpu(ops, svc);
 }
 
+printk("Global Replenishment Event info:\n");
+list_for_each( iter, replq )
+{
+svc = __replq_elem(iter);
+rt_dump_vcpu(ops, svc);
+}
+
 printk("Domain info:\n");
 list_for_each( iter, >sdom )
 {
@@ -388,6 +424,66 @@ __q_remove(struct rt_vcpu *svc)
 }
 
 /*
+ * Removing a vcpu from the replenishment queue could
+ * re-program the timer for the next replenishment event
+ * if the timer is currently active
+ */
+static inline void
+__replq_remove(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+struct rt_private *prv = rt_priv(ops);
+struct list_head *replq = rt_replq(ops);
+struct timer* repl_timer = prv->repl_timer;
+
+if ( __vcpu_on_replq(svc) )
+{
+/*
+ * disarm the timer if removing the first replenishment event
+ * which is going to happen next
+ */
+if( active_timer(repl_timer) )
+{
+struct rt_vcpu *next_repl = __replq_elem(replq->next);
+
+if( next_repl->cur_deadline == svc->cur_deadline )
+repl_timer->expires = 0;
+
+list_del_init(>re

Re: [Xen-devel] [PATCH v4] xen: sched: convert RTDS from time to event driven model

2016-02-08 Thread Tianyang Chen



On 2/8/2016 6:27 AM, Dario Faggioli wrote:

On Fri, 2016-02-05 at 23:27 -0500, Tianyang Chen wrote:


On 2/5/2016 9:39 AM, Dario Faggioli wrote:

On Wed, 2016-02-03 at 21:23 -0500, Tianyang Chen wrote:



I see. So I'm just curious what can cause spurious wakeup? Does it
only
happen to a running vcpu (currently on pcpu), and a vcpu that is on
either runq or depletedq?


Hehe, "that's virt, baby!" :-P

No, seriously, vcpu being already running or already in a queue, is
indeed what makes me call a wakeup a "spurious" wakeup. You can check
at the performance counters that we update in those cases, to see when
they happen most. In my experience, on Credit, I've seen them happening
mostly when a vcpu is running. For instance:

root@Zhaman:~# xenperf |grep vcpu_wake
sched: vcpu_wake_runningT=39
sched: vcpu_wake_onrunq T= 0
sched: vcpu_wake_runnable   T= 59875
sched: vcpu_wake_not_runnable   T= 0


So we realized that this is just a spurious wakeup, and get back to
what we were doing.

What's wrong with this I just said?



The reason why I wanted to add a vcpu back to replq is because it
could
be taken off from the timer handler. Now, because of the spurious
wakeup, I think the timer shouldn't take any vcpus off of replq, in
which case wake() should be doing nothing just like other schedulers
when it's a spurious wakeup. Also, only sleep() should remove events
from replq.


Err... well, that depends on the how the code ends up looking like, and
it's start to become difficult to reason on it like this.

Perhaps, considering that it looks to me that we are in agreement on
all the important aspects, you can draft a new version and we'll reason
and comment on top of that?


Mmm... no, I think we should know/figure out whether a
replenishment is
pending already and we are ok with it, or if we need a new one.



Just to make sure, spurious sleep/wakeup are for a vcpu that is on
pcpu
or any queue right?


Yes, spurious wakeup is how I'm calling wakeups that needs no action
from the schedule, because the vcpu is already awake (and either
running or runnable).

I don't think there is such a thing as spurious sleep... When sleep is
called, we go to sleep. There are cases where the sleep-->wakeup
turnaround is really really fast, but I would not call them "spurious
sleeps", and they should not need much special treatment, not in
sched_rt.c at least.

Oh, maybe you mean the cases where you wakeup and you find out that
context_saved() has not run yet (just occurred by looking at the code
below)? Well, yes... But I wouldn't call those "spurious sleeps"
either.


If a real sleep happens, it should be taken off from replq. However,
in
spurious wakeup (which I assume corresponds to a spurious sleep?),
it
shouldn't be taken off from replq. Adding back to replq should
happen
for those vcpus which were taken off because of "real sleep".


I don't really follow, but I have the feeling that you're making it
sound more complicated like it is in reality... :-)



So my reasoning is that, when sleep() is called in sched_rt.c, a vcpu 
will need to be taken off from the replenishment event queue. However, a 
vcpu can be put to sleep/not-runnable without even calling sleep(), 
which corresponds to a later "spurious wakeup". Is there any way that 
RTDS can know when this happens? If not, in rt_vcpu_wake(), it needs to 
check, in all situations, if a vcpu is on the replenishment event queue 
or not. If not, add it back. Am I right?


vcpu running  --> sleep(), taken off from replq  
vcpu on queue --> sleep(), taken off from replq
vcpu not on queue --> sleep(), taken off from replq


However,

vcpu running/on queue/not on queue --> just not runnable anymore (still 
on replq) --> spurious wakeup (still on replq)


vcpus shouldn't be added back to replq again.

So in all cases, rt_vcpu_wake() always need to check if a vcpu is on the 
replq or not.


Now I think because of the addition of a replenishment queue, spurious 
wake up cannot be simply treated as NOP. Just like V4, the "Out" label 
basically maintains the replenishment queue.


Anyways, I agree it's gonna be easier to reason on v5 where all changes 
are applied.


Thanks,
Tianyang

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4] xen: sched: convert RTDS from time to event driven model

2016-02-05 Thread Tianyang Chen



On 2/5/2016 9:39 AM, Dario Faggioli wrote:

On Wed, 2016-02-03 at 21:23 -0500, Tianyang Chen wrote:


On 2/3/2016 7:39 AM, Dario Faggioli wrote:

On Tue, 2016-02-02 at 13:09 -0500, Tianyang Chen wrote:



So what do we do, we raise the *_delayed_runq_add flag and continue
and
leave the vcpu alone. This happens, for instance, when there is a
race
between scheduling out a vcpu that is going to sleep, and the vcpu
in
question being woken back up already! It's not frequent, but we
need to
take care of that. At some point (sooner rather than later, most
likely)
the context switch will actually happen, and the vcpu that is
waking up
is put in the runqueue.



Yeah I agree with this situation. But I meant to say when a vcpu is
waking up while it's still on a queue. See below.


So, if a vcpu wakes up while it is running on a pcpu already, or while
it is already woken and has already been put in the runqueue, these are
SPURIOUS WAKEUP, which should be dealt with by just doing nothing, as
every scheduler is doing. This is what I'm trying to say since a
couple of email, let's see if I've said it clear enough this time. :-D



I see. So I'm just curious what can cause spurious wakeup? Does it only 
happen to a running vcpu (currently on pcpu), and a vcpu that is on 
either runq or depletedq?



static void
rt_vcpu_wake(const struct scheduler *ops, struct vcpu *vc)
{
  struct rt_vcpu * const svc = rt_vcpu(vc);
  s_time_t now = NOW();
  struct rt_private *prv = rt_priv(ops);

  BUG_ON( is_idle_vcpu(vc) );

  if ( unlikely(curr_on_cpu(vc->processor) == vc) )
  {
  SCHED_STAT_CRANK(vcpu_wake_running);
  return;
  }



For this situation, a vcpu is waking up on a pcpu. It could be taken
off from the replq inside the timer handler at the end. So, if we
decide
to not replenish any sleeping/un-runnable vcpus any further, we
should
probably add it back to replq here right?


I'm sorry, I don't understand what you're saying here. The vcpu woke up
a few time ago. We went through here. At the time it was not on any
pcpu and it was not in the runqueue, so we did plan a replenishment
event for it in the replenishment queue, at its next deadline.

Now we're here again, for some reason. We figure out that the vcpu is
running already and, most likely (feel free to add an ASSERT() inside
the if to that effect), it will also still have its replenishment event
queued.

So we realized that this is just a spurious wakeup, and get back to
what we were doing.

What's wrong with this I just said?



The reason why I wanted to add a vcpu back to replq is because it could 
be taken off from the timer handler. Now, because of the spurious 
wakeup, I think the timer shouldn't take any vcpus off of replq, in 
which case wake() should be doing nothing just like other schedulers 
when it's a spurious wakeup. Also, only sleep() should remove events 
from replq.



  /* on RunQ/DepletedQ, just update info is ok */
  if ( unlikely(__vcpu_on_q(svc)) )
  {
  SCHED_STAT_CRANK(vcpu_wake_onrunq);
  return;
  }



For this situation, a vcpu could be going through the following
phases:

on runq --> sleep --> (for a long time) --> wake up


Well, but then we're not here, because when it went to sleep, it's been
taken off the runqueue, hasn't it?

Actually, I do think that you need to be running to go to sleep, but
anyway, even if a vcpu could go to sleep or block from the runqueue,
it's not going to stay in the runqueue, and the first wakeup that
occurs does not find it on the runqueue, and hence is not covered by
this case... Is it?



uh, I missed the __q_remove() from sleep. It will not end up here in the 
case I mentioned.



When it wakes up, it could stay in the front of the runq because
cur_deadline hasn't been updated. It will, inherently have some high
priority-ness (because of EDF) and be picked over other vcpus right?
Do
we want to update it's deadline and budget here and re-insert it
accordingly?


When it wakes up and it is found not to be either running or in the
runqueue already, yes, we certainly want to do that!

But if some other (again, spurious) wakeup occurs, and find it already
in the runqueue (i.e., this case) we actually *don't* want to update
its deadline again.



Agree.


  if ( likely(vcpu_runnable(vc)) )
  SCHED_STAT_CRANK(vcpu_wake_runnable);
  else
  SCHED_STAT_CRANK(vcpu_wake_not_runnable);


  if ( now >= svc->cur_deadline)
  {
  rt_update_deadline(now, svc);
  __replq_remove(svc);
  }

  __replq_insert(svc);



For here, consider this scenario:

on pcpu (originally on replq) --> sleep --> context_saved() but
still
asleep --> wake()

Timer handler isn't triggered before it awakes. Then this vcpu will
be
added to replq again while it's already there. I'm not 100% positive
if
this situation is possible though. But judging from
rt_context_saved(),
it checks if a vcp

Re: [Xen-devel] [PATCH v4] xen: sched: convert RTDS from time to event driven model

2016-02-03 Thread Tianyang Chen



On 2/3/2016 7:39 AM, Dario Faggioli wrote:

On Tue, 2016-02-02 at 13:09 -0500, Tianyang Chen wrote:

On 2/2/2016 10:08 AM, Dario Faggioli wrote:

On Sun, 2016-01-31 at 23:32 -0500, Tianyang Chen wrote:


+struct rt_private *prv = rt_priv(ops);
+struct list_head *replq = rt_replq(ops);
+struct list_head *iter;
+
+ASSERT( spin_is_locked(>lock) );
+

Is this really useful? Considering where this is called, I don't
think
it is.



This basically comes from __q_insert().


I see. Well it's still not necessary, IMO, so don't add it. At some
point, we may want to remove it from __runq_insert() too.

The bottom line is: prv->lock is, currently, the runqueue lock (it's
the lock for everything! :-D). It is pretty obvious that you need the
runq lock to manipulate the runqueue.

It is not the runqueue that you are manipulating here, it is the
replenishment queue, but as a matter of fact, prv->lock serializes that
too. So, if anything, it would be more useful to add a comment
somewhere, noting (reinforcing, I should probably say) the point that
also this new queue for replenishment is being serialized by prv->lock,
than an assert like this one.



Sure


I have been searching for the
definitions of vcpu_schedule_lock_irq() and pcpu_schedule_lock_irq().
Do
you know where they are defined? I did some name search in the
entire
xen directory but still nothing.


They are auto-generated. Look in xen/include/xen/sched-if.h.


+ASSERT( !__vcpu_on_p(svc) );
+
+list_for_each(iter, replq)
+{
+struct rt_vcpu * iter_svc = __p_elem(iter);
+if ( svc->cur_deadline <= iter_svc->cur_deadline )
+break;
+}
+
+list_add_tail(>p_elem, iter);
+}

This looks ok. The list_for_each()+list_ad_tail() part would be
basically identical to what we have inside __runq_insert(),
thgough,
wouldn't it?

It may make sense to define an _ordered_queue_insert() (or
_deadline_queue_insert(), since it's alwas the deadline that is
compared) utility function that we can then call in both cases.


@@ -449,6 +507,7 @@ rt_init(struct scheduler *ops)
   INIT_LIST_HEAD(>sdom);
   INIT_LIST_HEAD(>runq);
   INIT_LIST_HEAD(>depletedq);
+INIT_LIST_HEAD(>replq);


Is there a reason why you don't do at least the allocation of the
timer
here? Because, to me, this looks the best place for that.


   cpumask_clear(>tickled);

@@ -473,6 +532,9 @@ rt_deinit(const struct scheduler *ops)
   xfree(_cpumask_scratch);
   _cpumask_scratch = NULL;
   }
+
+kill_timer(prv->repl_timer);
+
   xfree(prv);


And here, you're freeing prv, without having freed prv->timer,
which is
therefore leaked.


@@ -632,6 +699,17 @@ rt_vcpu_insert(const struct scheduler *ops,
struct vcpu *vc)
   vcpu_schedule_unlock_irq(lock, vc);

   SCHED_STAT_CRANK(vcpu_insert);
+
+if( prv->repl_timer == NULL )
+{
+/* vc->processor has been set in schedule.c */
+cpu = vc->processor;
+
+repl_timer = xzalloc(struct timer);
+
+prv->repl_timer = repl_timer;
+init_timer(repl_timer, repl_handler, (void *)ops, cpu);
+}
   }


This belong to rt_init, IMO.



When a new pool is created, the corresponding scheduler is also
initialized. But there is no pcpu assigned to that pool.


Sure, and in fact, above, when commenting on rt_init() I said "Is there
a reason why you don't do at least the allocation of the timer here?"

But you're right, here I put it like all should belong to rt_init(), I
should have been more clear.


If init_timer()
happens in rt_init, how does it know which cpu to pick?
When move_domain() is called, there must be some pcpus in the
destination pool and then vcpu_insert happens.


Allocating memory for the rt_priv copy of this particular scheduler
instance, definitely belong in rt_init().

Then, in this case, we could do the allocation in rt_init() and do the
initialization later, perhaps here, or the first time that the timer
needs to be started, or when the first pCPU is assigned to this
scheduler (by being assigned to the cpupool this scheduler services).

I honestly prefer the latter. That is to say, in rt_alloc_pdata(), you
check whether prv->repl_timer is NULL and you allocate and initialize
it for the pCPU being brought up. I also would put an explicit 'prv-

repl_timer=NULL', with a comment that proper allocation and

initialization will happen later, and the reason why that is the case,
in rt_init().

What do you think?



Yeah it makes sense to put some comments in rt_init to remind readers 
that the timer will be taken care of later in alloc_pdata().



@@ -841,7 +881,6 @@ rt_schedule(const struct scheduler *ops,
s_time_t
now, bool_t tasklet_work_sched
   /* burn_budget would return for IDLE VCPU */
   burn_budget(ops, scurr, now);

-__repl_update(ops, now);


__repl_update() going away is of course fine. But we need to enact
the
budget

Re: [Xen-devel] [Bug] sched: credit2: Assertion failed

2016-02-02 Thread Tianyang Chen



On 2/2/2016 3:29 AM, Dario Faggioli wrote:

On Tue, 2016-02-02 at 01:18 -0500, Tianyang Chen wrote:

The following script caused an unresponsive dom0 and it can not be
reproduced all the time. The dom0 is using credit2 scheduler.

#!/bin/bash
xl cpupool-list -c
xl cpupool-cpu-remove Pool-0 3
xl cpupool-cpu-remove Pool-0 2
xl cpupool-create name=\"r\" sched=\"credit\"
xl cpupool-create name=\"t\" sched=\"credit\"
xl cpupool-cpu-add r 3
xl cpupool-cpu-add t 2


Yes, this is known. It is because Credit2 does not support (hard)
affinity. There is a series on the list by someone implementing that,
but it needs refreshing, which I hope to get round to it soon.


Another bug can be reproduced(not all the time) if the newly created
pool is also credit2. There is an assertion failure (see bug.txt),
which
is essentially the same thing I got when trying to make the new rtds
scheduler cpupool safe.


This is also known, and it will be fixed when I'll get back and
resubmit this:

http://lists.xenproject.org/archives/html/xen-devel/2015-09/msg03811.html

In fact, look at this patch:
http://lists.xenproject.org/archives/html/xen-devel/2015-09/msg03813.html

And I'll do that right in this days, so I'd say you can ignore this for
RTDS (as it will get fixed by the same series).

So both are known issues... I'll open bugs in our bugtracker for them to
avoid forgetting or tripping over them again.



Got it, thanks for the heads up.

--
Tianyang

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4] xen: sched: convert RTDS from time to event driven model

2016-02-02 Thread Tianyang Chen



On 2/2/2016 10:08 AM, Dario Faggioli wrote:

On Sun, 2016-01-31 at 23:32 -0500, Tianyang Chen wrote:

v4 is meant for discussion on the addition of replq.


In which cases, you should always put something like RFC in the
subject, so people knows what the intent is even by just skimming their
inboxes.


Got it.

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.

Bugs to be fixed: Cpupool and locks. When a pcpu is removed from a
pool and added to another, the lock equality assert in free_pdata()
fails when Pool-0 is using rtds.


Known issue. I will fix it for both Credit2 and RTDS, so just ignore
it.



Yep. I was wondering if I should fix this before sending out v4.


diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 2e5430f..c36e5de 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -87,7 +88,7 @@
  #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))

  #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+


Unrelated to the topic. Can we have a dedicated patch that adds a
comment above UPDATE_LIMIT_SHIFT, explainig what it is and how it's
used.

This is something low priority, of course.



Sure.


@@ -160,6 +166,7 @@ struct rt_private {
   */
  struct rt_vcpu {
  struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head p_elem;/* on the repl event list */


Mmm... 'p_elem' because the previous one was 'q_elem', and 'p' follows
'q', I guess. It feels a bit obscure, though, and I'd prefer a more
'talking' name.

For now, and at this level, I guess I can live with it. But in other
places, things need to be improved (see below).


@@ -213,8 +220,14 @@ static inline struct list_head
*rt_depletedq(const struct scheduler *ops)
  return _priv(ops)->depletedq;
  }

+static inline struct list_head *rt_replq(const struct scheduler
*ops)
+{
+return _priv(ops)->replq;
+}
+
  /*
- * Queue helper functions for runq and depletedq
+ * Queue helper functions for runq, depletedq
+ * and repl event q
   */


So, in comments, can we use full words as much as possible. It makes
them a lot easier to read (so, e.g., "replenishment events queue" or
"queue of the replenishment events").

I know this very file's style is not like that, from this point of
view... and, in fact, this is something I would like to change, when we
get the chance (i.e., when touching the code for other reasons, like in
this case).


@@ -228,6 +241,18 @@ __q_elem(struct list_head *elem)
  return list_entry(elem, struct rt_vcpu, q_elem);
  }

+static struct rt_vcpu *
+__p_elem(struct list_head *elem)
+{
+return list_entry(elem, struct rt_vcpu, p_elem);
+}
+

So, while this may well still be fine...


+static int
+__vcpu_on_p(const struct rt_vcpu *svc)
+{
+   return !list_empty(>p_elem);
+}
+


...here I really would like to go for something that will make it much
more obvious, just by giving a look at the code, where we are actually
checking the vcpu to be, without having to remember that p is the
replenishment queue.

So, I don't know, maybe something like vcpu_on_replq(), or
vcpu_needs_replenish() ?


@@ -387,6 +412,13 @@ __q_remove(struct rt_vcpu *svc)
  list_del_init(>q_elem);
  }

+static inline void
+__p_remove(struct rt_vcpu *svc)
+{
+if ( __vcpu_on_p(svc) )
+list_del_init(>p_elem);
+}
+

replq_remove(), or vcpu_repl_cancel() (or something else) ?


@@ -421,6 +453,32 @@ __runq_insert(const struct scheduler *ops,
struct rt_vcpu *svc)
  }

  /*
+ * Insert svc into the repl event list:
+ * vcpus that need to be replenished earlier go first.
+ */
+static void
+__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
+{


This is exactly what I've been trying to say above: __replq_insert() is
ok, much better than __q_insert()! Do the same everywhere else. :-)



Indeed, some of the names are confusing the first time I looked at RTDS.


+struct rt_private *prv = rt_priv(ops);
+struct list_head *replq = rt_replq(ops);
+struct list_head *iter;
+
+ASSERT( spin_is_locked(>lock) );
+

Is this really useful? Considering where this is called, I don't think
it is.



This basically comes from __q_insert(). I have been searching for the 
definitions of vcpu_schedule_lock_irq() and pcpu_schedule_lock_irq(). Do 
you know where they are defined? I did some name search in the entire 
xen directory but still nothing.



+ASSERT( !__vcpu_on_p(svc) );
+
+list_for_each(iter, replq)
+{
+struct rt_vcpu * iter_svc = __p_elem(iter);
+if ( svc->cur_deadline <= iter_svc->cur_deadline )
+break;
+}
+
+list_add_tail(>p_elem, iter);
+}

This looks ok. The list_for_each()+list_add_tail() part w

[Xen-devel] [Bug] sched: credit2: Assertion failed

2016-02-01 Thread Tianyang Chen


The following script caused an unresponsive dom0 and it cannot be 
reproduced all the time. The dom0 is using credit2 scheduler.


#!/bin/bash
xl cpupool-list -c
xl cpupool-cpu-remove Pool-0 3
xl cpupool-cpu-remove Pool-0 2
xl cpupool-create name=\"r\" sched=\"credit\"
xl cpupool-create name=\"t\" sched=\"credit\"
xl cpupool-cpu-add r 3
xl cpupool-cpu-add t 2

Another bug can be reproduced(not all the time) if the newly created 
pool is also credit2. There is an assertion failure (see bug.txt), which 
is essentially the same thing I got when trying to make the new rtds 
scheduler cpupool safe.


With different combinations of schedulers and number of pools created, 
dom0 is either unresponsive or kernel panics.


Since it happens when a pcpu(free) is added to another pool, I'm 
guessing the place to look at is schedule_cpu_switch(), where a series 
of alloc() and free() are called. And indeed at the last free_pdata(), 
the assertion fails.


Both of them happen on 4.7 unstable, git:2e46e3f

Any idea on this is appreciated.

--
Tianyang Chen
 Xen 4.7-unstable
(XEN) Xen version 4.7-unstable (root@) (gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 
4.6.3) debug=y Tue Feb  2 00:41:08 EST 2016
(XEN) Latest ChangeSet: Fri Jan 22 16:19:51 2016 +0100 git:2e46e3f
(XEN) Console output is synchronous.
(XEN) Bootloader: GRUB 1.99-21ubuntu3.16
(XEN) Command line: placeholder sched=credit2 loglvl=all guest_loglvl=all 
sync_console console_to_ring com1=115200,8n1,0x2f8,3 console=com1
(XEN) Video information:
(XEN)  VGA is text mode 80x25, font 8x16
(XEN) Disc information:
(XEN)  Found 1 MBR signatures
(XEN)  Found 1 EDD information structures
(XEN) Xen-e820 RAM map:
(XEN)   - 0009f400 (usable)
(XEN)  0009f400 - 000a (reserved)
(XEN)  000ca000 - 000cc000 (reserved)
(XEN)  000dc000 - 0010 (reserved)
(XEN)  0010 - bfee (usable)
(XEN)  bfee - bfeff000 (ACPI data)
(XEN)  bfeff000 - bff0 (ACPI NVS)
(XEN)  bff0 - c000 (usable)
(XEN)  e000 - f000 (reserved)
(XEN)  fec0 - fec1 (reserved)
(XEN)  fee0 - fee01000 (reserved)
(XEN)  fffe - 0001 (reserved)
(XEN)  0001 - 00024000 (usable)
(XEN) ACPI: RSDP 000F6B50, 0024 (r2 PTLTD )
(XEN) ACPI: XSDT BFEE2F86, 005C (r1 INTEL  440BX 604 VMW   1324272)
(XEN) ACPI: FACP BFEFEE98, 00F4 (r4 INTEL  440BX 604 PTL F4240)
(XEN) ACPI: DSDT BFEE35B8, 1B8E0 (r1 PTLTD  Custom604 MSFT  301)
(XEN) ACPI: FACS BFEFFFC0, 0040
(XEN) ACPI: BOOT BFEE3590, 0028 (r1 PTLTD  $SBFTBL$  604  LTP1)
(XEN) ACPI: APIC BFEE338E, 0202 (r1 PTLTDAPIC604  LTP0)
(XEN) ACPI: MCFG BFEE3352, 003C (r1 PTLTD  $PCITBL$  604  LTP1)
(XEN) ACPI: SRAT BFEE3082, 02D0 (r2 VMWARE MEMPLUG   604 VMW 1)
(XEN) ACPI: HPET BFEE304A, 0038 (r1 VMWARE VMW HPET  604 VMW 1)
(XEN) ACPI: WAET BFEE3022, 0028 (r1 VMWARE VMW WAET  604 VMW 1)
(XEN) System RAM: 8191MB (8388092kB)
(XEN) SRAT: PXM 0 -> APIC 00 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 01 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 02 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 03 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 04 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 05 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 06 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 07 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 08 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 09 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 0a -> Node 0
(XEN) SRAT: PXM 0 -> APIC 0b -> Node 0
(XEN) SRAT: PXM 0 -> APIC 0c -> Node 0
(XEN) SRAT: PXM 0 -> APIC 0d -> Node 0
(XEN) SRAT: PXM 0 -> APIC 0e -> Node 0
(XEN) SRAT: PXM 0 -> APIC 0f -> Node 0
(XEN) SRAT: PXM 0 -> APIC 10 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 11 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 12 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 13 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 14 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 15 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 16 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 17 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 18 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 19 -> Node 0
(XEN) SRAT: PXM 0 -> APIC 1a -> Node 0
(XEN) SRAT: PXM 0 -> APIC 1b -> Node 0
(XEN) SRAT: PXM 0 -> APIC 1c -> Node 0
(XEN) SRAT: PXM 0 -> APIC 1d -> Node 0
(XEN) SRAT: PXM 0 -> APIC 1e -> Node 0
(XEN) SRAT: PXM 0 -> APIC 1f -> Node 0
(XEN) SRAT: Node 0 PXM 0 0-a
(XEN) SRAT: Node 0 PXM 0 10-c000
(XEN) SRAT: Node 0 PXM 0 1-24000
(XEN) SRAT: Node 0 PXM 0 24000-204000 (hotplug)
(XEN) NUMA: Allocated memnodemap from 23b531000 - 23b552000
(XEN) NUMA: Using 8 for the hash shift.
(XEN) Domain heap initialised
(XEN) found SMP MP-table at 000f6bc0
(XE

[Xen-devel] [PATCH v4] xen: sched: convert RTDS from time to event driven model

2016-01-31 Thread Tianyang Chen
v4 is meant for discussion on the addition of replq.

Changes since v3:
removed running queue.
added repl queue to keep track of repl events.
timer is now per scheduler.
timer is init on a valid cpu in a cpupool.

Bugs to be fixed: Cpupool and locks. When a pcpu is removed from a
pool and added to another, the lock equality assert in free_pdata()
fails when Pool-0 is using rtds.

This patch is based on master branch after commit 2e46e3
x86/mce: fix misleading indentation in init_nonfatal_mce_checker()

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
 xen/common/sched_rt.c |  262 -
 1 file changed, 192 insertions(+), 70 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 2e5430f..c36e5de 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +88,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -142,6 +143,9 @@ static cpumask_var_t *_cpumask_scratch;
  */
 static unsigned int nr_rt_ops;
 
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
 /*
 * System-wide private data, includes global RunQueue/DepletedQ
  * Global lock is referenced by schedule_data.schedule_lock from all
@@ -152,7 +156,9 @@ struct rt_private {
 struct list_head sdom;  /* list of available domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
+struct list_head replq; /* ordered list of vcpus that need repl */
 cpumask_t tickled;  /* cpus been tickled */
+struct timer *repl_timer;   /* replenishment timer */
 };
 
 /*
@@ -160,6 +166,7 @@ struct rt_private {
  */
 struct rt_vcpu {
 struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head p_elem;/* on the repl event list */
 
 /* Up-pointers */
 struct rt_dom *sdom;
@@ -213,8 +220,14 @@ static inline struct list_head *rt_depletedq(const struct 
scheduler *ops)
 return _priv(ops)->depletedq;
 }
 
+static inline struct list_head *rt_replq(const struct scheduler *ops)
+{
+return _priv(ops)->replq;
+}
+
 /*
- * Queue helper functions for runq and depletedq
+ * Queue helper functions for runq, depletedq
+ * and repl event q
  */
 static int
 __vcpu_on_q(const struct rt_vcpu *svc)
@@ -228,6 +241,18 @@ __q_elem(struct list_head *elem)
 return list_entry(elem, struct rt_vcpu, q_elem);
 }
 
+static struct rt_vcpu *
+__p_elem(struct list_head *elem)
+{
+return list_entry(elem, struct rt_vcpu, p_elem);
+}
+
+static int
+__vcpu_on_p(const struct rt_vcpu *svc)
+{
+   return !list_empty(>p_elem);
+}
+
 /*
  * Debug related code, dump vcpu/cpu information
  */
@@ -387,6 +412,13 @@ __q_remove(struct rt_vcpu *svc)
 list_del_init(>q_elem);
 }
 
+static inline void
+__p_remove(struct rt_vcpu *svc)
+{
+if ( __vcpu_on_p(svc) )
+list_del_init(>p_elem);
+}
+
 /*
  * Insert svc with budget in RunQ according to EDF:
  * vcpus with smaller deadlines go first.
@@ -421,6 +453,32 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu 
*svc)
 }
 
 /*
+ * Insert svc into the repl event list:
+ * vcpus that need to be replenished earlier go first.
+ */
+static void
+__replq_insert(const struct scheduler *ops, struct rt_vcpu *svc)
+{
+struct rt_private *prv = rt_priv(ops);
+struct list_head *replq = rt_replq(ops);
+struct list_head *iter;
+
+ASSERT( spin_is_locked(>lock) );
+
+ASSERT( !__vcpu_on_p(svc) );
+
+list_for_each(iter, replq)
+{
+struct rt_vcpu * iter_svc = __p_elem(iter);
+if ( svc->cur_deadline <= iter_svc->cur_deadline )
+break;
+}
+
+list_add_tail(>p_elem, iter);
+}
+
+
+/*
  * Init/Free related code
  */
 static int
@@ -449,6 +507,7 @@ rt_init(struct scheduler *ops)
 INIT_LIST_HEAD(>sdom);
 INIT_LIST_HEAD(>runq);
 INIT_LIST_HEAD(>depletedq);
+INIT_LIST_HEAD(>replq);
 
 cpumask_clear(>tickled);
 
@@ -473,6 +532,9 @@ rt_deinit(const struct scheduler *ops)
 xfree(_cpumask_scratch);
 _cpumask_scratch = NULL;
 }
+
+kill_timer(prv->repl_timer);
+
 xfree(prv);
 }
 
@@ -586,6 +648,7 @@ rt_alloc_vdata(const struct scheduler *ops, struct vcpu 
*vc, void *dd)
 return NULL;
 
 INIT_LIST_HEAD(>q_elem);
+INIT_LIST_HEAD(>p_elem);
 svc->flags = 0U;
 svc->sdom = dd;
 svc->vcpu = vc;
@@ -618,6 +681,10 @@ static void
 rt_vcpu_insert(const struct scheduler *ops, struct vcpu *vc)
 {
 struct rt_vcpu *svc = rt_vc

Re: [Xen-devel] [PATCH V2 1/1] Improved RTDS scheduler

2016-01-29 Thread Tianyang Chen



On 1/26/2016 12:28 PM, Meng Xu wrote:

Hi Dario and Tianyang,

On Tue, Jan 26, 2016 at 9:06 AM, Dario Faggioli
<dario.faggi...@citrix.com> wrote:

On Mon, 2016-01-25 at 18:00 -0500, Meng Xu wrote:

On Mon, Jan 25, 2016 at 5:04 PM, Tianyang Chen <ti...@seas.upenn.edu>
wrote:

I have removed some of the Ccs so they won't get bothered as we
discussed
previously.

On 1/25/2016 4:00 AM, Dario Faggioli wrote:


On Thu, 2015-12-31 at 05:20 -0500, Tianyang Chen wrote:



@@ -147,6 +148,16 @@ static unsigned int nr_rt_ops;
* Global lock is referenced by schedule_data.schedule_lock
from all
* physical cpus. It can be grabbed via
vcpu_schedule_lock_irq()
*/
+
+/* dedicated timer for replenishment */
+static struct timer repl_timer;
+


So, there's always only one timer... Even if we have multiple
cpupool
with RTDS as their scheduler, they share the replenishment timer?
I
think it makes more sense to make this per-scheduler.


Yeah, I totally ignored the case for cpu-pools. It looks like when
a
cpu-pool is created, it copies the scheduler struct and calls
rt_init()
where a private field is initialized. So I assume the timer should
be put
inside the scheduler private struct? Now that I think about it, the
timer is
hard-coded to run on cpu0. If there're lots of cpu-pools but the
replenishment can only be done on the same pcpu, would that be a
problem?
Should we keep track of all instances of schedulers (nr_rt_ops
counts how
many) and just put times on different pcpus?


+/* controls when to first start the timer*/
+static int timer_started;
+


I don't like this, and I don't think we need it. In fact, you
removed
it yourself from v3, AFAICT.


@@ -635,6 +652,13 @@ rt_vcpu_insert(const struct scheduler
*ops,
struct vcpu *vc)

   /* add rt_vcpu svc to scheduler-specific vcpu list of the
dom */
   list_add_tail(>sdom_elem, >sdom->vcpu);
+
+if(!timer_started)
+{
+/* the first vcpu starts the timer for the first
time*/
+timer_started = 1;
+set_timer(_timer,svc->cur_deadline);
+}
   }


This also seems to be gone in v3, which is good. In fact, it uses
timer_started, which I already said I didn't like.

About the actual startup of the timer (no matter whether for
first time
or not). Here, you were doing it in _vcpu_insert() and not in
_vcpu_wake(); in v3 you're doing it in _vcpu_wake() and not in
_runq_insert()... Which one is the proper way?



Correct me if I'm wrong, at the beginning of the boot process, all
vcpus are
put to sleep/not_runnable after insertions. Therefore, the timer
should
start when the first vcpu wakes up. I think the wake() in v3 should
be
correct.



@@ -792,44 +816,6 @@ __runq_pick(const struct scheduler *ops,
const
cpumask_t *mask)
   }

   /*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{


Please, allow me to say that seeing this function going away,
fills my
heart with pure joy!! :-D


@@ -889,7 +874,7 @@ rt_schedule(const struct scheduler *ops,
s_time_t
now, bool_t tasklet_work_sched
   }
   }

-ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched
quantum */
+ret.time = snext->budget; /* invoke the scheduler next
time */
   ret.task = snext->vcpu;


This is ok as it is done in v3 (i.e., snext->budget if !idle, -1
if
idle).


@@ -1074,14 +1055,7 @@ rt_vcpu_wake(const struct scheduler
*ops,
struct vcpu *vc)
   /* insert svc to runq/depletedq because svc is not in
queue now
*/
   __runq_insert(ops, svc);

-__repl_update(ops, now);
-
-ASSERT(!list_empty(>sdom));
-sdom = list_entry(prv->sdom.next, struct rt_dom,
sdom_elem);
-online = cpupool_scheduler_cpumask(sdom->dom->cpupool);
-snext = __runq_pick(ops, online); /* pick snext from ALL
valid
cpus */
-
-runq_tickle(ops, snext);
+runq_tickle(ops, svc);


And this is another thing I especially like of this patch: it
makes the
wakeup path a lot simpler and a lot more similar to how it looks
like
in the other schedulers.

Good job with this. :-)


@@ -1108,15 +1078,8 @@ rt_context_saved(const struct scheduler
*ops,
struct vcpu *vc)
   if ( test_and_clear_bit(__RTDS_delayed_runq_add, 

flags) &&

likely(vcpu_runnable(vc)) )
   {
+/* only insert the pre-empted vcpu back*/
   __runq_insert(ops, svc);
-__repl_update(ops, NOW());
-
-ASSERT(!list_empty(>sdom));
-sdom = list_entry(prv->sdom.next, struct rt_dom,
sdom_elem);
-online = cpupool_scheduler_cpumask(sdom->dom-

cpupool);

-snext = __runq_pick(ops, online); /* pick snext from
ALL
cpus */
-
-runq_tickle(ops, snext);
   }


Mmm... I'll think about this more and let you know... But out of
the
top of my head, I think the tickling has to stay? You preempted a
vcpu
from th

Re: [Xen-devel] [PATCH V2 1/1] Improved RTDS scheduler

2016-01-25 Thread Tianyang Chen
I have removed some of the Ccs so they won't get bothered as we 
discussed previously.


On 1/25/2016 4:00 AM, Dario Faggioli wrote:

On Thu, 2015-12-31 at 05:20 -0500, Tianyang Chen wrote:


@@ -147,6 +148,16 @@ static unsigned int nr_rt_ops;
   * Global lock is referenced by schedule_data.schedule_lock from all
   * physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
   */
+
+/* dedicated timer for replenishment */
+static struct timer repl_timer;
+

So, there's always only one timer... Even if we have multiple cpupool
with RTDS as their scheduler, they share the replenishment timer? I
think it makes more sense to make this per-scheduler.

Yeah, I totally ignored the case for cpu-pools. It looks like when a 
cpu-pool is created, it copies the scheduler struct and calls rt_init() 
where a private field is initialized. So I assume the timer should be 
put inside the scheduler private struct? Now that I think about it, the 
timer is hard-coded to run on cpu0. If there're lots of cpu-pools but 
the replenishment can only be done on the same pcpu, would that be a 
problem? Should we keep track of all instances of schedulers (nr_rt_ops 
counts how many) and just put times on different pcpus?



+/* controls when to first start the timer*/
+static int timer_started;
+

I don't like this, and I don't think we need it. In fact, you removed
it yourself from v3, AFAICT.


@@ -635,6 +652,13 @@ rt_vcpu_insert(const struct scheduler *ops,
struct vcpu *vc)

  /* add rt_vcpu svc to scheduler-specific vcpu list of the dom */
  list_add_tail(>sdom_elem, >sdom->vcpu);
+
+if(!timer_started)
+{
+/* the first vcpu starts the timer for the first time*/
+timer_started = 1;
+set_timer(_timer,svc->cur_deadline);
+}
  }


This also seems to be gone in v3, which is good. In fact, it uses
timer_started, which I already said I didn't like.

About the actual startup of the timer (no matter whether for first time
or not). Here, you were doing it in _vcpu_insert() and not in
_vcpu_wake(); in v3 you're doing it in _vcpu_wake() and not in
_runq_insert()... Which one is the proper way?



Correct me if I'm wrong, at the beginning of the boot process, all vcpus 
are put to sleep/not_runnable after insertions. Therefore, the timer 
should start when the first vcpu wakes up. I think the wake() in v3 
should be correct.



@@ -792,44 +816,6 @@ __runq_pick(const struct scheduler *ops, const
cpumask_t *mask)
  }

  /*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{


Please, allow me to say that seeing this function going away, fills my
heart with pure joy!! :-D


@@ -889,7 +874,7 @@ rt_schedule(const struct scheduler *ops, s_time_t
now, bool_t tasklet_work_sched
  }
  }

-ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
+ret.time = snext->budget; /* invoke the scheduler next time */
  ret.task = snext->vcpu;


This is ok as it is done in v3 (i.e., snext->budget if !idle, -1 if
idle).


@@ -1074,14 +1055,7 @@ rt_vcpu_wake(const struct scheduler *ops,
struct vcpu *vc)
  /* insert svc to runq/depletedq because svc is not in queue now
*/
  __runq_insert(ops, svc);

-__repl_update(ops, now);
-
-ASSERT(!list_empty(>sdom));
-sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-online = cpupool_scheduler_cpumask(sdom->dom->cpupool);
-snext = __runq_pick(ops, online); /* pick snext from ALL valid
cpus */
-
-runq_tickle(ops, snext);
+runq_tickle(ops, svc);


And this is another thing I especially like of this patch: it makes the
wakeup path a lot simpler and a lot more similar to how it looks like
in the other schedulers.

Good job with this. :-)


@@ -1108,15 +1078,8 @@ rt_context_saved(const struct scheduler *ops,
struct vcpu *vc)
  if ( test_and_clear_bit(__RTDS_delayed_runq_add, >flags) &&
   likely(vcpu_runnable(vc)) )
  {
+/* only insert the pre-empted vcpu back*/
  __runq_insert(ops, svc);
-__repl_update(ops, NOW());
-
-ASSERT(!list_empty(>sdom));
-sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-online = cpupool_scheduler_cpumask(sdom->dom->cpupool);
-snext = __runq_pick(ops, online); /* pick snext from ALL
cpus */
-
-runq_tickle(ops, snext);
  }

Mmm... I'll think about this more and let you know... But out of the
top of my head, I think the tickling has to stay? You preempted a vcpu
from the pcpu where it was running, maybe some other pcpu is either
idle or running a vcpu with a later deadline, and should come and pick
this one up?

gEDF allows this but there is overhead and may not be worth it. I have 
no stats to support this but there are some papers on restricting what 
tasks c

[Xen-devel] [PATCH v3 0/1] xen: sched: convert RTDS from time to event driven model

2016-01-21 Thread Tianyang Chen
Current RTDS scheduler is time driven and is called every 1ms. 
During each scheduler call, the repl_update() scans both runq 
and depletedq, which might not be necessary every 1ms.

Since each vcpu is implemented as a deferrable server, budget is 
preserved during its period and refilled in the next. It is not 
necessary to check every 1ms as the current design does. The 
replenishment is needed at the nearest next period(nearest 
current_deadline) of all runnable vcpus.

This improved design tries to reduce scheduler invocation by 
using an event driven approach;rt_schedule() will return a value 
when the scheduler needs to be called next time. In addition, 
the sched_rt will have one dedicated timer to handle replenishment 
when necessary. In other words, the budget replenishment and 
scheduler decision(rt_schedule) are separated. 

The transient behavior should be noted. It happens between a vcpu 
tickles and a pcpu actually picks it. As previous discussions, this 
is unavoidable.

Previous discussions:
http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg02629.html

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>

Tianyang Chen (1):
  xen: sched: convert RTDS from time to event driven model

 xen/common/sched_rt.c |  291 -
 1 file changed, 213 insertions(+), 78 deletions(-)

-- 
1.7.9.5


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v3 1/1] xen: sched: convert RTDS from time to event driven model

2016-01-21 Thread Tianyang Chen
Budget replenishment and enforcement are separated by adding
a replenishment timer, which fires at the next most imminent
release time of all runnable vcpus.

A new runningq has been added to keep track of all vcpus that
are on pcpus.

The following functions have major changes to manage the runningq
and replenishment

repl_handler(): It is a timer handler which is re-programmed
to fire at the nearest vcpu deadline to replenish vcpus on runq,
depletedq and runningq. When the replenishment is done, each
replenished vcpu should tickle a pcpu to see if it needs to
preempt any running vcpus.

rt_schedule(): picks the highest runnable vcpu based on cpu
affinity and ret.time will be passed to schedule(). If an idle
vcpu is picked, -1 is returned to avoid busy-waiting. repl_update()
has been removed.

rt_vcpu_wake(): when a vcpu is awake, it tickles instead of
picking one from runq. When a vcpu wakes up, it might reprogram
the timer if it has a more recent release time.

rt_context_saved(): when context switching is finished, the
preempted vcpu will be put back into the runq. Runningq is
updated and picking from runq and tickling are removed.

Simplified functional graph:

schedule.c SCHEDULE_SOFTIRQ:
rt_schedule():
[spin_lock]
burn_budget(scurr)
snext = runq_pick()
[spin_unlock]

sched_rt.c TIMER_SOFTIRQ
replenishment_timer_handler()
[spin_lock]
<for_each_vcpu_on_q(i)> {
replenish(i)
runq_tickle(i)
}>
program_timer()
[spin_lock]

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
 xen/common/sched_rt.c |  291 -
 1 file changed, 213 insertions(+), 78 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 4372486..1470e94 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,7 +88,7 @@
 #define RTDS_DEFAULT_BUDGET (MICROSECS(4000))
 
 #define UPDATE_LIMIT_SHIFT  10
-#define MAX_SCHEDULE(MILLISECS(1))
+
 /*
  * Flags
  */
@@ -147,11 +148,19 @@ static unsigned int nr_rt_ops;
  * Global lock is referenced by schedule_data.schedule_lock from all
  * physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
  */
+
+/* dedicated timer for replenishment */
+static struct timer repl_timer;
+
+/* handler for the replenishment timer */
+static void repl_handler(void *data); 
+
 struct rt_private {
 spinlock_t lock;/* the global coarse grand lock */
 struct list_head sdom;  /* list of available domains, used for dump */
 struct list_head runq;  /* ordered list of runnable vcpus */
 struct list_head depletedq; /* unordered list of depleted vcpus */
+struct list_head runningq;  /* current running vcpus */
 cpumask_t tickled;  /* cpus been tickled */
 };
 
@@ -160,6 +169,7 @@ struct rt_private {
  */
 struct rt_vcpu {
 struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head runningq_elem; /* on the runningq list */
 struct list_head sdom_elem; /* on the domain VCPU list */
 
 /* Up-pointers */
@@ -215,6 +225,11 @@ static inline struct list_head *rt_depletedq(const struct 
scheduler *ops)
 return _priv(ops)->depletedq;
 }
 
+static inline struct list_head *rt_runningq(const struct scheduler *ops)
+{
+return _priv(ops)->runningq;
+}
+
 /*
  * Queue helper functions for runq and depletedq
  */
@@ -230,6 +245,18 @@ __q_elem(struct list_head *elem)
 return list_entry(elem, struct rt_vcpu, q_elem);
 }
 
+static int
+__vcpu_on_runningq(const struct rt_vcpu *svc)
+{
+return !list_empty(>runningq_elem);
+}
+
+static struct rt_vcpu *
+__runningq_elem(struct list_head *elem)
+{
+return list_entry(elem, struct rt_vcpu, runningq_elem);
+}
+
 /*
  * Debug related code, dump vcpu/cpu information
  */
@@ -290,7 +317,7 @@ rt_dump_pcpu(const struct scheduler *ops, int cpu)
 static void
 rt_dump(const struct scheduler *ops)
 {
-struct list_head *iter_sdom, *iter_svc, *runq, *depletedq, *iter;
+struct list_head *iter_sdom, *iter_svc, *runq, *depletedq, *runningq, 
*iter;
 struct rt_private *prv = rt_priv(ops);
 struct rt_vcpu *svc;
 struct rt_dom *sdom;
@@ -303,6 +330,7 @@ rt_dump(const struct scheduler *ops)
 
 runq = rt_runq(ops);
 depletedq = rt_depletedq(ops);
+runningq = rt_runningq(ops);
 
 printk("Global RunQueue info:\n");
 list_for_each( iter, runq )
@@ -318,6 +346,13 @@ rt_dump(const struct scheduler *ops)
 rt_dump_vcpu(ops, svc);
 }
 
+printk("Global RunningQueue info:\n");
+list_for_each( iter, runningq )
+{
+svc = __runningq_elem(iter);
+rt_dump_vcpu(ops, svc);
+}
+
 printk

Re: [Xen-devel] [PATCH v3 1/1] xen: sched: convert RTDS from time to event driven model

2016-01-21 Thread Tianyang Chen



On 1/21/2016 11:06 PM, Tianyang Chen wrote:

Budget replenishment and enforcement are separated by adding
a replenishment timer, which fires at the next most imminent
release time of all runnable vcpus.

A new runningq has been added to keep track of all vcpus that
are on pcpus.

The following functions have major changes to manage the runningq
and replenishment
Well, I'm just gonna comment on couple things here. The major difference 
between
this patch and the previous one is the addition of a runningq, which 
leads to

extra code in context_save().

Since the scheduler doesn't run until the first vcpu wakes up, wake() 
does some of the
timer stuff, in additional to the logic in the handler. It works nicely 
especially when

a vcpu wakes up and has the earliest next release time.


@@ -160,6 +169,7 @@ struct rt_private {
   */
  struct rt_vcpu {
  struct list_head q_elem;/* on the runq/depletedq list */
+struct list_head runningq_elem; /* on the runningq list */
  struct list_head sdom_elem; /* on the domain VCPU list */
  

Is it better to re-use q-elem so extra helper functions can be
avoided? Maybe another flag thing that shows which q a vcpu is on?

@@ -848,8 +880,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
  /* burn_budget would return for IDLE VCPU */
  burn_budget(ops, scurr, now);
  
-__repl_update(ops, now);

-
  if ( tasklet_work_scheduled )
  {
  snext = rt_vcpu(idle_vcpu[cpu]);
@@ -875,6 +905,9 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
  set_bit(__RTDS_delayed_runq_add, >flags);
  
  snext->last_start = now;

+
+ret.time =  -1; /* if an idle vcpu is picked */
+
I kinda just stick this in based on previous discussion on RTDS 
busy-waiting.




+/* The replenishment timer handler scans
+ * all three queues and find the most
+ * imminent release time. If there is no
+ * vcpus, timer is set in wake()
+ */
+static void repl_handler(void *data){
+unsigned long flags;
+s_time_t now = NOW();
+s_time_t min_repl = LONG_MAX; /* max time used in comparison */
+struct scheduler *ops = data;
+struct rt_private *prv = rt_priv(ops);
+struct list_head *runq = rt_runq(ops);
+struct list_head *depletedq = rt_depletedq(ops);
+struct list_head *runningq = rt_runningq(ops);
+struct list_head *iter;
+struct list_head *tmp;
+struct rt_vcpu *svc = NULL;
+
+stop_timer(_timer);
+
+spin_lock_irqsave(>lock, flags);
+
Here is a lock for protecting queues. Correct me if I'm wrong, since in 
alloc_pdata(), the private lock points to

the global system lock, I just assume there is no difference here.

--
Tianyang Chen

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] Questions about the use of idle_vcpu[]

2016-01-19 Thread Tianyang Chen



On 1/18/2016 11:07 AM, Meng Xu wrote:

On Mon, Jan 18, 2016 at 6:00 AM, Dario Faggioli
<dario.faggi...@citrix.com> wrote:


On Mon, 2016-01-18 at 10:47 +, George Dunlap wrote:

On Fri, Jan 15, 2016 at 1:04 AM, Tianyang Chen <ti...@seas.upenn.edu>


If an idle vcpu is picked, the ret.time is set accordingly in both
credit
and credit2 by checking whether snext is idle. if so, credit
returns -1 and
credit2 returns 2ms. However, there is no corresponding code in the
RTDS
scheduler to handle this. When an idle_vcpu is picked, the value of
ret.time
would be 0 and the scheduler would be invoked again. What is the
logic
behind this?


No real logic, as far as I can tell. :-)  The ret.time return value
tells the generic scheduling code when to set the next scheduler
timer.  According to the comment in xen/common/schedule.c:schedule(),
returning a negative value means "don't bother setting a timer"
(e.g.,
no time limit).  So credit1 does the right thing.


It does.



Then the RTDS is doing *incorrectly* right now. :-(



George: Thanks. After looking at idle_loop() it makes sense now. Even 
though an idle vcpu won't tell scheduler timer when to fire next time, 
do_tasklet() checks if all tasklets on the list are finished and then 
raise SCHEDULE_SOFTIRQ.






It looks like credit2's behavior will probably prevent the processor
from going into deeper power-saving states, and rtds' behavior might
cause it to essentially busy-wait.


RTDS behavior is broken in many respect, including this,

and in fact,
Meng and Tianyang are sending patches already to fix it (I'll let you
guys have my comments shortly :-P).



Right. Tianyang and I are working on changing it from quantum driven
model to event-driven (or called timer-driven) model. Tianyang sent
out the first-version patch, but that version has some problems. He is
working on the second version now.

Hi Dario,
Tianyang is working on the second version right now.
If you could have a quick look at our discussion in that thread and
points out the "serious" issues in the decision, that will be great!
We won't repeat the error again and again in the following versions.
As to the minor issues, we could refine it in the second version.
(I'm just thinking about how to save your time to have this done. For
the obvious things that I can handle, I will do it and avoid "wasting"
you time. For the design choices that we are unclear, we definitely
need your insights/commands. ;-) )

Dario: I had some discussion with Meng recently and the second version 
will soon come out. You can directly comment on it if that saves you 
some time.


Thanks,
Tianyang

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH V2 1/1] Improved RTDS scheduler

2016-01-19 Thread Tianyang Chen



On 1/15/2016 10:33 AM, Meng Xu wrote:

On Wed, Jan 6, 2016 at 2:57 AM, Tianyang Chen <ti...@seas.upenn.edu> wrote:




On 12/31/2015 8:44 AM, Meng Xu wrote:


On Thu, Dec 31, 2015 at 6:20 PM, Tianyang Chen <ti...@seas.upenn.edu> wrote:



Budget replenishment is now handled by a dedicated timer which is
triggered at the most imminent release time of all runnable vcpus.

Changes since V1:
  None

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
   xen/common/sched_rt.c |  159 
+
   1 file changed, 95 insertions(+), 64 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 4372486..d522272 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
   #include 
   #include 
   #include 
+#include 
   #include 
   #include 
   #include 
@@ -147,6 +148,16 @@ static unsigned int nr_rt_ops;
* Global lock is referenced by schedule_data.schedule_lock from all
* physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
*/
+
+/* dedicated timer for replenishment */
+static struct timer repl_timer;
+
+/* controls when to first start the timer*/




missing a space between * and / at the end of this line.




+static int timer_started;
+
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
   struct rt_private {
   spinlock_t lock;/* the global coarse grand lock */
   struct list_head sdom;  /* list of availalbe domains, used for dump 
*/
@@ -426,6 +437,7 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu 
*svc)
   static int
   rt_init(struct scheduler *ops)
   {
+const int cpu = smp_processor_id();




Did the "cpu" be used? If not, please remove it here.




   struct rt_private *prv = xzalloc(struct rt_private);

   printk("Initializing RTDS scheduler\n"
@@ -454,6 +466,8 @@ rt_init(struct scheduler *ops)

   ops->sched_data = prv;

+init_timer(_timer, repl_handler, ops, 0);
+
   return 0;

no_mem:
@@ -473,6 +487,9 @@ rt_deinit(const struct scheduler *ops)
   xfree(_cpumask_scratch);
   _cpumask_scratch = NULL;
   }
+
+kill_timer(_timer);
+
   xfree(prv);
   }

@@ -635,6 +652,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu 
*vc)

   /* add rt_vcpu svc to scheduler-specific vcpu list of the dom */
   list_add_tail(>sdom_elem, >sdom->vcpu);
+
+if(!timer_started)




space should be added..



Thanks, Meng. I will fix that.







+{
+/* the first vcpu starts the timer for the first time*/
+timer_started = 1;
+set_timer(_timer,svc->cur_deadline);
+}
   }

   /*
@@ -792,44 +816,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t 
*mask)
   }

   /*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{
-struct list_head *runq = rt_runq(ops);
-struct list_head *depletedq = rt_depletedq(ops);
-struct list_head *iter;
-struct list_head *tmp;
-struct rt_vcpu *svc = NULL;
-
-list_for_each_safe(iter, tmp, runq)
-{
-svc = __q_elem(iter);
-if ( now < svc->cur_deadline )
-break;
-
-rt_update_deadline(now, svc);
-/* reinsert the vcpu if its deadline is updated */
-__q_remove(svc);
-__runq_insert(ops, svc);
-}
-
-list_for_each_safe(iter, tmp, depletedq)
-{
-svc = __q_elem(iter);
-if ( now >= svc->cur_deadline )
-{
-rt_update_deadline(now, svc);
-__q_remove(svc); /* remove from depleted queue */
-__runq_insert(ops, svc); /* add to runq */
-}
-}
-}
-
-/*
* schedule function for rt scheduler.
* The lock is already grabbed in schedule.c, no need to lock here
*/
@@ -848,7 +834,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
   /* burn_budget would return for IDLE VCPU */
   burn_budget(ops, scurr, now);

-__repl_update(ops, now);

   if ( tasklet_work_scheduled )
   {
@@ -889,7 +874,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
   }
   }

-ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
+ret.time = snext->budget; /* invoke the scheduler next time */
   ret.task = snext->vcpu;

   /* TRACE */
@@ -1033,10 +1018,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu 
*vc)
   {
   struct rt_vcpu * const svc = rt_vcpu(vc);
   s_time_t now = NOW();
-struct rt_private *prv = rt_priv(ops);
-struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
- 

[Xen-devel] Questions about the use of idle_vcpu[]

2016-01-14 Thread Tianyang Chen

Hello,

I'm confused by the use of idle_vcpu[] in Xen schedulers. In credit, 
credit2 and rtds schedulers, they are used in conjunction with variable 
"tasklet_work_scheduled" like this:


if ( tasklet_work_scheduled )
{
snext = #_VCPU(idle_vcpu[cpu]);

}

The idle_vcpu array is initialized in cpu_schedule_up() and indeed they 
are set in corresponding alloc_vdata() to have IDLE_CREDIT and 0 weight 
or DEFAULT_PERIOD and 0 budget.


The value of tasklet_work_scheduled comes from scheduler.c interface. 
According to the comments in sched_credit2.c, if there's tasklet work to 
do, we want to chose the idle vcpu for this processor, and mark the 
current for delayed runqueue add. Can someone elaborate the reason why 
an idle vcpu should be picked? What does an idle_vcpu do to a tasklet? I 
feel like I'm missing something here.


If an idle vcpu is picked, the ret.time is set accordingly in both 
credit and credit2 by checking whether snext is idle. if so, credit 
returns -1 and credit2 returns 2ms. However, there is no corresponding 
code in the RTDS scheduler to handle this. When an idle_vcpu is picked, 
the value of ret.time would be 0 and the scheduler would be invoked 
again. What is the logic behind this?


I'd appreciate it if anyone could point me to the right direction.

Thanks,
Tianyang Chen

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/1] Improved RTDS scheduler

2016-01-05 Thread Tianyang Chen



On 1/4/2016 9:36 AM, Dario Faggioli wrote:

On Mon, 2016-01-04 at 21:48 +0800, Meng Xu wrote:

On Mon, Jan 4, 2016 at 7:42 PM, Ian Campbell <ian.campb...@citrix.com

wrote:
On Thu, 2015-12-31 at 04:45 -0500, Tianyang Chen wrote:

Budget replenishment is now handled by a dedicated timer which is
triggered at the most imminent release time of all runnable
vcpus.

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
  -cover-letter.patch|   16 +++
  0001-Improved-RTDS-scheduler.patch |  280

  bak|   62 


I assume you didn't actually intend to commit those ;-)


No. This version was sent by mistake. Please ignore this version. :-(

Tianyang sent another version (v2) with the subject "[PATCH V2 1/1]
Improved RTDS scheduler".


Indeed. BTW, I'm back today from Winter Holidays. I'll give a look at
this series as soon as practical.

I'll comment directly on V2, with the only exception of this very
email.

So, first of all, Cc list (which is the same here and in V2): how have
the names of the people that are in there been picked?



Dario: I picked the names based on previous discussion threads between 
you, Meng and Dagaen. Now I have changed who will be cc'ed so they won't 
be bothered.



The patch is only about xen/common/sched_rt.c. As per MAINTAINERS, the
maintainer of the code hosted in that file is just me. Surely it's a
good thing to also include George, as he's the other maintainer of
scheduling in general.

All the other people, though, should not be bothered by being copied
directly... Especially IanC and IanJ, I'd say.


(In general single patches do not require a separate cover letter
unless
the required context contains a large amount of information which
is not
appropriate for the commit message of the actual change, I'll leave
it to
you and the scheduler maintainers to decide how much of your cover
letter
it would be appropriate to move to the commit message).


The cover letter is supposed to explain the design idea of the
improved RTDS scheduler so that reviewers could (potentially) save
time in reviewing the patch, we think. :-)
Probably, the commit message should be refined and self-contained?


That's true. In particular, the "context", defined as summary of what
happened and have been discussed on the mailing list before, during
previous submissions, etc., definitely belongs to a cover letter.



There are pages of discussions on the current design and we figured we 
should make a cover page to summarize it and put more details in.



High level design ideas and concepts, also do, e.g., in order to avoid
ending up with commit changelogs that are all several pages long! :-)

All that being said, we certainly want patches' subject lines and
changelogs to be descriptive of at least what is being done, and why.
If we commit this as it is right now, here it is what we'll find in
`git log':

   Improved RTDS scheduler

   Budget replenishment is now handled by a dedicated timer which is
   triggered at the most imminent release time of all runnable vcpus.

Which won't, I'm quite sure about it, be that much useful when looking
at it, say, in 3 years! :-D

I know, finding the proper balance of what's better put where is not
always super easy, especially at the beginning... but that's part of
the process of cooking a good patch (series) :-)




A few suggestions:

  - we want tags in the subject. E.g., in this case, something like
"xen: sched: Improved RTDS scheduler"

  - avoid (in both subject and changelog) generic terms like "improve"
"fix", etc., unless you can explain more specifically to what they
refer... I mean, pretty much all commits that touch sched_rt.c can
have "Improve RTDS" as a subject, can't they? (Unless we decide to
commit something making things deliberately worse!! :-))




Good call.


In this case, e.g. (although, perhaps a bit long):
  "xen: sched: convert RTDS from time to event driven model"

  - as Ian said, there are information in the cover letter that can
well be moved in the changelog, making it a lot more useful, and
not less refined or self contained.



Here is what I propose, please let me know your thoughts:

Cover letter:
1. How this patch could improve RTDS
2. Summary of previous discussion

Current RTDS scheduler is time driven and is called every 1ms. During 
each scheduler call, the repl_update() scans both runq and depletedq, 
which might not be necessary every 1ms.


Since each vcpu is implemented as a deferable server, budget is 
preserved during its period and refilled in the next. It is not 
necessary to check every 1ms as the current design does. The 
replenishment is needed at the nearest next period(nearest 
current_deadline) 

Re: [Xen-devel] [PATCH V2 1/1] Improved RTDS scheduler

2016-01-05 Thread Tianyang Chen



On 12/31/2015 8:44 AM, Meng Xu wrote:

On Thu, Dec 31, 2015 at 6:20 PM, Tianyang Chen <ti...@seas.upenn.edu> wrote:


Budget replenishment is now handled by a dedicated timer which is
triggered at the most imminent release time of all runnable vcpus.

Changes since V1:
 None

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
  xen/common/sched_rt.c |  159 +
  1 file changed, 95 insertions(+), 64 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 4372486..d522272 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -147,6 +148,16 @@ static unsigned int nr_rt_ops;
   * Global lock is referenced by schedule_data.schedule_lock from all
   * physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
   */
+
+/* dedicated timer for replenishment */
+static struct timer repl_timer;
+
+/* controls when to first start the timer*/



missing a space between * and / at the end of this line.




+static int timer_started;
+
+/* handler for the replenishment timer */
+static void repl_handler(void *data);
+
  struct rt_private {
  spinlock_t lock;/* the global coarse grand lock */
  struct list_head sdom;  /* list of availalbe domains, used for dump */
@@ -426,6 +437,7 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu 
*svc)
  static int
  rt_init(struct scheduler *ops)
  {
+const int cpu = smp_processor_id();



Did the "cpu" be used? If not, please remove it here.




  struct rt_private *prv = xzalloc(struct rt_private);

  printk("Initializing RTDS scheduler\n"
@@ -454,6 +466,8 @@ rt_init(struct scheduler *ops)

  ops->sched_data = prv;

+init_timer(_timer, repl_handler, ops, 0);
+
  return 0;

   no_mem:
@@ -473,6 +487,9 @@ rt_deinit(const struct scheduler *ops)
  xfree(_cpumask_scratch);
  _cpumask_scratch = NULL;
  }
+
+kill_timer(_timer);
+
  xfree(prv);
  }

@@ -635,6 +652,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu 
*vc)

  /* add rt_vcpu svc to scheduler-specific vcpu list of the dom */
  list_add_tail(>sdom_elem, >sdom->vcpu);
+
+if(!timer_started)



space should be added..


Thanks, Meng. I will fix that.






+{
+/* the first vcpu starts the timer for the first time*/
+timer_started = 1;
+set_timer(_timer,svc->cur_deadline);
+}
  }

  /*
@@ -792,44 +816,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t 
*mask)
  }

  /*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{
-struct list_head *runq = rt_runq(ops);
-struct list_head *depletedq = rt_depletedq(ops);
-struct list_head *iter;
-struct list_head *tmp;
-struct rt_vcpu *svc = NULL;
-
-list_for_each_safe(iter, tmp, runq)
-{
-svc = __q_elem(iter);
-if ( now < svc->cur_deadline )
-break;
-
-rt_update_deadline(now, svc);
-/* reinsert the vcpu if its deadline is updated */
-__q_remove(svc);
-__runq_insert(ops, svc);
-}
-
-list_for_each_safe(iter, tmp, depletedq)
-{
-svc = __q_elem(iter);
-if ( now >= svc->cur_deadline )
-{
-rt_update_deadline(now, svc);
-__q_remove(svc); /* remove from depleted queue */
-__runq_insert(ops, svc); /* add to runq */
-}
-}
-}
-
-/*
   * schedule function for rt scheduler.
   * The lock is already grabbed in schedule.c, no need to lock here
   */
@@ -848,7 +834,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
  /* burn_budget would return for IDLE VCPU */
  burn_budget(ops, scurr, now);

-__repl_update(ops, now);

  if ( tasklet_work_scheduled )
  {
@@ -889,7 +874,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
  }
  }

-ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
+ret.time = snext->budget; /* invoke the scheduler next time */
  ret.task = snext->vcpu;

  /* TRACE */
@@ -1033,10 +1018,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu 
*vc)
  {
  struct rt_vcpu * const svc = rt_vcpu(vc);
  s_time_t now = NOW();
-struct rt_private *prv = rt_priv(ops);
-struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
-struct rt_dom *sdom = NULL;
-cpumask_t *online;

  BUG_ON( is_idle_vcpu(vc) );

@@ -1074,14 +1055,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu 
*vc)
  /* ins

[Xen-devel] [PATCH 1/1] Improved RTDS scheduler

2015-12-31 Thread Tianyang Chen
Budget replenishment is now handled by a dedicated timer which is
triggered at the most imminent release time of all runnable vcpus.

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
 -cover-letter.patch|   16 +++
 0001-Improved-RTDS-scheduler.patch |  280 
 bak|   62 
 xen/common/sched_rt.c  |  159 +++-
 4 files changed, 453 insertions(+), 64 deletions(-)
 create mode 100644 -cover-letter.patch
 create mode 100644 0001-Improved-RTDS-scheduler.patch
 create mode 100644 bak

diff --git a/-cover-letter.patch b/-cover-letter.patch
new file mode 100644
index 000..f5aca91
--- /dev/null
+++ b/-cover-letter.patch
@@ -0,0 +1,16 @@
+From 25ca27ea281885eb9873244a11f08e6987efb36e Mon Sep 17 00:00:00 2001
+From: Tianyang Chen <ti...@seas.upenn.edu>
+Date: Thu, 31 Dec 2015 04:05:21 -0500
+Subject: [PATCH] *** SUBJECT HERE ***
+
+*** BLURB HERE ***
+
+Tianyang Chen (1):
+  Improved RTDS scheduler
+
+ xen/common/sched_rt.c |  159 +
+ 1 file changed, 95 insertions(+), 64 deletions(-)
+
+-- 
+1.7.9.5
+
diff --git a/0001-Improved-RTDS-scheduler.patch 
b/0001-Improved-RTDS-scheduler.patch
new file mode 100644
index 000..02cb1f7
--- /dev/null
+++ b/0001-Improved-RTDS-scheduler.patch
@@ -0,0 +1,280 @@
+From 25ca27ea281885eb9873244a11f08e6987efb36e Mon Sep 17 00:00:00 2001
+From: Tianyang Chen <tianyangp...@gmail.com>
+Date: Thu, 31 Dec 2015 01:55:19 -0500
+Subject: [PATCH] Improved RTDS scheduler
+
+Budget replenishment is now handled by a dedicated timer which is
+triggered at the most imminent release time of all runnable vcpus.
+
+Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
+Signed-off-by: Meng Xu <men...@cis.upenn.edu>
+Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
+---
+ xen/common/sched_rt.c |  159 +
+ 1 file changed, 95 insertions(+), 64 deletions(-)
+
+diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
+index 4372486..d522272 100644
+--- a/xen/common/sched_rt.c
 b/xen/common/sched_rt.c
+@@ -16,6 +16,7 @@
+ #include 
+ #include 
+ #include 
++#include 
+ #include 
+ #include 
+ #include 
+@@ -147,6 +148,16 @@ static unsigned int nr_rt_ops;
+  * Global lock is referenced by schedule_data.schedule_lock from all
+  * physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
+  */
++
++/* dedicated timer for replenishment */
++static struct timer repl_timer;
++
++/* controls when to first start the timer*/
++static int timer_started;
++
++/* handler for the replenishment timer */
++static void repl_handler(void *data); 
++
+ struct rt_private {
+ spinlock_t lock;/* the global coarse grand lock */
+ struct list_head sdom;  /* list of availalbe domains, used for dump */
+@@ -426,6 +437,7 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu 
*svc)
+ static int
+ rt_init(struct scheduler *ops)
+ {
++const int cpu = smp_processor_id(); 
+ struct rt_private *prv = xzalloc(struct rt_private);
+ 
+ printk("Initializing RTDS scheduler\n"
+@@ -454,6 +466,8 @@ rt_init(struct scheduler *ops)
+ 
+ ops->sched_data = prv;
+ 
++init_timer(_timer, repl_handler, ops, 0);
++
+ return 0;
+ 
+  no_mem:
+@@ -473,6 +487,9 @@ rt_deinit(const struct scheduler *ops)
+ xfree(_cpumask_scratch);
+ _cpumask_scratch = NULL;
+ }
++
++kill_timer(_timer);
++
+ xfree(prv);
+ }
+ 
+@@ -635,6 +652,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu 
*vc)
+ 
+ /* add rt_vcpu svc to scheduler-specific vcpu list of the dom */
+ list_add_tail(>sdom_elem, >sdom->vcpu);
++
++if(!timer_started)
++{   
++/* the first vcpu starts the timer for the first time*/
++timer_started = 1;
++set_timer(_timer,svc->cur_deadline);
++}
+ }
+ 
+ /*
+@@ -792,44 +816,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t 
*mask)
+ }
+ 
+ /*
+- * Update vcpu's budget and
+- * sort runq by insert the modifed vcpu back to runq
+- * lock is grabbed before calling this function
+- */
+-static void
+-__repl_update(const struct scheduler *ops, s_time_t now)
+-{
+-struct list_head *runq = rt_runq(ops);
+-struct list_head *depletedq = rt_depletedq(ops);
+-struct list_head *iter;
+-struct list_head *tmp;
+-struct rt_vcpu *svc = NULL;
+-
+-list_for_each_safe(iter, tmp, runq)
+-{
+-svc = __q_elem(iter);
+-if ( now < svc->cur_deadline )
+-break;
+-
+-rt_update_deadline(now, svc);
+-/* reinsert the vcpu if its deadline is updated */
+-__q_remove(svc);
+-__runq_insert(ops, svc);
+-}
+-
+-list_for_each_safe(i

[Xen-devel] [PATCH V2 1/1] Improved RTDS scheduler

2015-12-31 Thread Tianyang Chen
Budget replenishment is now handled by a dedicated timer which is
triggered at the most imminent release time of all runnable vcpus.

Changes since V1:
None

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>
---
 xen/common/sched_rt.c |  159 +
 1 file changed, 95 insertions(+), 64 deletions(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 4372486..d522272 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -147,6 +148,16 @@ static unsigned int nr_rt_ops;
  * Global lock is referenced by schedule_data.schedule_lock from all
  * physical cpus. It can be grabbed via vcpu_schedule_lock_irq()
  */
+
+/* dedicated timer for replenishment */
+static struct timer repl_timer;
+
+/* controls when to first start the timer*/
+static int timer_started;
+
+/* handler for the replenishment timer */
+static void repl_handler(void *data); 
+
 struct rt_private {
 spinlock_t lock;/* the global coarse grand lock */
 struct list_head sdom;  /* list of availalbe domains, used for dump */
@@ -426,6 +437,7 @@ __runq_insert(const struct scheduler *ops, struct rt_vcpu 
*svc)
 static int
 rt_init(struct scheduler *ops)
 {
+const int cpu = smp_processor_id(); 
 struct rt_private *prv = xzalloc(struct rt_private);
 
 printk("Initializing RTDS scheduler\n"
@@ -454,6 +466,8 @@ rt_init(struct scheduler *ops)
 
 ops->sched_data = prv;
 
+init_timer(_timer, repl_handler, ops, 0);
+
 return 0;
 
  no_mem:
@@ -473,6 +487,9 @@ rt_deinit(const struct scheduler *ops)
 xfree(_cpumask_scratch);
 _cpumask_scratch = NULL;
 }
+
+kill_timer(_timer);
+
 xfree(prv);
 }
 
@@ -635,6 +652,13 @@ rt_vcpu_insert(const struct scheduler *ops, struct vcpu 
*vc)
 
 /* add rt_vcpu svc to scheduler-specific vcpu list of the dom */
 list_add_tail(>sdom_elem, >sdom->vcpu);
+
+if(!timer_started)
+{   
+/* the first vcpu starts the timer for the first time*/
+timer_started = 1;
+set_timer(_timer,svc->cur_deadline);
+}
 }
 
 /*
@@ -792,44 +816,6 @@ __runq_pick(const struct scheduler *ops, const cpumask_t 
*mask)
 }
 
 /*
- * Update vcpu's budget and
- * sort runq by insert the modifed vcpu back to runq
- * lock is grabbed before calling this function
- */
-static void
-__repl_update(const struct scheduler *ops, s_time_t now)
-{
-struct list_head *runq = rt_runq(ops);
-struct list_head *depletedq = rt_depletedq(ops);
-struct list_head *iter;
-struct list_head *tmp;
-struct rt_vcpu *svc = NULL;
-
-list_for_each_safe(iter, tmp, runq)
-{
-svc = __q_elem(iter);
-if ( now < svc->cur_deadline )
-break;
-
-rt_update_deadline(now, svc);
-/* reinsert the vcpu if its deadline is updated */
-__q_remove(svc);
-__runq_insert(ops, svc);
-}
-
-list_for_each_safe(iter, tmp, depletedq)
-{
-svc = __q_elem(iter);
-if ( now >= svc->cur_deadline )
-{
-rt_update_deadline(now, svc);
-__q_remove(svc); /* remove from depleted queue */
-__runq_insert(ops, svc); /* add to runq */
-}
-}
-}
-
-/*
  * schedule function for rt scheduler.
  * The lock is already grabbed in schedule.c, no need to lock here
  */
@@ -848,7 +834,6 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
 /* burn_budget would return for IDLE VCPU */
 burn_budget(ops, scurr, now);
 
-__repl_update(ops, now);
 
 if ( tasklet_work_scheduled )
 {
@@ -889,7 +874,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
 }
 }
 
-ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
+ret.time = snext->budget; /* invoke the scheduler next time */
 ret.task = snext->vcpu;
 
 /* TRACE */
@@ -1033,10 +1018,6 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu 
*vc)
 {
 struct rt_vcpu * const svc = rt_vcpu(vc);
 s_time_t now = NOW();
-struct rt_private *prv = rt_priv(ops);
-struct rt_vcpu *snext = NULL; /* highest priority on RunQ */
-struct rt_dom *sdom = NULL;
-cpumask_t *online;
 
 BUG_ON( is_idle_vcpu(vc) );
 
@@ -1074,14 +1055,7 @@ rt_vcpu_wake(const struct scheduler *ops, struct vcpu 
*vc)
 /* insert svc to runq/depletedq because svc is not in queue now */
 __runq_insert(ops, svc);
 
-__repl_update(ops, now);
-
-ASSERT(!list_empty(>sdom));
-sdom = list_entry(prv->sdom.next, struct rt_dom, sdom_elem);
-online = cpupool_scheduler_cpumask(sdom->dom->cpupool);
-snext = __runq_pick(ops, online); /* pick snext from

[Xen-devel] [PATCH 0/1] Improved RTDS scheduler

2015-12-31 Thread Tianyang Chen
Current RTDS scheduler is time driven and is called every 1ms. During each 
scheduler call, the repl_update() scans both runq and depletedq, which might 
not be necessary every 1ms.

Since each vcpu is implemented as a deferable server, budget is preserved 
during its period and refilled in the next. It is not necessary to check every 
1ms as the current design does. The replenishment is needed at the nearest next 
period (nearest current_deadline) of all runnable vcpus.

This improved design tries to reduce scheduler invocation by using an event 
driven approach; rt_schedule() will return a value when the scheduler needs to 
be called next time. In addition, the sched_rt will have one dedicated timer to 
handle replenishment when necessary. In other words, the budget replenishment 
and scheduler decision(rt_schedule) are separated.

Based on previous decision between Dario, Dagaen and Meng, the improved design 
can be implemented/modified as follows:

rt_schedule(): picks the highest runnable vcpu based on cpu affinity and 
ret.time will be passed to schedule().

rt_vcpu_wake(): when a vcpu is awake, it tickles instead of picking one from 
runq.

rt_context_saved(): when context switching is finished, the preempted vcpu will 
be put back into the runq. Picking from runq and tickling are removed.

repl_handler(): a timer handler which is reprogrammed to fire at the nearest 
vcpu deadline to replenish vcpus on depletedq while keeping the runq sorted. 
When the replenishment is done, each replenished vcpu in the runq should tickle 
a pcpu to see if it needs to preempt any running vcpus.


An extra field to record the last replenishing time will be added.

schedule.c SCHEDULE_SOFTIRQ:
rt_schedule():
[spin_lock]
burn_budget(scurr)
snext = runq_pick()
[spin_unlock]


sched_rt.c TIMER_SOFTIRQ
replenishment_timer_handler()
[spin_lock]
<for_each_depleted_vcpu(i, i.repl_time < NOW()) {
replenish(i)
runq_tickle(i)
}>
program_timer()
[spin_lock]

The transient behavior should be noted. It happens between a vcpu tickles and a 
pcpu actually picks it. As per previous discussions, this is unavoidable.

Previous discussions:
http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg02629.html

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>


*** BLURB HERE ***

Tianyang Chen (1):
  Improved RTDS scheduler

 -cover-letter.patch|   16 +++
 0001-Improved-RTDS-scheduler.patch |  280 
 bak|   62 
 xen/common/sched_rt.c  |  159 +++-
 4 files changed, 453 insertions(+), 64 deletions(-)
 create mode 100644 -cover-letter.patch
 create mode 100644 0001-Improved-RTDS-scheduler.patch
 create mode 100644 bak

-- 
1.7.9.5


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH V2 0/1] Improved RTDS scheduler

2015-12-31 Thread Tianyang Chen
Changes since V1:
Removed redundant cover letter
Removed redundant patch file that was added with the last commit
Please dis-regard V1 because it was in wrong format. Sorry about that.

Current RTDS scheduler is time driven and is called every 1ms. During each 
scheduler call, the repl_update() scans both runq and depletedq, which might 
not be necessary every 1ms.

Since each vcpu is implemented as a deferable server, budget is preserved 
during its period and refilled in the next. It is not necessary to check every 
1ms as the current design does. The replenishment is needed at the nearest next 
period (nearest current_deadline) of all runnable vcpus.

This improved design tries to reduce scheduler invocation by using an event 
driven approach; rt_schedule() will return a value when the scheduler needs to 
be called next time. In addition, the sched_rt will have one dedicated timer to 
handle replenishment when necessary. In other words, the budget replenishment 
and scheduler decision(rt_schedule) are separated.

Based on previous decision between Dario, Dagaen and Meng, the improved design 
can be implemented/modified as follows:

rt_schedule(): picks the highest runnable vcpu based on cpu affinity and 
ret.time will be passed to schedule().

rt_vcpu_wake(): when a vcpu is awake, it tickles instead of picking one from 
runq.

rt_context_saved(): when context switching is finished, the preempted vcpu will 
be put back into the runq. Picking from runq and tickling are removed.

repl_handler(): a timer handler which is reprogrammed to fire at the nearest 
vcpu deadline to replenish vcpus on runq and depletedq. When the replenishment 
is done, each replenished vcpu in the runq should tickle a pcpu to see if it 
needs to preempt any running vcpus.


schedule.c SCHEDULE_SOFTIRQ:
rt_schedule():
[spin_lock]
burn_budget(scurr)
snext = runq_pick()
[spin_unlock]


sched_rt.c TIMER_SOFTIRQ
replenishment_timer_handler()
[spin_lock]
<for_each_depleted_vcpu(i, i.repl_time < NOW()) {
replenish(i)
runq_tickle(i)
}>
program_timer()
[spin_lock]

The transient behavior should be noted. It happens between a vcpu tickles and a 
pcpu actually picks it. As per previous discussions, this is unavoidable.

Previous discussions:
http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg02629.html

Signed-off-by: Tianyang Chen <ti...@seas.upenn.edu>
Signed-off-by: Meng Xu <men...@cis.upenn.edu>
Signed-off-by: Dagaen Golomb <dgol...@seas.upenn.edu>


Tianyang Chen (1):
  Improved RTDS scheduler

 xen/common/sched_rt.c |  159 +
 1 file changed, 95 insertions(+), 64 deletions(-)

-- 
1.7.9.5


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH] sched_rt: Improved nested virtualization performance

2015-11-18 Thread Tianyang Chen
In nested virtualization, choosing the smaller value for the
time slice between the MAX_SCHEDULE and the budget will cause
high host CPU usage.

Signed-off-by: Tianyang Chen <tianyangp...@gmail.com>
---
 xen/common/sched_rt.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 4372486..9da3f35 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -889,7 +889,7 @@ rt_schedule(const struct scheduler *ops, s_time_t now, 
bool_t tasklet_work_sched
 }
 }
 
-ret.time = MIN(snext->budget, MAX_SCHEDULE); /* sched quantum */
+ret.time = MAX_SCHEDULE; /*should be used in nested virtualization*/
 ret.task = snext->vcpu;
 
 /* TRACE */
-- 
1.7.9.5


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel